diff --git a/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice.cu b/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice.cu
new file mode 100644
index 0000000..6c55981
--- /dev/null
+++ b/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice.cu
@@ -0,0 +1,965 @@
+/* FastEddy®: SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice.cu
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/*--- GAD submodule scalar parameters in device __constant__ memory (written with cudaMemcpyToSymbol in cuda_GADDeviceSetup) ---*/
+__constant__ int GADSelector_d;     /* Generalized Actuator Disk Selector: 0=off, 1=on */
+__constant__ int GADoutputForces_d;   /* Flag to include GAD forces in the output: 0=off, 1=on */
+__constant__ int GADofflineForces_d;  /* Flag to compute GAD forces in an offline mode: 0=off, 1=on */
+__constant__ int GADaxialInduction_d;   /* Flag to compute axial induction factor: 0=off (uses prescribed GADaxialIndVal), 1=on */
+__constant__ float GADaxialIndVal_d;    /* Prescribed constant axial induction factor when GADaxialInduction==0 */
+__constant__ int GADrefSwitch_d;   /* Switch to use reference windspeed: 0=off, 1=on */
+__constant__ float GADrefU_d;    /* Prescribed constant reference hub-height windspeed */
+__constant__ int GADForcingSwitch_d;    /* Switch to use the GADrefU-based or local windspeed in computing GAD forces: 0=local, 1=ref */
+__constant__ int GADNumTurbines_d;     /* Number of GAD Turbines */
+__constant__ int GADNumTurbineTypes_d;  /* Number of GAD Turbine Types */
+__constant__ int turbinePolyOrderMax_d; /* Maximum Polynomial order across all turbine types */
+__constant__ int turbinePolyClCdrNormSegments_d; /* Number of segments in the normalized radius for the lift and drag coefficient polynomial */
+__constant__ int alphaBounds_d;         /* Number of elements in the min/max angle of attack array for the lift/drag curves */
+
+__constant__ int GADsamplingAvgLength_d;   /* Length of the sampling-average windows (averaging over the fastest timescales) */
+__constant__ float GADsamplingAvgWeight_d;   /* Weight of each instance when taking the sampling average */
+__constant__ int GADrefSeriesLength_d;   /* Number of sample-average windows incorporated into the full Reference average */
+__constant__ float GADrefSeriesWeight_d; /* Precalculated averaging weight for the Reference average */
+
+__constant__ int numgridCells_away_d; /* Halo-region of cells considered in rotor disk distance-wise smoothing function */
+
+int* GAD_turbineType_d;     /* Integer class-label for turbine type (used to index the per-turbine-type arrays below) */
+int* GAD_turbineRank_d;     /* Integer mpi-rank of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+int* GAD_turbineRefi_d;     /* Integer i-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+int* GAD_turbineRefj_d;     /* Integer j-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+int* GAD_turbineRefk_d;     /* Integer k-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+int* GAD_turbineYawing_d;   /* Integer flag, ==1 while a turbine is currently yawing */
+float* GAD_Xcoords_d;       /* turbine x-location [m] from SW domain corner */
+float* GAD_Ycoords_d;       /* turbine y-location [m] from SW domain corner */
+float* GAD_turbineRefMag_d; /* Reference "ambient" velocity magnitude for yaw control and beta/omega [m/s]*/
+float* GAD_turbineRefDir_d; /* Reference "ambient" velocity direction (horizontal, met. standard orientation) for yaw control and beta/omega [degrees]*/
+float* GAD_turbineUseries_d;/* uSeries of sample averages spanning the rolling-average reference period (GADrefSeriesLength entries per turbine) */
+float* GAD_turbineVseries_d;/* vSeries of sample averages spanning the rolling-average reference period (GADrefSeriesLength entries per turbine) */
+float* u_sampAvg_d;         /* u sample averages for each turbine */
+float* v_sampAvg_d;         /* v sample averages for each turbine */
+float* GAD_yawError_d;      /* yaw error between the incoming wind and the turbine orientation */
+float* GAD_anFactor_d;     /* turbine axial induction factor at hub height */
+float* GAD_rotorTheta_d;    /* turbine yaw angle [deg. North] */
+float* GAD_hubHeights_d;    /* turbine hub height [m AGL], one entry per turbine type */
+float* GAD_rotorD_d;        /* turbine rotor diameter [m], one entry per turbine type */
+float* GAD_nacelleD_d;      /* nacelle diameter [m], one entry per turbine type */
+float* turbinePolyTwist_d;  /* turbine-type-specific twist polynomial coefficients*/
+float* turbinePolyChord_d;  /* turbine-type-specific chord polynomial coefficients*/
+float* turbinePolyPitch_d;  /* turbine-type-specific pitch polynomial coefficients*/
+float* turbinePolyOmega_d;  /* turbine-type-specific omega polynomial coefficients*/
+float* rnorm_vect_d;        /* turbine-type-specific normalized radius segment limits*/
+float* alpha_minmax_vect_d; /* turbine-type-specific maximum and minimum angle of attack for the lift/drag curves*/
+float* turbinePolyCl_d;     /* turbine-type-specific lift coefficient polynomial coefficients*/
+float* turbinePolyCd_d;     /* turbine-type-specific drag coefficient polynomial coefficients*/
+
+float* GAD_turbineVolMask_d; /* turbine Volume mask (0 if turbine-free cell in domain, else turbine index + 1 of the cell in a turbine yaw-swept volume) */
+float* GAD_forceX_d;         /* turbine forces in the x-direction (allocated only when GADoutputForces==1) */
+float* GAD_forceY_d;         /* turbine forces in the y-direction (allocated only when GADoutputForces==1) */
+float* GAD_forceZ_d;         /* turbine forces in the z-direction (allocated only when GADoutputForces==1) */
+
+
+/*#################------------ GAD submodule function definitions ------------------#############*/
+/*----->>>>> int cuda_GADDeviceSetup();       ---------------------------------------------------------
+ * Copies the GAD scalar parameters to device constant memory and, when GADSelector > 0, allocates and initializes
+ * the GAD device arrays from the host-side GAD arrays. Always returns CUDA_GAD_SUCCESS (CUDA return codes are not checked). */
+extern "C" int cuda_GADDeviceSetup(){
+   int errorCode = CUDA_GAD_SUCCESS;
+   float* tmp_vector;   /* host scratch buffer holding one turbine's constant-valued reference series */
+   float pi=acosf(-1.0);
+   int i,iturb;
+
+   cudaMemcpyToSymbol(GADSelector_d, &GADSelector, sizeof(int));
+   if(GADSelector > 0){
+    /*Host-to-Device memcopy constant values */
+    cudaMemcpyToSymbol(GADNumTurbines_d, &GADNumTurbines, sizeof(int));
+    cudaMemcpyToSymbol(GADoutputForces_d, &GADoutputForces, sizeof(int));
+    cudaMemcpyToSymbol(GADofflineForces_d, &GADofflineForces, sizeof(int));
+    cudaMemcpyToSymbol(GADaxialInduction_d, &GADaxialInduction, sizeof(int));
+    cudaMemcpyToSymbol(GADaxialIndVal_d, &GADaxialIndVal, sizeof(float));
+    cudaMemcpyToSymbol(GADrefSwitch_d, &GADrefSwitch, sizeof(int));
+    cudaMemcpyToSymbol(GADrefU_d, &GADrefU, sizeof(float));
+    cudaMemcpyToSymbol(GADForcingSwitch_d, &GADForcingSwitch, sizeof(int)); /* source is the host variable, not the device symbol */
+    cudaMemcpyToSymbol(GADNumTurbineTypes_d, &GADNumTurbineTypes, sizeof(int));
+    cudaMemcpyToSymbol(turbinePolyOrderMax_d, &turbinePolyOrderMax, sizeof(int));
+    cudaMemcpyToSymbol(turbinePolyClCdrNormSegments_d, &turbinePolyClCdrNormSegments, sizeof(int));
+    cudaMemcpyToSymbol(alphaBounds_d, &alphaBounds, sizeof(int));
+    cudaMemcpyToSymbol(GADsamplingAvgLength_d, &GADsamplingAvgLength, sizeof(int));
+    cudaMemcpyToSymbol(GADsamplingAvgWeight_d, &GADsamplingAvgWeight, sizeof(float));
+    cudaMemcpyToSymbol(GADrefSeriesLength_d, &GADrefSeriesLength, sizeof(int));
+    cudaMemcpyToSymbol(GADrefSeriesWeight_d, &GADrefSeriesWeight, sizeof(float));
+    cudaMemcpyToSymbol(numgridCells_away_d, &numgridCells_away, sizeof(int));
+
+    /*Device memory allocations and Host-to-Device memcopy for turbine arrays */
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineType_d);
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRank_d);
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefi_d);
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefj_d);
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineRefk_d);
+    fecuda_DeviceMallocInt(GADNumTurbines*sizeof(int), &GAD_turbineYawing_d);
+    cudaMemcpy(GAD_turbineType_d, GAD_turbineType, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineRank_d, GAD_turbineRank, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineRefi_d, GAD_turbineRefi, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineRefj_d, GAD_turbineRefj, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineRefk_d, GAD_turbineRefk, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineYawing_d, GAD_turbineYawing, GADNumTurbines*sizeof(int), cudaMemcpyHostToDevice);
+
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_turbineRefMag_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_turbineRefDir_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_Xcoords_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_Ycoords_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_rotorTheta_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_yawError_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &GAD_anFactor_d);
+    cudaMemcpy(GAD_turbineRefMag_d, GAD_turbineRefMag, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_turbineRefDir_d, GAD_turbineRefDir, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_Xcoords_d, GAD_Xcoords, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_Ycoords_d, GAD_Ycoords, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_rotorTheta_d, GAD_rotorTheta, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+#ifdef DEBUG_GAD
+    for (iturb=0; iturb<GADNumTurbines; iturb++){
+       printf("%d/%d: iturb--%d: rotorTheta=%f\n",
+              mpi_rank_world,mpi_size_world,iturb,GAD_rotorTheta[iturb]);
+    }
+#endif
+    cudaMemcpy(GAD_yawError_d, GAD_yawError, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_anFactor_d, GAD_anFactor, GADNumTurbines*sizeof(float), cudaMemcpyHostToDevice);
+
+    fecuda_DeviceMalloc(GADNumTurbines*GADrefSeriesLength*sizeof(float), &GAD_turbineUseries_d);
+    fecuda_DeviceMalloc(GADNumTurbines*GADrefSeriesLength*sizeof(float), &GAD_turbineVseries_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &u_sampAvg_d);
+    fecuda_DeviceMalloc(GADNumTurbines*sizeof(float), &v_sampAvg_d);
+
+    //Initialize u_sampAvg & GAD_turbineUseries as constant (per-turbine) then send down to the device 
+    tmp_vector = (float *) malloc(GADrefSeriesLength*sizeof(float));
+    for (iturb=0; iturb<GADNumTurbines; iturb++){
+       tmp_vector[0] = -GAD_turbineRefMag[iturb]*sinf(GAD_turbineRefDir[iturb]*pi/180.0); /* u-component from reference magnitude/direction */
+#ifdef DEBUG_GAD
+       printf("%d/%d: iturb--%d \tu_refSeries_initial=%f\n",
+              mpi_rank_world,mpi_size_world,iturb,tmp_vector[0]);
+#endif
+       for (i=1; i<GADrefSeriesLength; i++){
+           tmp_vector[i] = tmp_vector[0];
+       }
+
+       cudaMemcpy(&u_sampAvg_d[iturb], &tmp_vector[0], sizeof(float), cudaMemcpyHostToDevice);
+       cudaMemcpy(&GAD_turbineUseries_d[iturb*GADrefSeriesLength], tmp_vector, GADrefSeriesLength*sizeof(float), cudaMemcpyHostToDevice);
+    }
+    //Initialize v_sampAvg & GAD_turbineVseries as constant (per-turbine) then send down to the device 
+    for (iturb=0; iturb<GADNumTurbines; iturb++){
+       tmp_vector[0] = -GAD_turbineRefMag[iturb]*cosf(GAD_turbineRefDir[iturb]*pi/180.0); /* v-component from reference magnitude/direction */
+#ifdef DEBUG_GAD
+       printf("%d/%d: iturb--%d \tv_refSeries_initial=%f\n",
+              mpi_rank_world,mpi_size_world,iturb,tmp_vector[0]);
+#endif
+       for (i=1; i<GADrefSeriesLength; i++){
+           tmp_vector[i] = tmp_vector[0];
+       }
+       cudaMemcpy(&v_sampAvg_d[iturb], &tmp_vector[0], sizeof(float), cudaMemcpyHostToDevice);
+       cudaMemcpy(&GAD_turbineVseries_d[iturb*GADrefSeriesLength], tmp_vector, GADrefSeriesLength*sizeof(float), cudaMemcpyHostToDevice);
+    }
+    free(tmp_vector);
+
+    fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_hubHeights_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_rotorD_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*sizeof(float), &GAD_nacelleD_d);
+    cudaMemcpy(GAD_hubHeights_d, GAD_hubHeights, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_rotorD_d, GAD_rotorD, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(GAD_nacelleD_d, GAD_nacelleD, GADNumTurbineTypes*sizeof(float), cudaMemcpyHostToDevice);
+
+    /* Per-turbine-type polynomial coefficient tables: turbinePolyOrderMax entries per type */
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyTwist_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyChord_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyPitch_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), &turbinePolyOmega_d);
+    cudaMemcpy(turbinePolyTwist_d, turbinePolyTwist, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(turbinePolyChord_d, turbinePolyChord, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(turbinePolyPitch_d, turbinePolyPitch, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(turbinePolyOmega_d, turbinePolyOmega, GADNumTurbineTypes*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+
+    fecuda_DeviceMalloc(GADNumTurbineTypes*(turbinePolyClCdrNormSegments+1)*sizeof(float), &rnorm_vect_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*alphaBounds*sizeof(float), &alpha_minmax_vect_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), &turbinePolyCl_d);
+    fecuda_DeviceMalloc(GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), &turbinePolyCd_d);
+
+    cudaMemcpy(rnorm_vect_d, rnorm_vect, GADNumTurbineTypes*(turbinePolyClCdrNormSegments+1)*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(alpha_minmax_vect_d, alpha_minmax_vect, GADNumTurbineTypes*alphaBounds*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(turbinePolyCd_d, turbinePolyCd, GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+    cudaMemcpy(turbinePolyCl_d, turbinePolyCl, GADNumTurbineTypes*turbinePolyClCdrNormSegments*turbinePolyOrderMax*sizeof(float), cudaMemcpyHostToDevice);
+
+    fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_turbineVolMask_d);
+    cudaMemcpy(GAD_turbineVolMask_d, GAD_turbineVolMask, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
+
+    if (GADoutputForces == 1){ /* force output arrays exist on the device only when requested */
+      fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceX_d);
+      fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceY_d);
+      fecuda_DeviceMalloc((Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), &GAD_forceZ_d);
+      cudaMemcpy(GAD_forceX_d, GAD_forceX, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
+      cudaMemcpy(GAD_forceY_d, GAD_forceY, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
+      cudaMemcpy(GAD_forceZ_d, GAD_forceZ, (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float), cudaMemcpyHostToDevice);
+    }
+   }
+
+
+   return(errorCode);
+} //end cuda_GADDeviceSetup()
+
+/*----->>>>> extern "C" int cuda_GADDeviceCleanup();  -----------------------------------------------------------
+* Frees every device array allocated by cuda_GADDeviceSetup() for the GAD submodule (only when GADSelector > 0).
+* Always returns CUDA_GAD_SUCCESS; the cudaFree return codes are not checked. */
+extern "C" int cuda_GADDeviceCleanup(){
+   int errorCode = CUDA_GAD_SUCCESS;
+   /* Free any GAD submodule arrays */
+   if(GADSelector > 0){
+     cudaFree(GAD_turbineType_d);
+     cudaFree(GAD_turbineRank_d);
+     cudaFree(GAD_turbineRefi_d);
+     cudaFree(GAD_turbineRefj_d);
+     cudaFree(GAD_turbineRefk_d);
+     cudaFree(GAD_turbineYawing_d);
+     cudaFree(GAD_turbineRefMag_d);
+     cudaFree(GAD_turbineRefDir_d);
+     cudaFree(GAD_turbineUseries_d);
+     cudaFree(GAD_turbineVseries_d);
+     cudaFree(u_sampAvg_d);
+     cudaFree(v_sampAvg_d);
+     cudaFree(GAD_yawError_d);   /* device buffer (the host array GAD_yawError must not be passed to cudaFree) */
+     cudaFree(GAD_anFactor_d);
+     cudaFree(GAD_Xcoords_d);
+     cudaFree(GAD_Ycoords_d);
+     cudaFree(GAD_rotorTheta_d);
+
+     cudaFree(GAD_hubHeights_d);
+     cudaFree(GAD_rotorD_d);
+     cudaFree(GAD_nacelleD_d);
+
+     cudaFree(turbinePolyTwist_d);
+     cudaFree(turbinePolyChord_d);
+     cudaFree(turbinePolyPitch_d);
+     cudaFree(turbinePolyOmega_d);
+     cudaFree(rnorm_vect_d);          /* lift/drag lookup tables are allocated in setup as well */
+     cudaFree(alpha_minmax_vect_d);
+     cudaFree(turbinePolyCl_d);
+     cudaFree(turbinePolyCd_d);
+
+     cudaFree(GAD_turbineVolMask_d);
+     if (GADoutputForces == 1){   /* mirrors the conditional allocation in cuda_GADDeviceSetup() */
+       cudaFree(GAD_forceX_d);
+       cudaFree(GAD_forceY_d);
+       cudaFree(GAD_forceZ_d);
+     }
+   }
+   return(errorCode);
+}//end cuda_GADDeviceCleanup()
+
+/*----->>>>> __global__ void  cudaDevice_GADinter();  --------------------------------------------------
+* Global entry kernel that updates the reference values used for GAD yawing and other turbine characteristics.
+* Only the thread located on a turbine's reference cell (rank,i,j,k match) at the final RK stage performs updates. */
+__global__ void cudaDevice_GADinter(float* xPos_d, float* yPos_d, float* zPos_d, float* topoPos_d,
+                                    int simTime_it, int timeStage, int numRKstages, float dt,
+                                    float* hydroFlds_d, int* GAD_turbineType_d, float* GAD_turbineVolMask_d,
+                                    float* GAD_Xcoords_d, float* GAD_Ycoords_d, float* GAD_rotorTheta_d,
+                                    float* GAD_hubHeights_d, float* GAD_rotorD_d, float* GAD_nacelleD_d,
+                                    float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                    float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                    float* rnorm_vect_d, float* alpha_minmax_vect_d,
+                                    float* turbinePolyCl_d, float* turbinePolyCd_d,
+                                    int* GAD_turbineRank_d, int* GAD_turbineRefi_d, int* GAD_turbineRefj_d, int* GAD_turbineRefk_d,
+                                    float* u_sampAvg_d, float* v_sampAvg_d,
+                                    float* GAD_turbineUseries_d, float* GAD_turbineVseries_d,
+                                    float* GAD_turbineRefMag_d, float* GAD_turbineRefDir_d,
+                                    int* GAD_turbineYawing_d, float* GAD_yawError_d, float* GAD_anFactor_d){
+
+   float tiltAngle=0.0;
+
+   /* Cell indices of this thread and the strides of the flattened field arrays */
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+   const int fldStride = (Nx_d+2*Nh_d)*(Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   const int iStride = (Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   const int jStride = (Nz_d+2*Nh_d);
+   const int kStride = 1;
+
+   /* Nothing to do outside the [iMin,iMax) x [jMin,jMax) x [kMin,kMax) index range */
+   if((i < iMin_d)||(i >= iMax_d) ||
+      (j < jMin_d)||(j >= jMax_d) ||
+      (k < kMin_d)||(k >= kMax_d) ){
+      return;
+   }
+   const int ijk = i*iStride + j*jStride + k*kStride;
+   const int ij = i*(Ny_d+2*Nh_d) + j*(1);
+
+   /* A positive mask value stores (turbine index + 1); anything else is a turbine-free cell */
+   const float maskVal = GAD_turbineVolMask_d[ijk];
+   if(!(maskVal > 0.0)){
+      return;
+   }
+   const int iturb = __float2int_rn( maskVal ) - 1;
+
+   /* Only the reference cell of this turbine, at the last RK stage, updates the turbine state */
+   const bool atRefCell = (timeStage == numRKstages) &&
+                          (mpi_rank_world_d == GAD_turbineRank_d[iturb]) &&
+                          (i == GAD_turbineRefi_d[iturb]) &&
+                          (j == GAD_turbineRefj_d[iturb]) &&
+                          (k == GAD_turbineRefk_d[iturb]);
+   if(!atRefCell){
+      return;
+   }
+
+   const bool windowBoundary = (simTime_it%GADsamplingAvgLength_d == 0);
+   if(windowBoundary){ // beginning/end of a sample window
+      const int sampleIndex = (simTime_it/GADsamplingAvgLength_d)%GADrefSeriesLength_d;
+      //store the finished sample average in the series and refresh the refMag and refDir values
+      update_turbineRefMagDir(sampleIndex, u_sampAvg_d[iturb],v_sampAvg_d[iturb],
+                              &GAD_turbineUseries_d[iturb*GADrefSeriesLength_d], &GAD_turbineVseries_d[iturb*GADrefSeriesLength_d], &GAD_turbineRefMag_d[iturb], &GAD_turbineRefDir_d[iturb]);
+#ifdef DEBUG_GAD
+      printf("%d/%d:simTime_it=%d, iturb--%d @ (%d,%d,%d): rotTheta = %f, u_sA=%f, v_sA=%f, RefMag=%f, RefDir=%f \n",
+         mpi_rank_world_d,mpi_size_world_d,simTime_it,iturb,i,j,k, GAD_rotorTheta_d[iturb], u_sampAvg_d[iturb],v_sampAvg_d[iturb],GAD_turbineRefMag_d[iturb],GAD_turbineRefDir_d[iturb]);
+#endif
+      //restart the sample-average accumulators from zero
+      u_sampAvg_d[iturb] = 0.0;
+      v_sampAvg_d[iturb] = 0.0;
+      // compute normal/axial induction factor at hub height from time-averaged hub-height wind speed
+      if (simTime_it > 0){
+         const int turbType = GAD_turbineType_d[iturb];
+         const float rotorD = GAD_rotorD_d[turbType];
+         const float nacelleD = GAD_nacelleD_d[turbType];
+         float cell_inRotor, cell_rVector, cell_twistAngle, cell_chordLength, cell_betaAngle, cell_omegaRot;
+         cudaDevice_cellInRotor(&cell_inRotor, &cell_rVector, iturb, GAD_Xcoords_d[iturb], GAD_Ycoords_d[iturb],
+                                GAD_rotorTheta_d[iturb], GAD_hubHeights_d[turbType], tiltAngle,
+                                rotorD, nacelleD,
+                                xPos_d[ijk], yPos_d[ijk],
+                                zPos_d[ijk]-topoPos_d[ij],
+                                dX_d,dY_d);
+         cudaDevice_GADtwistChord(&turbinePolyTwist_d[turbType*turbinePolyOrderMax_d], &turbinePolyChord_d[turbType*turbinePolyOrderMax_d],
+                                  rotorD, cell_rVector, &cell_twistAngle, &cell_chordLength);
+         cudaDevice_GADbetaOmega(GAD_turbineRefMag_d[iturb], GAD_anFactor_d[iturb],
+                                 &turbinePolyPitch_d[turbType*turbinePolyOrderMax_d], &turbinePolyOmega_d[turbType*turbinePolyOrderMax_d],
+                                 rotorD, cell_rVector, cell_twistAngle, &cell_betaAngle, &cell_omegaRot);
+         compute_normalInduction(GAD_turbineRefMag_d[iturb], rotorD, nacelleD,
+                                 cell_rVector, cell_betaAngle, cell_omegaRot, cell_chordLength,
+                                 &rnorm_vect_d[turbType*(turbinePolyClCdrNormSegments_d+1)],
+                                 &alpha_minmax_vect_d[turbType*alphaBounds_d],
+                                 &turbinePolyCl_d[turbType*turbinePolyClCdrNormSegments_d*turbinePolyOrderMax_d],
+                                 &turbinePolyCd_d[turbType*turbinePolyClCdrNormSegments_d*turbinePolyOrderMax_d],
+                                 &GAD_anFactor_d[iturb]);
+#ifdef DEBUG_GAD
+         printf("GAD_turbineRefMag_d[iturb]=%f,GAD_anFactor_d[iturb]=%f \n",GAD_turbineRefMag_d[iturb],GAD_anFactor_d[iturb]);
+#endif
+      } // if (simTime_it > 0)
+   }//endif beginning/end of a sample window
+
+   // accumulate this timestep instance into the sampling window average
+   update_sampleRefVel(hydroFlds_d[fldStride*U_INDX+ijk], hydroFlds_d[fldStride*V_INDX+ijk], hydroFlds_d[fldStride*RHO_INDX+ijk], &u_sampAvg_d[iturb], &v_sampAvg_d[iturb]);
+
+   if (windowBoundary && (simTime_it >= GADsamplingAvgLength_d*GADrefSeriesLength_d)){
+      update_yawError(&GAD_turbineRefDir_d[iturb], &GAD_rotorTheta_d[iturb], &GAD_yawError_d[iturb], &GAD_turbineYawing_d[iturb], dt);
+#ifdef DEBUG_GAD
+      printf("%d/%d:simTime_it=%d, iturb--%d @ (%d,%d,%d) [after update_yawError]: GAD_turbineYawing_d[iturb]=%d, GAD_yawError_d[iturb]=%f \n",
+             mpi_rank_world_d,mpi_size_world_d,simTime_it,iturb,i,j,k,GAD_turbineYawing_d[iturb], GAD_yawError_d[iturb]);
+#endif
+   }
+   if (GAD_turbineYawing_d[iturb] == 1){
+      update_rotorTheta(&GAD_turbineRefDir_d[iturb], &GAD_rotorTheta_d[iturb], &GAD_yawError_d[iturb], &GAD_turbineYawing_d[iturb], dt);
+#ifdef DEBUG_GAD
+      printf("%d/%d:simTime_it=%d, iturb--%d @ (%d,%d,%d) [after update_rotorTheta]: GAD_turbineYawing_d[iturb]=%d, GAD_rotorTheta_d[iturb]=%f \n",
+             mpi_rank_world_d,mpi_size_world_d,simTime_it,iturb,i,j,k,GAD_turbineYawing_d[iturb], GAD_rotorTheta_d[iturb]);
+#endif
+   }
+} // end cudaDevice_GADinter()
+
+/*----->>>>> __global__ void cudaDevice_GADfinal();  --------------------------------------------------
+* Global entry kernel that computes the GAD forcing from turbines: for each cell flagged in the turbine volume mask
+* and found to be in the rotor disk, the blade-element forces are computed and (after spin-up) applied to the Frhs. */
+__global__ void cudaDevice_GADfinal(float* xPos_d, float* yPos_d, float* zPos_d, float* topoPos_d,
+                                    float* hydroFlds_d, float* hydroFldsFrhs_d, int simTime_it, float dt,
+                                    int* GAD_turbineType_d, float* GAD_turbineVolMask_d,
+                                    float* GAD_Xcoords_d, float* GAD_Ycoords_d, float* GAD_rotorTheta_d,
+                                    float* GAD_hubHeights_d, float* GAD_rotorD_d, float* GAD_nacelleD_d,
+                                    float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                    float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                    float* rnorm_vect_d, float* alpha_minmax_vect_d,
+                                    float* turbinePolyCl_d, float* turbinePolyCd_d,
+                                    float* GAD_turbineRefMag_d, float* GAD_anFactor_d,
+                                    float* GAD_forceX_d, float* GAD_forceY_d, float* GAD_forceZ_d){
+
+   float tiltAngle=0.0;
+
+   /* Cell indices of this thread and the strides of the flattened field arrays */
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+   const int fldStride = (Nx_d+2*Nh_d)*(Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   const int iStride = (Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   const int jStride = (Nz_d+2*Nh_d);
+   const int kStride = 1;
+
+   /* Nothing to do outside the [iMin,iMax) x [jMin,jMax) x [kMin,kMax) index range */
+   if((i < iMin_d)||(i >= iMax_d) ||
+      (j < jMin_d)||(j >= jMax_d) ||
+      (k < kMin_d)||(k >= kMax_d) ){
+      return;
+   }
+   const int ijk = i*iStride + j*jStride + k*kStride;
+   const int ij = i*(Ny_d+2*Nh_d) + j*(1);
+   /* A positive mask value stores (turbine index + 1) for cells in a yaw-swept volume; skip all other cells */
+   const float maskVal = GAD_turbineVolMask_d[ijk];
+   if(!(maskVal > 0.0)){
+      return;
+   }
+   const int iturb = __float2int_rn( maskVal ) - 1;
+   const int turbType = GAD_turbineType_d[iturb];
+   const float rotorD = GAD_rotorD_d[turbType];
+   const float nacelleD = GAD_nacelleD_d[turbType];
+   const float hubHgt = GAD_hubHeights_d[turbType];
+   float cell_inRotor, cell_rVector;
+   cudaDevice_cellInRotor(&cell_inRotor, &cell_rVector, iturb, GAD_Xcoords_d[iturb], GAD_Ycoords_d[iturb],
+                          GAD_rotorTheta_d[iturb], hubHgt, tiltAngle,
+                          rotorD, nacelleD,
+                          xPos_d[ijk], yPos_d[ijk],
+                          zPos_d[ijk]-topoPos_d[ij],
+                          dX_d,dY_d);
+   if(!(cell_inRotor > 0.0)){
+      return;
+   }
+   //Compute the momentum Frhs forces from the GAD blade element momentum theory for this cell
+   float cell_twistAngle, cell_chordLength, cell_betaAngle, cell_omegaRot, forceN, forceT;
+   cudaDevice_GADtwistChord(&turbinePolyTwist_d[turbType*turbinePolyOrderMax_d], &turbinePolyChord_d[turbType*turbinePolyOrderMax_d],
+                            rotorD, cell_rVector, &cell_twistAngle, &cell_chordLength);
+   cudaDevice_GADbetaOmega(GAD_turbineRefMag_d[iturb], GAD_anFactor_d[iturb],
+                           &turbinePolyPitch_d[turbType*turbinePolyOrderMax_d], &turbinePolyOmega_d[turbType*turbinePolyOrderMax_d],
+                           rotorD, cell_rVector, cell_twistAngle, &cell_betaAngle, &cell_omegaRot);
+//#define DEBUG_GAD
+#ifdef DEBUG_GAD
+   if(fabsf(cell_rVector-0.5*(rotorD-nacelleD))<5.0){
+     printf("At i,j,k (%d,%d,%d): cell_rVector = %f, twist=%f, chord=%f, pitch=%f, omega=%f, U_ijk=%f\n",
+             i,j,k,cell_rVector, cell_twistAngle, cell_chordLength,cell_betaAngle, cell_omegaRot,
+             sqrtf(powf(hydroFlds_d[fldStride*U_INDX+ijk]/hydroFlds_d[fldStride*RHO_INDX+ijk],2.0)+powf(hydroFlds_d[fldStride*V_INDX+ijk]/hydroFlds_d[fldStride*RHO_INDX+ijk],2.0)));
+   }
+#endif
+   cudaDevice_GADforcesCompute(hydroFlds_d[fldStride*U_INDX+ijk], hydroFlds_d[fldStride*V_INDX+ijk], hydroFlds_d[fldStride*RHO_INDX+ijk],
+                               rotorD, nacelleD,
+                               cell_rVector, cell_betaAngle, cell_omegaRot, cell_chordLength,
+                               &rnorm_vect_d[turbType*(turbinePolyClCdrNormSegments_d+1)],
+                               &alpha_minmax_vect_d[turbType*alphaBounds_d],
+                               &turbinePolyCl_d[turbType*turbinePolyClCdrNormSegments_d*turbinePolyOrderMax_d],
+                               &turbinePolyCd_d[turbType*turbinePolyClCdrNormSegments_d*turbinePolyOrderMax_d],
+                               &forceN, &forceT);
+   if (simTime_it >= (GADsamplingAvgLength_d*GADrefSeriesLength_d)){ // prevents use of potentially unrealistic initial values...
+      cudaDevice_GADforcesApply(hydroFlds_d[fldStride*RHO_INDX+ijk], GAD_Xcoords_d[iturb], GAD_Ycoords_d[iturb],
+                                hubHgt, GAD_rotorTheta_d[iturb], rotorD,
+                                xPos_d[ijk], yPos_d[ijk], (zPos_d[ijk]-topoPos_d[ij]),
+                                forceN, forceT, &hydroFldsFrhs_d[fldStride*U_INDX+ijk], &hydroFldsFrhs_d[fldStride*V_INDX+ijk],
+                                &hydroFldsFrhs_d[fldStride*W_INDX+ijk],&GAD_forceX_d[ijk],&GAD_forceY_d[ijk],&GAD_forceZ_d[ijk],
+                                cell_rVector, nacelleD);
+   }
+
+} // end cudaDevice_GADfinal()
+
+/*----->>>>> __device__ void  cudaDevice_cellInRotor();  --------------------------------------------------
+* Calculates the radial distance of a grid point from the rotor axis (*cell_rVector) and sets *cell_inRotor to 1.0
+* when the point lies within the rotor-disk slab and the blade annulus, otherwise 0.0.
+*   turbX,turbY,turbHubHgt: nacelle-center coordinates; turbTheta,tiltAngle: rotor-normal orientation angles [deg]
+*   rotorD,nacelleD: outer/inner diameters of the annulus; xLoc,yLoc,zLoc: grid-point coordinates
+*   dx,dy: grid spacings used to size the slab thickness (callers pass dX_d,dY_d); iturb is not used here.
+*/
+__device__ void cudaDevice_cellInRotor(float* cell_inRotor, float* cell_rVector, 
+                                       int iturb, float turbX, float turbY, 
+                                       float turbTheta, float turbHubHgt, float tiltAngle,
+                                       float rotorD, float nacelleD,   
+                                       float xLoc, float yLoc, float zLoc, float dx, float dy){
+
+    float x_hat[3];
+    float dr[3];
+    float perpDist;
+    float perpdx_rot;
+    float pi = 3.1415926535;
+
+    /*Initialize the cell flag value to 0.0 (False) */
+    *cell_inRotor = 0.0;
+
+    //Unit vector normal to the rotor-disk plane 
+    x_hat[0] = cosf(tiltAngle*pi/180.0)*cosf(turbTheta*pi/180.0); 
+    x_hat[1] = cosf(tiltAngle*pi/180.0)*sinf(turbTheta*pi/180.0); 
+    x_hat[2] = -sinf(tiltAngle*pi/180.0); 
+    //Vector from nacelle center to current grid point
+    dr[0] = xLoc-turbX;
+    dr[1] = yLoc-turbY;
+    dr[2] = zLoc-turbHubHgt;
+    //Signed perpendicular distance from nacelle-center to current grid point (normal to the rotor-disk plane)
+    perpDist = dr[0]*x_hat[0] + dr[1]*x_hat[1] + dr[2]*x_hat[2];
+    //Proper radial distance of blade segment (length of dr with its rotor-normal component removed)
+    *cell_rVector = sqrtf( powf(dr[0]-perpDist*x_hat[0],2.0)
+                          +powf(dr[1]-perpDist*x_hat[1],2.0) 
+                          +powf(dr[2]-perpDist*x_hat[2],2.0) );
+    /*Perpendicular "dx" in the rotated "x-y" plane  */
+    perpdx_rot =  fabsf(dx*cosf(0.5*pi+turbTheta*pi/180.0)) + fabsf(dy*sinf(0.5*pi+turbTheta*pi/180.0));
+    //perpDist is signed, so its magnitude is tested: the slab must be bounded on BOTH sides of the rotor plane
+    if(   (fabsf(perpDist) < __int2float_rn(numgridCells_away_d)*perpdx_rot)
+       && *cell_rVector <= (0.5*rotorD)     //Not in notebook but needed?
+       && *cell_rVector > (0.5*nacelleD) ){
+       *cell_inRotor = 1.0;
+    }
+
+} // end cudaDevice_cellInRotor()
+/*----->>>>> __device__ void cudaDevice_GADtwistChord();  --------------------------------------------------
+*/
+/* Evaluates the twist and chord polynomials at the normalized radius turbineRadius/(0.5*rotorD).
+*
+*  turbinePolyTwist_d, turbinePolyChord_d  polynomial coefficients, ordered from the power
+*                                          (turbinePolyOrderMax_d-1) down to the constant term
+*  rotorD         rotor diameter
+*  turbineRadius  radial distance at which the polynomials are evaluated
+*  twist_angle    (out) twist polynomial value converted from degrees to radians
+*  chord_length   (out) chord polynomial value, returned without further scaling
+*/
+__device__ void cudaDevice_GADtwistChord(float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                         float rotorD, float turbineRadius, float* twist_angle, float* chord_length){
+
+  const float pi = acosf(-1.0);
+  const float halfDiameter = 0.5*rotorD;
+  const float rNormalized = turbineRadius/halfDiameter;
+  float twistSum = 0.0;
+  float chordSum = 0.0;
+  float expo = __int2float_rn(turbinePolyOrderMax_d);
+  int term;
+
+  /* Both polynomials share the same powers of the normalized radius, so one pass serves both */
+  for (term=0; term<turbinePolyOrderMax_d; term++){
+     expo = expo - 1.0;
+     const float rPow = powf(rNormalized,expo);
+     twistSum = twistSum + rPow*turbinePolyTwist_d[term];
+     chordSum = chordSum + rPow*turbinePolyChord_d[term];
+  }
+
+  *twist_angle = twistSum*(pi/180.0); // rad
+  //JAS 4-19-2023 Opting not to use chord polynomial fits normalized by blade length
+  *chord_length = chordSum;
+
+} // end cudaDevice_GADtwistChord()
+
+/*----->>>>> __device__ void cudaDevice_GADbetaOmega();  --------------------------------------------------
+*/
+/* Computes the total blade angle (twist + pitch) and the rotor rotational speed by evaluating
+* the pitch and omega polynomials at a reference wind speed.
+*
+*  turbineRefMag       reference velocity magnitude, used when GADrefSwitch_d != 1
+*  anFactor            axial induction factor used to scale turbineRefMag by 1/(1-anFactor)
+*  turbinePolyPitch_d  pitch polynomial coefficients, highest power first (result in degrees)
+*  turbinePolyOmega_d  omega polynomial coefficients, highest power first (result scaled by 2*pi/60)
+*  rotorD, turbineRadius  not referenced by this routine
+*  twist_angle         blade twist angle [rad], added to the pitch angle
+*  beta_angle          (out) twist_angle + pitch angle [rad]
+*  omega_rot           (out) rotational speed [rad s-1]
+*/
+__device__ void cudaDevice_GADbetaOmega(float turbineRefMag, float anFactor, float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                        float rotorD, float turbineRadius, float twist_angle, float* beta_angle, float* omega_rot){
+
+  int nn;
+  float pitch_angle;
+  float U_ijk;
+  float pi = acosf(-1.0);
+  float temp_expo;
+/*  Removed use of this limiters
+  float pitch_umin = 11.0; // perhaps to be read in from netCDF turbine characteristics file...
+  float omega_umin = 4.0; //  perhaps to be read in from netCDF turbine characteristics file...
+  float omega_umax = 9.0; //  perhaps to be read in from netCDF turbine characteristics file...
+*/
+  if(GADrefSwitch_d == 1){
+    U_ijk = GADrefU_d;
+  }else{
+    U_ijk = turbineRefMag/(1.0-anFactor); // should this include vertical velocity too - w???
+  }
+
+  /* pitch angle */
+  temp_expo = __int2float_rn(turbinePolyOrderMax_d);
+  pitch_angle = 0.0; // the accumulator must start from zero; it was previously read uninitialized
+  for (nn=0; nn<turbinePolyOrderMax_d; nn++){
+     temp_expo = temp_expo - 1.0;
+     pitch_angle = pitch_angle + powf(U_ijk,temp_expo)*turbinePolyPitch_d[nn];
+  }
+  pitch_angle = pitch_angle*(pi/180.0); // rad
+  *beta_angle = twist_angle + pitch_angle; // total twist angle
+
+  /* rotational speed */
+  temp_expo = __int2float_rn(turbinePolyOrderMax_d);
+  *omega_rot = 0.0;
+  for (nn=0; nn<turbinePolyOrderMax_d; nn++){
+     temp_expo = temp_expo - 1.0;
+     *omega_rot = *omega_rot + powf(U_ijk,temp_expo)*turbinePolyOmega_d[nn];
+  }
+  *omega_rot = *omega_rot*(2.0*pi/60.0); // rad s-1
+
+} // end cudaDevice_GADbetaOmega()
+
+/*----->>>>> __device__ void cudaDevice_GADforcesCompute();  --------------------------------------------------
+*/
+/* Iteratively solves for the axial and tangential induction factors at one radial position and
+* returns the resulting rotor-normal and tangential forces.
+*
+*  u, v               horizontal flow components; both are divided by rho before use
+*  rho                density
+*  rotorD, nacelleD   rotor and nacelle diameters
+*  turbineRadius      radial distance of the point from the rotor axis
+*  beta_angle         total blade angle [rad]
+*  omega_rot          rotational speed [rad s-1]
+*  chord_length       local chord length
+*  rnorm_vect, turbinePolyCl, turbinePolyCd   forwarded unchanged to compute_ClCd_incoeff()
+*  alpha_minmax_vect  [0] = lower and [1] = upper clamp applied to the angle of attack [deg]
+*  GADforce_n         (out) normal force, clipped to be non-negative
+*  GADforce_t         (out) tangential force, clipped to be non-negative
+*
+* The number of iterations depends on per-thread data and the routine uses no shared memory, so
+* it must not contain block-wide barriers: a __syncthreads() that is not reached by every thread
+* of the block is undefined behavior and can hang the kernel.
+*/
+__device__ void cudaDevice_GADforcesCompute(float u, float v, float rho, float rotorD, float nacelleD,
+                                            float turbineRadius, float beta_angle, float omega_rot, float chord_length,
+                                            float *rnorm_vect, float *alpha_minmax_vect, float *turbinePolyCl, float *turbinePolyCd,
+                                            float *GADforce_n, float *GADforce_t){
+
+  float U_ijk,U_rel,phi_rel,alpha_angle;
+  float a_tol;
+  float a_tol_min = 1.0e-5; // minimum tolerance for converged induction factors
+  float at_it,at_0;
+  float an_it,an_0;
+  float pi = acosf(-1.0);
+  float C_l,C_d;
+  float r_norm,blade_length;
+  float B_num = 3.0; // number of blades
+  float sigma,f_tip,f_hub,F_tot;
+  float r_hub;
+  float c_n,c_t;
+  float L_f,D_f;
+  int iter_cnt;
+  int max_iter = 50;
+  float U_ijk_tmp; 
+  float switchFactor;
+
+  blade_length = 0.5*rotorD;
+  r_norm = turbineRadius/blade_length;
+  r_hub = 0.5*nacelleD;
+  c_n = 0.0; // only updated when GADaxialInduction_d == 1, but always printed under DEBUG_GAD
+
+  if(GADForcingSwitch_d == 1){
+    U_ijk = GADrefU_d; 
+    switchFactor = 1.0;
+  }else{  
+    U_ijk = sqrtf(powf(u/rho,2.0)+powf(v/rho,2.0));
+    switchFactor = 0.0;
+  }//end if-else  GADForcingSwitch_d == 1
+
+  if (GADaxialInduction_d == 1){
+    an_it = 0.0;
+  }else{
+    an_it = GADaxialIndVal_d;
+  }
+  at_it = 0.0;
+  an_0 = an_it;
+  at_0 = at_it;
+  // iterative solve for induction factor(s)
+  a_tol = a_tol_min + 1.0; //Initialize a_tol to get into the while loop
+  iter_cnt = 0;
+  while((a_tol > a_tol_min) && (iter_cnt < max_iter)){
+
+    // the reference speed is reduced by the axial induction only when forcing with GADrefU_d
+    U_ijk_tmp = (1.0-switchFactor)*U_ijk + switchFactor*U_ijk*(1.0-an_it); 
+    U_rel = sqrtf(powf(U_ijk_tmp,2.0) + powf(omega_rot*turbineRadius*(1.0+at_it),2.0)); // wind/blade relative velocity
+    phi_rel = atanf(U_ijk_tmp/(omega_rot*turbineRadius*(1.0+at_it))); // angle between relative velocity and plane of rotation    
+    alpha_angle = (phi_rel - beta_angle)*(180.0/pi); // angle of attack
+
+    alpha_angle = fmaxf(fminf(alpha_angle,alpha_minmax_vect[1]),alpha_minmax_vect[0]);
+    compute_ClCd_incoeff(rnorm_vect,turbinePolyCl,turbinePolyCd,alpha_angle,r_norm,&C_l,&C_d); // lift and drag coefficients
+    sigma = B_num*chord_length/(2.0*pi*turbineRadius); // solidity factor
+    f_tip = B_num*(0.5*rotorD-turbineRadius)/(2.0*turbineRadius*sinf(phi_rel)); // blade tip losses
+    f_hub = B_num*(turbineRadius-r_hub)/(2.0*turbineRadius*sinf(phi_rel)); // blade hub losses
+    F_tot = (2.0/pi)*acosf(expf(-f_tip))*(2.0/pi)*acosf(expf(-f_hub)); // blade total losses
+
+    if (GADaxialInduction_d == 1){
+      c_n = C_l*cosf(phi_rel)+C_d*sinf(phi_rel);
+      an_it = 1.0/(1.0 + 4.0*F_tot*sinf(phi_rel)*sinf(phi_rel)/(sigma*c_n)); // normal induction factor
+    }
+
+    c_t = C_l*sinf(phi_rel)-C_d*cosf(phi_rel);
+    at_it = 1.0/(4.0*F_tot*sinf(phi_rel)*cosf(phi_rel)/(sigma*c_t) - 1.0); // tangential induction factor
+
+    // change of both induction factors since the previous iteration
+    a_tol = sqrtf(powf(an_it-an_0,2.0) + powf(at_it-at_0,2.0));
+    an_0 = an_it;
+    at_0 = at_it;
+
+    iter_cnt++;
+  } //while(a_tol > a_tol_min) --end of iterative process
+
+  // calculate lift and drag forces (N m-1)
+  L_f = 0.5*rho*U_rel*U_rel*chord_length*C_l;
+  D_f = 0.5*rho*U_rel*U_rel*chord_length*C_d;
+  // calculate normal and tangential forces (N m-2)
+  *GADforce_n = fmaxf((B_num/(2.0*pi*turbineRadius))*(L_f*cosf(phi_rel)+D_f*sinf(phi_rel)),0.0);
+  *GADforce_t = fmaxf((B_num/(2.0*pi*turbineRadius))*(L_f*sinf(phi_rel)-D_f*cosf(phi_rel)),0.0);
+#ifdef DEBUG_GAD
+  if(fabsf(turbineRadius-0.5*(rotorD-nacelleD))<5.0){
+        printf("rVector = %f, phi_rel=%f, alpha_angle=%f, f_tip=%f, f_hub=%f, F_tot=%f, sigma=%f, c_n=%f, an_it=%f, at_it=%f a_tol=%14.12f\n\t C_l=%f, C_d=%f, U_rel=%f, L=%f, D=%f, F_n=%f, F_t=%f\n",
+                turbineRadius, phi_rel, alpha_angle, f_tip, f_hub, F_tot, sigma, c_n, an_it, at_it, a_tol, C_l, C_d, U_rel, L_f, D_f, *GADforce_n, *GADforce_t);
+  }
+#endif
+} // end cudaDevice_GADforcesCompute()
+
+/*----->>>>> __device__ void cudaDevice_GADforcesApply();  --------------------------------------------------
+*/
+/* Projects the rotor-normal and tangential forces onto the Cartesian axes, weights them with the
+* factor returned by distribute_GADforces() and applies/stores the result.
+*
+*  rho                       density multiplying the applied forces
+*  turb_Xcoord, turb_Ycoord  horizontal turbine coordinates
+*  hubHeight                 hub height, subtracted from zLoc
+*  rotorTheta                rotor yaw angle [deg]; 180 degrees are subtracted before use
+*  rotorD                    rotor diameter (forwarded to distribute_GADforces)
+*  xLoc, yLoc, zLoc          coordinates of the grid point
+*  GADforce_n, GADforce_t    normal and tangential forces to project
+*  GADforce_x/y/z            (in/out) decremented by the weighted forces when GADofflineForces_d==0
+*  GAD_fX/fY/fZ              (out) overwritten with the weighted forces when GADoutputForces_d==1
+*  turbineRadius, nacelleD   only used by the DEBUG_GAD diagnostics
+*/
+__device__ void cudaDevice_GADforcesApply(float rho, float turb_Xcoord, float turb_Ycoord, float hubHeight, float rotorTheta, float rotorD, 
+                                          float xLoc, float yLoc, float zLoc,
+                                          float GADforce_n, float GADforce_t, float* GADforce_x, float* GADforce_y, float* GADforce_z,
+                                          float* GAD_fX, float* GAD_fY, float* GAD_fZ, float turbineRadius, float nacelleD){
+
+  const float pi = acosf(-1.0);
+  const float thetaShifted = rotorTheta - 180.0;
+  const float thetaRad = thetaShifted*pi/180.0;
+  const float cosTheta = cosf(thetaRad);
+  const float sinTheta = sinf(thetaRad);
+
+  /* horizontal offset of the grid point from the turbine */
+  const float offX = xLoc - turb_Xcoord;
+  const float offY = yLoc - turb_Ycoord;
+  const float horizRange = sqrtf(powf(offX,2.0) + powf(offY,2.0));
+
+  /* the sign of the offset component rotated by thetaShifted selects the side of the hub */
+  const float crossOffset = -offX*sinTheta + offY*cosTheta;
+  const float sideSign = copysign(1.0,crossOffset);
+  const float azimuth = atan2f(zLoc-hubHeight,sideSign*horizRange);
+  const float sinAz = sinf(azimuth);
+  const float cosAz = cosf(azimuth);
+
+  // project turbine forces into cartesian grid coordinates
+  const float forceX = -GADforce_n*cosTheta - GADforce_t*sinAz*sinTheta;
+  const float forceY = -GADforce_n*sinTheta + GADforce_t*sinAz*cosTheta;
+  const float forceZ = -GADforce_t*cosAz;
+  float weight;
+
+  distribute_GADforces(xLoc, yLoc, turb_Xcoord, turb_Ycoord, thetaShifted, rotorD, &weight);
+
+#ifdef DEBUG_GAD
+  if(fabsf(turbineRadius-0.5*(rotorD-nacelleD))<5.0){
+        printf("rVector = %f, (xLoc,yLoc)=(%f,%f), (xT,yT)=(%f,%f), rotorTheta_tmp=%f, \n\t dist_x=%f, dist_y=%f,zLoc=%f, epsilon=%f, F_factor=%f\n\t F_x=%f, F_y=%f, F_z=%f\n",
+                turbineRadius, xLoc,yLoc,turb_Xcoord,turb_Ycoord, thetaShifted, offX,offY,zLoc,azimuth, weight, forceX, forceY, forceZ);
+  }
+#endif
+  // forces exerted by the turbine on the flow have opposite sign
+  if (GADofflineForces_d==0){
+    *GADforce_x -= rho*weight*forceX;
+    *GADforce_y -= rho*weight*forceY;
+    *GADforce_z -= rho*weight*forceZ;
+  }
+  // save forces for output
+  if (GADoutputForces_d==1){
+    *GAD_fX = -rho*weight*forceX;
+    *GAD_fY = -rho*weight*forceY;
+    *GAD_fZ = -rho*weight*forceZ;
+  }
+
+} // end cudaDevice_GADforcesApply()
+
+/*----->>>>> __device__ void compute_ClCd_incoeff();  --------------------------------------------------
+*/
+/* Evaluates the lift and drag coefficient polynomials for the radial segment containing r_norm.
+*
+*  rnorm_vect     segment limits; entries [rn] and [rn+1] bound segment rn, so
+*                 turbinePolyClCdrNormSegments_d+1 entries are read
+*  turbinePolyCl  lift coefficients, turbinePolyOrderMax_d per segment, highest power first
+*  turbinePolyCd  drag coefficients, same layout as turbinePolyCl
+*  alpha          angle of attack at which the polynomials are evaluated
+*  r_norm         normalized radius used to select the segment
+*  C_l, C_d       (out) lift and drag coefficients; both stay 0.0 when no segment contains r_norm
+*/
+__device__ void compute_ClCd_incoeff(float* rnorm_vect, float* turbinePolyCl, float* turbinePolyCd, float alpha, float r_norm, float* C_l, float* C_d){
+
+  int rn,rn_seg;
+  float rn_min,rn_max;
+  int ind_ps,ind_pe,nn;
+  float temp_expo;
+
+  *C_l = 0.0;
+  *C_d = 0.0;
+
+  // determine the segment of normalized radius where the point belongs: rn_min < r_norm <= rn_max
+  rn_seg = -1;
+  for (rn=0; rn<turbinePolyClCdrNormSegments_d; rn++){
+     rn_min = rnorm_vect[rn];
+     rn_max = rnorm_vect[rn+1];
+     // the upper bound must be tested against r_norm; comparing rn_min with rn_max
+     // never rejects a segment whose limits are in ascending order
+     if ((r_norm>rn_min)&&(r_norm<=rn_max)){
+       rn_seg = rn;
+     }
+  }
+
+  if ((rn_seg>=0)&&(rn_seg<turbinePolyClCdrNormSegments_d)){
+    temp_expo = __int2float_rn(turbinePolyOrderMax_d);
+    // coefficients of segment rn_seg occupy [ind_ps, ind_pe)
+    ind_ps = rn_seg*turbinePolyOrderMax_d;
+    ind_pe = ind_ps + turbinePolyOrderMax_d;
+    for (nn=ind_ps; nn<ind_pe; nn++){
+       temp_expo = temp_expo - 1.0;
+       *C_l = *C_l + turbinePolyCl[nn]*powf(alpha,temp_expo);
+       *C_d = *C_d + turbinePolyCd[nn]*powf(alpha,temp_expo);
+    }
+  }
+
+} // end compute_ClCd_incoeff()
+
+
+/*----->>>>> __device__ void distribute_GADforces();  --------------------------------------------------
+*/
+/* Returns a Gaussian weight based on the horizontal distance between a grid point and the line
+* through the turbine oriented at theta_turb+90 degrees.
+*
+*  xLoc, yLoc      horizontal coordinates of the grid point
+*  x_turb, y_turb  horizontal coordinates of the turbine
+*  theta_turb      orientation angle [deg]
+*  rotorD          rotor diameter; the line end points lie 0.5*rotorD either side of the turbine
+*  F_dist_fact     (out) (1/(sqrt(2*pi)*s))*exp(-d*d/(2*s*s)), where d is the point-to-line distance
+*                  and s is the grid spacing (dX_d, dY_d) projected onto the line direction
+*/
+__device__ void distribute_GADforces(float xLoc, float yLoc, float x_turb, float y_turb, float theta_turb, float rotorD, float* F_dist_fact){
+
+  const float pi = acosf(-1.0);
+  const float lineAngle = 0.5*pi + theta_turb*pi/180.0;
+  const float cosLine = cosf(lineAngle);
+  const float sinLine = sinf(lineAngle);
+
+  /* end points of the rotor line segment */
+  const float xA = x_turb - 0.5*rotorD*cosLine;
+  const float xB = x_turb + 0.5*rotorD*cosLine;
+  const float yA = y_turb - 0.5*rotorD*sinLine;
+  const float yB = y_turb + 0.5*rotorD*sinLine;
+
+  /* distance from the grid point to the line through (xA,yA)-(xB,yB) */
+  const float lineDist = fabsf((xB-xA)*(yA-yLoc)-(xA-xLoc)*(yB-yA))/sqrtf(powf(xB-xA,2.0)+powf(yB-yA,2.0));
+  const float spread = fabsf(dX_d*cosLine) + fabsf(dY_d*sinLine);
+  const float normCoeff = sqrtf(2.0*pi)*spread;
+  const float widthCoeff = 2.0*spread*spread;
+
+  *F_dist_fact = (1.0/normCoeff)*expf(-lineDist*lineDist/widthCoeff);
+
+} // end distribute_GADforces()
+
+/*----->>>>> __device__ void update_sampleRefVel();  --------------------------------------------------
+*/
+/* Adds one weighted contribution to the running sample averages:
+*  each accumulator is incremented by GADsamplingAvgWeight_d*(component/rho).
+*
+*  u, v        flow components; divided by rho before accumulation
+*  rho         density
+*  u_sampAvg   (in/out) accumulator for the u contribution
+*  v_sampAvg   (in/out) accumulator for the v contribution
+*/
+__device__ void update_sampleRefVel(float u, float v, float rho, float* u_sampAvg, float* v_sampAvg){
+
+  *u_sampAvg += GADsamplingAvgWeight_d*u/rho;
+  *v_sampAvg += GADsamplingAvgWeight_d*v/rho;
+
+} // update_sampleRefVel()
+
+/*----->>>>> __device__ void update_turbineRefMagDir();  --------------------------------------------------
+*/
+/* Stores the latest sample averages into the series buffers and recomputes the reference
+* velocity magnitude and direction from the series mean.
+*
+*  sampleIndex          slot of uSeries/vSeries overwritten with the new sample averages
+*  u_sampAvg, v_sampAvg new sample averages
+*  uSeries, vSeries     (in/out) buffers of GADrefSeriesLength_d sample averages
+*  turbineRefMag        (out) magnitude of the series-mean (u,v) vector
+*  turbineRefDir        (out) 180 + atan2(u_mean,v_mean) expressed in degrees
+*/
+__device__ void update_turbineRefMagDir(int sampleIndex, float u_sampAvg, float v_sampAvg,
+		                        float* uSeries, float* vSeries, float* turbineRefMag, float* turbineRefDir){
+  
+  const float pi = acosf(-1.0);
+  float uSum = 0.0;
+  float vSum = 0.0;
+  int slot;
+
+  /* record the newest sample before averaging so that it is included in the mean */
+  uSeries[sampleIndex] = u_sampAvg;
+  vSeries[sampleIndex] = v_sampAvg;
+
+  for(slot=0; slot < GADrefSeriesLength_d; slot++){
+    uSum += uSeries[slot];
+    vSum += vSeries[slot];
+  }
+  const float uMean = uSum*GADrefSeriesWeight_d;  
+  const float vMean = vSum*GADrefSeriesWeight_d;  
+
+  *turbineRefMag = sqrtf(powf(uMean,2.0)+powf(vMean,2.0));
+  *turbineRefDir = 180.0 + atan2f(uMean,vMean)*180.0/pi;
+
+} // update_turbineRefMagDir()
+
+/*----->>>>> __device__ void update_yawError();  --------------------------------------------------
+*/
+/* Accumulates the signed, squared yaw misalignment while the turbine is not yawing and raises
+* the yawing flag once the accumulated magnitude reaches the threshold.
+*
+*  turbineRefDir  reference wind direction, passed by value to Angle_TurbWind()
+*  rotorTheta     current rotor yaw angle, passed by value to Angle_TurbWind()
+*  yawError       (in/out) accumulated error; incremented by sign(d)*d*d*(GADsamplingAvgLength_d*dt)
+*  turbineYawing  (in/out) nothing is done unless this is 0; set to 1 when |yawError| >= threshold
+*  dt             time step
+*/
+__device__ void update_yawError(float* turbineRefDir, float* rotorTheta, float* yawError, int* turbineYawing, float dt){
+
+  const float yawErrThreshold = 10000.0; // (deg)^2 s -> threshold to start yawing to align with incoming wind
+  float misalignment;
+
+  if (*turbineYawing != 0){ // turbine currently yawing: leave the accumulated error untouched
+    return;
+  }
+
+  const float windowDuration = GADsamplingAvgLength_d*dt;
+  Angle_TurbWind(*turbineRefDir, *rotorTheta, &misalignment);
+  *yawError += copysign(1.0,misalignment)*powf(misalignment,2.0)*windowDuration;
+  if (fabsf(*yawError) >= yawErrThreshold){
+    *turbineYawing = 1;
+  }
+
+} // update_yawError()
+
+/*----->>>>> __device__ void update_rotorTheta();  --------------------------------------------------
+*/
+/* Advances the rotor yaw angle by one time step in the direction given by the sign of the
+* accumulated yaw error, and stops yawing once the remaining misalignment is below one step.
+*
+*  turbineRefDir  reference wind direction, passed by value to Angle_TurbWind()
+*  rotorTheta     (in/out) rotor yaw angle [deg]; updated and reduced with fmodf(.,360)
+*  yawError       (in/out) accumulated yaw error; its sign selects the yaw direction, reset to 0.0 on stop
+*  turbineYawing  (out) set to 0 when |remaining misalignment| < |yaw step|
+*  dt             time step
+*/
+__device__ void update_rotorTheta(float* turbineRefDir, float* rotorTheta, float* yawError, int* turbineYawing, float dt){
+
+  const float yawRate = 2.0; // deg s-1 -> turbine's yawing rate
+  const float fullCircle = 360.0;
+  const float yawStep = copysign(1.0,*yawError)*yawRate*dt;
+  float remaining;
+
+  *rotorTheta = fmodf(*rotorTheta + yawStep,fullCircle);
+  Angle_TurbWind(*turbineRefDir, *rotorTheta, &remaining);
+
+  if (fabsf(remaining) <  fabsf(yawStep)){
+    *turbineYawing = 0;
+    *yawError = 0.0;
+  }
+
+#ifdef DEBUG_GAD
+  printf("[in update_rotorTheta]: GAD_turbineYawing_d[iturb]=%d, GAD_rotorTheta_d[iturb]=%f, diff_angle=%f, yawing_angle=%f \n",
+          *turbineYawing, *rotorTheta,remaining,yawStep);
+#endif
+
+} // update_rotorTheta()
+
+/*----->>>>> __device__ void Angle_TurbWind();  --------------------------------------------------
+*/
+/* Computes the angular difference fmodf(270 - turbineRefDir, 360) - rotorTheta and, when its
+* magnitude exceeds 180 degrees, replaces it by the complementary angle with the opposite sign.
+*
+*  turbineRefDir  reference wind direction [deg]
+*  rotorTheta     rotor yaw angle [deg]
+*  diff_angle     (out) resulting angular difference [deg]
+*/
+__device__ void Angle_TurbWind(float turbineRefDir, float rotorTheta, float* diff_angle){
+
+  float delta = fmodf(270.0 - turbineRefDir,360.0) - rotorTheta;
+
+  if (fabsf(delta) > 180.0){
+    const float flippedSign = -copysign(1.0,delta);
+    delta = flippedSign*(360.0 - fabsf(delta));
+  }
+  *diff_angle = delta;
+
+} // Angle_TurbWind()
+
+/*----->>>>> __device__ void compute_normalInduction();  --------------------------------------------------
+*/
+/* Iteratively solves for the axial (normal) induction factor at one radial position and returns
+* it clamped to the range [0, 0.5].
+*
+*  turbineRefMag      reference velocity magnitude, used when GADForcingSwitch_d != 1
+*  rotorD, nacelleD   rotor and nacelle diameters
+*  turbineRadius      radial distance at which the induction factor is evaluated
+*  beta_angle         total blade angle [rad]
+*  omega_rot          rotational speed [rad s-1]
+*  chord_length       local chord length
+*  rnorm_vect, turbinePolyCl, turbinePolyCd   forwarded unchanged to compute_ClCd_incoeff()
+*  alpha_minmax_vect  [0] = lower and [1] = upper clamp applied to the angle of attack [deg]
+*  turbineRefAn       (out) converged axial induction factor, clamped to [0, 0.5]
+*
+* The number of iterations depends on per-thread data and the routine uses no shared memory, so
+* it must not contain block-wide barriers: a __syncthreads() that is not reached by every thread
+* of the block is undefined behavior and can hang the kernel.
+*/
+__device__ void compute_normalInduction(float turbineRefMag, float rotorD, float nacelleD,
+                                        float turbineRadius, float beta_angle, float omega_rot, float chord_length,
+                                        float *rnorm_vect, float *alpha_minmax_vect, float *turbinePolyCl, float *turbinePolyCd,
+					float *turbineRefAn){
+
+  float U_ijk,phi_rel,alpha_angle;
+  float a_tol;
+  float a_tol_min = 1.0e-5; // minimum tolerance for converged induction factors
+  float at_it,at_0;
+  float an_it,an_0;
+  float pi = acosf(-1.0);
+  float C_l,C_d;
+  float r_norm,blade_length;
+  float B_num = 3.0; // number of blades
+  float sigma,f_tip,f_hub,F_tot;
+  float r_hub;
+  float c_n,c_t;
+  int iter_cnt;
+  int max_iter = 50;
+  float U_ijk_tmp;
+  float switchFactor;
+
+  blade_length = 0.5*rotorD;
+  r_norm = turbineRadius/blade_length;
+  r_hub = 0.5*nacelleD;
+
+  if(GADForcingSwitch_d == 1){
+    U_ijk = GADrefU_d;
+    switchFactor = 1.0;
+  }else{
+    U_ijk = turbineRefMag; // hub-height local velocity (time averaged)
+    switchFactor = 0.0;
+  }
+
+  an_it = 0.0;
+  at_it = 0.0;
+  an_0 = an_it;
+  at_0 = at_it;
+  // iterative solve for induction factor(s)
+  a_tol = a_tol_min + 1.0; //Initialize a_tol to get into the while loop
+  iter_cnt = 0;
+  while((a_tol > a_tol_min) && (iter_cnt < max_iter)){
+
+    // the reference speed is reduced by the axial induction only when forcing with GADrefU_d
+    U_ijk_tmp = (1.0-switchFactor)*U_ijk + switchFactor*U_ijk*(1.0-an_it);
+    phi_rel = atanf(U_ijk_tmp/(omega_rot*turbineRadius*(1.0+at_it))); // angle between relative velocity and plane of rotation
+    alpha_angle = (phi_rel - beta_angle)*(180.0/pi); // angle of attack
+
+    alpha_angle = fmaxf(fminf(alpha_angle,alpha_minmax_vect[1]),alpha_minmax_vect[0]);
+    compute_ClCd_incoeff(rnorm_vect,turbinePolyCl,turbinePolyCd,alpha_angle,r_norm,&C_l,&C_d); // lift and drag coefficients
+    sigma = B_num*chord_length/(2.0*pi*turbineRadius); // solidity factor
+    f_tip = B_num*(0.5*rotorD-turbineRadius)/(2.0*turbineRadius*sinf(phi_rel)); // blade tip losses
+    f_hub = B_num*(turbineRadius-r_hub)/(2.0*turbineRadius*sinf(phi_rel)); // blade hub losses
+    F_tot = (2.0/pi)*acosf(expf(-f_tip))*(2.0/pi)*acosf(expf(-f_hub)); // blade total losses
+
+    c_n = C_l*cosf(phi_rel)+C_d*sinf(phi_rel);
+    an_it = 1.0/(1.0 + 4.0*F_tot*sinf(phi_rel)*sinf(phi_rel)/(sigma*c_n)); // normal induction factor
+
+    c_t = C_l*sinf(phi_rel)-C_d*cosf(phi_rel);
+    at_it = 1.0/(4.0*F_tot*sinf(phi_rel)*cosf(phi_rel)/(sigma*c_t) - 1.0); // tangential induction factor
+
+    // change of both induction factors since the previous iteration
+    a_tol = sqrtf(powf(an_it-an_0,2.0) + powf(at_it-at_0,2.0));
+    an_0 = an_it;
+    at_0 = at_it;
+
+    iter_cnt++;
+  } //while(a_tol > a_tol_min) --end of iterative process
+
+  *turbineRefAn = fmaxf(fminf(an_it,0.5),0.0);
+
+} // end compute_normalInduction()
diff --git a/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice_cu.h b/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice_cu.h
new file mode 100644
index 0000000..cf45f61
--- /dev/null
+++ b/SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice_cu.h
@@ -0,0 +1,189 @@
+/* FastEddy®: SRC/EXTENSIONS/GAD/CUDA/cuda_GADDevice_cu.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#ifndef _GAD_CUDADEV_CU_H
+#define _GAD_CUDADEV_CU_H
+
+/*GAD return codes */
+#define CUDA_GAD_SUCCESS    0
+
+/*##############------------------- GAD submodule variable declarations ---------------------#################*/
+extern __constant__ int GADSelector_d;     /* Generalized Actuator Disk Selector: 0=off, 1=on */
+extern __constant__ int GADoutputForces_d;    /* Flag to include GAD forces in the output: 0=off, 1=on */
+extern __constant__ int GADofflineForces_d;   /* Flag to compute GAD forces in an offline mode: 0=off, 1=on */
+extern __constant__ int GADaxialInduction_d;   /* Flag to compute axial induction factor: 0==off (uses prescribed GADaxialIndVal), 1==on */
+extern __constant__ float GADaxialIndVal_d;    /* Prescribed constant axial induction factor when GADaxialInduction==0 */
+extern __constant__ int GADrefSwitch_d;   /* Switch to use reference windspeed: 0=off, 1=on */
+extern __constant__ float GADrefU_d;    /* Prescribed constant reference hub-height windspeed */
+extern __constant__ int GADForcingSwitch_d;    /* Switch to use the GADrefU-based or local windspeed in computing GAD forces: 0=local, 1=ref */
+extern __constant__ int GADNumTurbines_d;     /* Number of GAD Turbines */
+extern __constant__ int GADNumTurbineTypes_d;  /* Number of GAD Turbine Types */
+extern __constant__ int turbinePolyOrderMax_d; /* Maximum Polynomial order across all turbine types */
+extern __constant__ int turbinePolyClCdrNormSegments_d; /* Number of segments in the normalized radius for the lift and drag coefficient polynomial */
+extern __constant__ int alphaBounds_d;         /* Number of elements in the min/max angle of attack array for the lift/drag curves */
+
+extern __constant__ int GADsamplingAvgLength_d;   /* length of sampling average windows (averaging over fastest timescales) */
+extern __constant__ float GADsamplingAvgWeight_d;   /* weight of instances in taking sampling average */
+extern __constant__ int GADrefSeriesLength_d;   /* number of sample average windows to incorporate into full Reference average */
+extern __constant__ float GADrefSeriesWeight_d; /* precalculated averaging weight for Reference average */
+
+extern __constant__ int numgridCells_away_d; /* Halo-region of cells considered in rotor disk distance-wise smoothing function */
+
+extern int* GAD_turbineType_d;     /* Integer class-label for turbine type*/
+extern int* GAD_turbineRank_d;     /* Integer mpi-rank of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+extern int* GAD_turbineRefi_d;     /* Integer i-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+extern int* GAD_turbineRefj_d;     /* Integer j-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+extern int* GAD_turbineRefk_d;     /* Integer k-index of nacelle center cell for each turbine reference velMag and velDir grid cell*/
+extern int* GAD_turbineYawing_d;   /* Integer flag indicating whether a turbine is currently yawing (==1) */
+extern float* GAD_Xcoords_d;       /* turbine x-location [m] from SW domain corner */
+extern float* GAD_Ycoords_d;       /* turbine y-location [m] from SW domain corner */
+extern float* GAD_turbineRefMag_d; /* Reference "ambient" velocity magnitude for yaw control and beta/omega [m/s]*/
+extern float* GAD_turbineRefDir_d; /* Reference "ambient" velocity direction (horizontal, met. standard orientation) for yaw control and beta/omega [degrees]*/
+extern float* GAD_turbineUseries_d;/* uSeries of sample averages spanning the rolling-average reference period */
+extern float* GAD_turbineVseries_d;/* vSeries of sample averages spanning the rolling-average reference period */
+extern float* u_sampAvg_d;         /* u sample averages for each turbine */
+extern float* v_sampAvg_d;         /* v sample averages for each turbine */
+extern float* GAD_yawError_d;      /* yaw error between the incoming wind and the turbine orientation */
+extern float* GAD_anFactor_d;     /* turbine axial induction factor at hub height */
+extern float* GAD_rotorTheta_d;    /* turbine yaw angle [deg. North] */
+extern float* GAD_hubHeights_d;    /* turbine hub height [m AGL] */
+extern float* GAD_rotorD_d;        /* turbine rotor diameter [m] */
+extern float* GAD_nacelleD_d;      /* nacelle diameter [m] */
+extern float* turbinePolyTwist_d;  /* turbine-type-specific twist polynomial coefficients*/
+extern float* turbinePolyChord_d;  /* turbine-type-specific chord polynomial coefficients*/
+extern float* turbinePolyPitch_d;  /* turbine-type-specific pitch polynomial coefficients*/
+extern float* turbinePolyOmega_d;  /* turbine-type-specific omega polynomial coefficients*/
+extern float* rnorm_vect_d;        /* turbine-type-specific normalized radius segment limits*/
+extern float* alpha_minmax_vect_d; /* turbine-type-specific maximum and minimum angle of attack for the lift/drag curves*/
+extern float* turbinePolyCl_d;     /* turbine-type-specific lift coefficient polynomial coefficients*/
+extern float* turbinePolyCd_d;     /* turbine-type-specific drag coefficient polynomial coefficients*/
+
+extern float* GAD_turbineVolMask_d; /* turbine Volume mask (0 if turbine free cell in domain, else turbine ID of cell in turbine yaw-swept volume) */
+extern float* GAD_forceX_d;         /* turbine forces in the x-direction */
+extern float* GAD_forceY_d;         /* turbine forces in the y-direction */
+extern float* GAD_forceZ_d;         /* turbine forces in the z-direction */
+
+/*##############-------------- GAD_CUDADEV submodule function declarations ------------------############*/
+
+/*----->>>>> int cuda_GADDeviceSetup();       ---------------------------------------------------------
+* Used to cudaMalloc and cudaMemcpy parameters and coordinate arrays for the GAD_CUDA submodule.
+*/
+extern "C" int cuda_GADDeviceSetup();
+
+/*----->>>>> extern "C" int cuda_GADDeviceCleanup();  -----------------------------------------------------------
+* Used to free all malloced memory by the GAD submodule.
+*/
+extern "C" int cuda_GADDeviceCleanup();
+
+/*----->>>>> __global__ void  cudaDevice_GADinter();  --------------------------------------------------
+* This function is the global entry kernel for computing reference values for GAD yawing and other turbine characteristics
+*/
+__global__ void cudaDevice_GADinter(float* xPos_d, float* yPos_d, float* zPos_d, float* topoPos_d,
+		                    int simTime_it, int timeStage, int numRKstages, float dt,
+		                    float* hydroFlds_d, int* GAD_turbineType_d, float* GAD_turbineVolMask_d,
+                                    float* GAD_Xcoords_d, float* GAD_Ycoords_d, float* GAD_rotorTheta_d,
+                                    float* GAD_hubHeights_d, float* GAD_rotorD_d, float* GAD_nacelleD_d,
+                                    float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                    float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                    float* rnorm_vect_d, float* alpha_minmax_vect_d,
+                                    float* turbinePolyCl_d, float* turbinePolyCd_d,
+                                    int* GAD_turbineRank_d, int* GAD_turbineRefi_d, int* GAD_turbineRefj_d, int* GAD_turbineRefk_d,
+                                    float* u_sampAvg_d, float* v_sampAvg_d,
+                                    float* GAD_turbineUseries_d, float* GAD_turbineVseries_d,
+                                    float* GAD_turbineRefMag_d, float* GAD_turbineRefDir_d,
+				    int* GAD_turbineYawing_d, float* GAD_yawError_d, float* GAD_anFactor_d);
+
+/*----->>>>> __global__ void  cudaDevice_GADfinal();  --------------------------------------------------
+* This function is the global entry kernel for computing GAD forcing from turbines
+*/
+__global__ void cudaDevice_GADfinal(float* xPos_d, float* yPos_d, float* zPos_d, float* topoPos_d,
+                                    float* hydroFlds_d, float* hydroFldsFrhs_d, int simTime_it, float dt,
+                                    int* GAD_turbineType_d, float* GAD_turbineVolMask_d,
+                                    float* GAD_Xcoords_d, float* GAD_Ycoords_d, float* GAD_rotorTheta_d,
+                                    float* GAD_hubHeights_d, float* GAD_rotorD_d, float* GAD_nacelleD_d,
+                                    float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                    float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                    float* rnorm_vect_d, float* alpha_minmax_vect_d,
+                                    float* turbinePolyCl_d, float* turbinePolyCd_d,
+				    float* GAD_turbineRefMag_d, float* GAD_anFactor_d,
+                                    float* GAD_forceX_d, float* GAD_forceY_d, float* GAD_forceZ_d);
+
+/*----->>>>> __device__ void  cudaDevice_cellInRotor();  --------------------------------------------------
+ * This function calculates a radial vector and sets a flag to determine if a cell is in a rotor disk area
+ */
+__device__ void cudaDevice_cellInRotor(float* cell_inRotor, float* cell_rVector,
+                                       int iturb, float turbX, float turbY,
+                                       float turbTheta, float turbHubHgt, float tiltAngle,
+                                       float rotorD, float nacelleD,
+                                       float xLoc, float yLoc, float zLoc, float dx, float dy);
+
+/*----->>>>> __device__ void cudaDevice_GADtwistChord();  --------------------------------------------------
+*/
+__device__ void cudaDevice_GADtwistChord(float* turbinePolyTwist_d, float* turbinePolyChord_d,
+                                         float rotorD, float turbineRadius, float* twist_angle, float* chord_length);
+
+/*----->>>>> __device__ void cudaDevice_GADbetaOmega();  --------------------------------------------------
+*/
+__device__ void cudaDevice_GADbetaOmega(float turbineRefMag, float anFactor, float* turbinePolyPitch_d, float* turbinePolyOmega_d,
+                                        float rotorD, float turbineRadius, float twist_angle, float* beta_angle, float* omega_rot);
+
+/*----->>>>> __device__ void cudaDevice_GADforcesCompute();  --------------------------------------------------
+*/
+__device__ void cudaDevice_GADforcesCompute(float u, float v, float rho, float rotorD, float nacelleD,
+                                            float turbineRadius, float beta_angle, float omega_rot, float chord_length,
+                                            float *rnorm_vect, float *alpha_minmax_vect, float *turbinePolyCl, float *turbinePolyCd,
+                                            float *GADforce_n, float *GADforce_t);
+
+/*----->>>>> __device__ void cudaDevice_GADforcesApply();  --------------------------------------------------
+*/
+__device__ void cudaDevice_GADforcesApply(float rho, float turb_Xcoord, float turb_Ycoord, float hubHeight, float rotorTheta, float rotorD, 
+                                          float xLoc, float yLoc, float zLoc,
+                                          float GADforce_n, float GADforce_t, float* GADforce_x, float* GADforce_y, float* GADforce_z,
+                                          float* GAD_fX, float* GAD_fY, float* GAD_fZ, float turbineRadius, float nacelleD);
+
+/*----->>>>> __device__ void compute_ClCd_incoeff();  --------------------------------------------------
+*/
+__device__ void compute_ClCd_incoeff(float* rnorm_vect, float* turbinePolyCl, float* turbinePolyCd, float alpha, float r_norm, float* C_l, float* C_d);
+
+/*----->>>>> __device__ void distribute_GADforces();  --------------------------------------------------
+*/
+__device__ void distribute_GADforces(float xLoc, float yLoc, float x_turb, float y_turb, float theta_turb, float rotorD, float* F_dist_fact);
+
+/*----->>>>> __device__ void update_sampleRefVel();  --------------------------------------------------
+*/
+__device__ void update_sampleRefVel(float u, float v, float rho, float* u_sampAvg, float* v_sampAvg);
+
+/*----->>>>> __device__ void update_turbineRefMagDir();  --------------------------------------------------
+*/
+__device__ void update_turbineRefMagDir(int sampleIndex, float u_sampAvg, float v_sampAvg,
+                                        float* uSeries, float* vSeries, float* turbineRefMag, float* turbineRefDir);
+
+/*----->>>>> __device__ void update_yawError();  --------------------------------------------------
+*/
+__device__ void update_yawError(float* turbineRefDir, float* rotorTheta, float* yawError, int* turbineYawing, float dt);
+
+/*----->>>>> __device__ void update_rotorTheta();  --------------------------------------------------
+*/
+__device__ void update_rotorTheta(float* turbineRefDir, float* rotorTheta, float* yawError, int* turbineYawing, float dt);
+/*----->>>>> __device__ void Angle_TurbWind();  --------------------------------------------------
+*/
+__device__ void Angle_TurbWind(float turbineRefDir, float rotorTheta, float* diff_angle);
+/*----->>>>> __device__ void compute_normalInduction();  --------------------------------------------------
+*/
+__device__ void compute_normalInduction(float turbineRefMag, float rotorD, float nacelleD,
+                                        float turbineRadius, float beta_angle, float omega_rot, float chord_length,
+                                        float *rnorm_vect, float *alpha_minmax_vect, float *turbinePolyCl, float *turbinePolyCd,
+					float *turbineRefAn);
+#endif // _GAD_CUDADEV_CU_H
diff --git a/SRC/EXTENSIONS/GAD/GAD.c b/SRC/EXTENSIONS/GAD/GAD.c
new file mode 100644
index 0000000..5909d0c
--- /dev/null
+++ b/SRC/EXTENSIONS/GAD/GAD.c
@@ -0,0 +1,985 @@
+/* FastEddy®: SRC/EXTENSIONS/GAD/GAD.c 
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/*##################------------------- GAD sub-module variable definitions ---------------------#################*/
+
+/*---GAD parameters */
+int GADSelector;         /* Generalized Actuator Disk Selector: 0=off, 1=on */
+char *turbineSpecsFile;  /* The path+filename to a turbine specifications file */
+int GADoutputForces;     /* Flag to include GAD forces in the output: 0=off, 1=on */
+int GADofflineForces;    /* Flag to compute GAD forces in an offline mode: 0=off, 1=on */
+int GADaxialInduction;   /* Flag to compute axial induction factor: 0==off (uses prescribed GADaxialIndVal), 1==on */
+float GADaxialIndVal;    /* Prescribed constant axial induction factor when GADaxialInduction==0 */
+int GADrefSwitch;        /* Switch to use reference windspeed: 0=off, 1=on */
+float GADrefU;           /* Prescribed constant reference hub-height windspeed */
+float GADrefSampleWindow;/* Sample duration (in seconds) over which to average per-timestep values (filtering out highest frequencies) */
+int GADsamplingAvgLength;/* Number of timesteps in the prescribed sample window */
+float GADsamplingAvgWeight;/* Sample window averaging weight */
+int GADrefSeriesLength;  /* Number of sampling windows over which to average again for reference velocity magnitude and direction */
+float GADrefSeriesWeight;  /* Reference series averaging weight */
+int GADForcingSwitch;    /* Switch to use the GADrefU-based or local windspeed in computing GAD forces: 0=local, 1=ref */
+int GADNumTurbines = 0;      /* Number of GAD Turbines (0 until a turbine specifications file has been read) */
+int GADNumTurbineTypes;  /* Number of GAD Turbine Types */
+int turbinePolyOrderMax; /* Maximum Polynomial order across all turbine types */
+int turbinePolyClCdrNormSegments; /* Number of segments in the normalized radius for the lift and drag coefficient polynomial */
+int alphaBounds;         /* Number of elements in the min/max angle of attack array for the lift/drag curves */
+
+int numgridCells_away; /* Halo-region of cells considered in rotor disk distance-wise smoothing function */
+/*---GAD turbine characteristics arrays */
+int* GAD_turbineType;    /* Integer class-label for turbine type */
+int* GAD_turbineRank;    /* Integer mpi-rank of nacelle center cell for each turbine reference velMag and velDir grid cell */
+int* GAD_turbineRefi;    /* Integer i-index of nacelle center cell for each turbine reference velMag and velDir grid cell */
+int* GAD_turbineRefj;    /* Integer j-index of nacelle center cell for each turbine reference velMag and velDir grid cell */
+int* GAD_turbineRefk;    /* Integer k-index of nacelle center cell for each turbine reference velMag and velDir grid cell */
+int* GAD_turbineYawing;  /* Integer flag indicating whether a turbine is currently yawing (==1) */
+float* GAD_Xcoords;      /* SW-corner (0,0)-relative x-coordinate of turbines [m] */ 
+float* GAD_Ycoords;      /* SW-corner (0,0)-relative y-coordinate of turbines [m] */
+float* GAD_turbineRefMag;/* Reference "ambient" velocity magnitude for yaw control and beta/omega [m/s] */
+float* GAD_turbineRefDir;/* Reference "ambient" velocity direction (horizontal, met. standard orientation) for yaw control and beta/omega [degrees] */
+float* GAD_yawError;     /* Yaw error between the incoming wind and the turbine orientation */
+float* GAD_anFactor;     /* Turbine axial induction factor at hub height */
+float* GAD_rotorTheta;   /* Rotor-normal horizontal angle from North [degrees]. NOTE(review): the IO attribute registered in GADConstructor() describes this as "rotor angle from west increasing counter-clockwise" -- confirm which convention is correct. */
+float* GAD_hubHeights;   /* Above-ground-level hub-heights of turbines [m] */
+float* GAD_rotorD;       /* Turbine-specific rotor diameters [m] */
+float* GAD_nacelleD;     /* Turbine-specific nacelle diameters [m] */
+float* turbinePolyTwist; /* Turbine-type-specific twist polynomial coefficients */
+float* turbinePolyChord; /* Turbine-type-specific chord polynomial coefficients */
+float* turbinePolyPitch; /* Turbine-type-specific pitch polynomial coefficients */
+float* turbinePolyOmega; /* Turbine-type-specific omega polynomial coefficients */
+float* rnorm_vect;       /* Turbine-type-specific normalized radius segment limits */
+float* alpha_minmax_vect;/* Turbine-type-specific maximum and minimum angle of attack for the lift/drag curves */
+float* turbinePolyCl;    /* Turbine-type-specific lift coefficient polynomial coefficients */
+float* turbinePolyCd;    /* Turbine-type-specific drag coefficient polynomial coefficients */
+
+
+float* GAD_turbineVolMask; /* Turbine volume mask (0 if turbine-free cell in domain, else turbine ID of cell in turbine yaw-swept volume) */
+float* GAD_turbineRotorMask; /* Turbine rotor-disk mask (0 if turbine-free cell in domain, else 1.0 in turbine yaw-centric disk) */
+float* GAD_forceX;         /* Turbine forces in the x-direction */
+float* GAD_forceY;         /* Turbine forces in the y-direction */
+float* GAD_forceZ;         /* Turbine forces in the z-direction */
+
+/*----->>>>> int GADGetParams();   ----------------------------------------------------------------------
+ * Query the run-time parameters of the GAD sub-module.
+ *
+ * GADSelector is always queried. Every other GAD parameter is only assigned its default and queried
+ * when GADSelector > 0, and GADrefU is only queried (as mandatory) when GADrefSwitch == 1.
+ *
+ * Returns the status of the last query that was issued; statuses of earlier queries are overwritten.
+*/
+int GADGetParams(){
+   int errorCode = GAD_SUCCESS;
+
+   /* The module is off unless the parameters say otherwise. */
+   GADSelector = 0;
+   errorCode = queryIntegerParameter("GADSelector", &GADSelector, 0, 1, PARAM_OPTIONAL);
+   if(GADSelector <= 0){
+     return(errorCode);   /* GAD disabled: nothing else to query */
+   }
+
+   /* Defaults applied when an optional parameter is not supplied. */
+   GADoutputForces    = 0;     /* forces not included in the output */
+   GADofflineForces   = 0;     /* offline force computation off */
+   GADaxialInduction  = 1;     /* axial induction factor is computed (on) */
+   GADaxialIndVal     = 0.02;  /* 2% prescribed induction, used when GADaxialInduction==0 */
+   GADrefSwitch       = 0;     /* reference windspeed off */
+   GADrefSampleWindow = 30.0;  /* seconds; accepted range is 1.0-300.0 */
+   GADrefSeriesLength = 10;    /* sample-window averages per reference series; accepted range is 1-360 */
+   GADForcingSwitch   = 0;     /* use the local windspeed */
+
+   /* Queries, issued in the module's established order. */
+   errorCode = queryFileParameter("turbineSpecsFile", &turbineSpecsFile, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADoutputForces", &GADoutputForces, 0, 1, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADofflineForces", &GADofflineForces, 0, 1, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADaxialInduction", &GADaxialInduction, 0, 1, PARAM_OPTIONAL);
+   errorCode = queryFloatParameter("GADaxialIndVal", &GADaxialIndVal, 0.0, 1.0, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADrefSwitch", &GADrefSwitch, 0, 1, PARAM_OPTIONAL);
+   errorCode = queryFloatParameter("GADrefSampleWindow", &GADrefSampleWindow, 1.0, 300.0, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADrefSeriesLength", &GADrefSeriesLength, 1, 360, PARAM_OPTIONAL);
+   errorCode = queryIntegerParameter("GADForcingSwitch", &GADForcingSwitch, 0, 1, PARAM_OPTIONAL);
+
+   /* A reference windspeed must be supplied once the reference switch is on. */
+   if(GADrefSwitch == 1){
+     GADrefU = 0.0;   /* m/s */
+     errorCode = queryFloatParameter("GADrefU", &GADrefU, 0.0, 50.0, PARAM_MANDATORY);
+   }
+
+   return(errorCode);
+} //end GADGetParams()
+
+/*----->>>>> int GADPrintParams();   ----------------------------------------------------------------------
+* Print the GAD sub-module parameters with their descriptions.
+*
+* Only mpi_rank_world == 0 prints. The selector is always listed; the remaining parameters are
+* listed only when GADSelector > 0. Always returns GAD_SUCCESS.
+*/
+int GADPrintParams(){
+   int errorCode = GAD_SUCCESS;
+   int iParam;
+   /* {name, description} pairs listed when the GAD is enabled, in print order */
+   char *gadParams[][2] = {
+     {"turbineSpecsFile", "netCDF file with turbine specifications "},
+     {"GADoutputForces", "Flag to include GAD forces in the output: 0=off, 1=on"},
+     {"GADofflineForces", "Flag to compute GAD forces in an offline mode: 0=off, 1=on"},
+     {"GADaxialInduction", "Flag to compute axial induction factor: 0==off (uses prescribed GADaxialIndVal), 1==on"},
+     {"GADaxialIndVal", "Prescribed constant axial induction factor when GADaxialInduction==0"},
+     {"GADrefSwitch", "Switch to use reference windspeed: 0=off, 1=on"},
+     {"GADrefU", "Prescribed constant reference hub-height windspeed"},
+     {"GADrefSampleWindow", "Sample duration (in seconds) over which to average per-timestep values (filtering out highest frequencies)"},
+     {"GADrefSeriesLength", "Number of sampling windows over which to average again for reference velocity magnitude and direction"},
+     {"GADForcingSwitch", "Switch to use the GADrefU-based or local windspeed in computing GAD forces: 0=local, 1=ref"}
+   };
+   const int numGadParams = (int)(sizeof(gadParams)/sizeof(gadParams[0]));
+
+   /* Non-root ranks stay silent. */
+   if(mpi_rank_world != 0){
+     return(errorCode);
+   }
+
+   printComment("----------: GAD ---");
+   printParameter("GADSelector", "Generalized Actuator Disk Selector: 0=off, 1=on ");
+   if(GADSelector <= 0){
+     return(errorCode);
+   }
+
+   for(iParam = 0; iParam < numGadParams; iParam++){
+     printParameter(gadParams[iParam][0], gadParams[iParam][1]);
+   }
+   return(errorCode);
+} //end GADPrintParams()
+
+/*----->>>>> int GADInit();   ----------------------------------------------------------------------
+ * Used to broadcast parameters, allocate memory, and initialize configuration settings
+ * for the GAD sub-module.
+ *
+ * The root rank's GADSelector is broadcast first. When it is > 0 the turbineSpecsFile path is
+ * shipped to every rank, GADConstructor() is called, and the remaining scalar GAD parameters are
+ * broadcast from rank 0. numgridCells_away is set to 3 regardless of GADSelector.
+ *
+ * Returns the value returned by GADConstructor() when the GAD is enabled, GAD_SUCCESS otherwise.
+ *
+ * All broadcasts use the C datatype handles (MPI_INT, MPI_CHAR, MPI_FLOAT); the Fortran handles
+ * MPI_INTEGER / MPI_CHARACTER describe Fortran types and must not be used for C buffers.
+*/
+int GADInit(){
+   int errorCode = GAD_SUCCESS;
+   int strLength;
+
+   /*Broadcast GAD parameters to all mpi_ranks*/
+   MPI_Bcast(&GADSelector, 1, MPI_INT, 0, MPI_COMM_WORLD);
+   if(GADSelector > 0){
+      /* Length (including the terminating NUL) of the file name held by the root rank; 0 if none */
+      strLength = 0;
+      if((mpi_rank_world == 0) && (turbineSpecsFile != NULL)){
+        strLength = (int) strlen(turbineSpecsFile)+1;
+      } //end if root rank with a file name
+      MPI_Bcast(&strLength, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      if(strLength > 0){
+        if(mpi_rank_world != 0){
+           turbineSpecsFile = (char *) malloc(strLength*sizeof(char));
+           if(turbineSpecsFile == NULL){
+             /* Continuing would hand a NULL receive buffer to the broadcast below */
+             printf("%d/%d: GADInit()-- failed to allocate turbineSpecsFile, EXITING NOW!!!!\n",
+                    mpi_rank_world, mpi_size_world);
+             fflush(stdout);
+             MPI_Abort(MPI_COMM_WORLD, 1);
+           }
+        } //if a non-root mpi_rank
+        MPI_Bcast(turbineSpecsFile, strLength, MPI_CHAR, 0, MPI_COMM_WORLD);
+      }
+
+      /*Call the GAD module instance turbine array configuration constructor*/
+      errorCode = GADConstructor();
+
+      MPI_Bcast(&GADoutputForces, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADofflineForces, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADaxialInduction, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADaxialIndVal, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADrefSwitch, 1, MPI_INT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADrefU, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+      MPI_Bcast(&GADForcingSwitch, 1, MPI_INT, 0, MPI_COMM_WORLD);
+   } //end if GADSelector > 0
+
+
+   /*Could set this to be a runtime parameter at some point, setting constant for now.*/
+   numgridCells_away = 3;
+
+   return(errorCode);
+} //end GADInit()
+
+/*----->>>>> static void* GADAllocChecked();   --------------------------------------------------------
+* Allocate nElems*elemSize bytes for the GAD array called "name". A failed allocation of a non-empty
+* array aborts the MPI job, because a rank that carried on would pass a NULL buffer to the
+* broadcasts that follow. Returns the (possibly NULL, when nElems == 0) pointer from malloc().
+*/
+static void* GADAllocChecked(size_t nElems, size_t elemSize, const char *name){
+  void *ptr = malloc(nElems*elemSize);
+
+  if((ptr == NULL) && (nElems > 0)){
+    printf("%d/%d: GADConstructor()-- failed to allocate %s, EXITING NOW!!!!\n",
+           mpi_rank_world, mpi_size_world, name);
+    fflush(stdout);
+    MPI_Abort(MPI_COMM_WORLD, 1);
+  }
+  return(ptr);
+} //end GADAllocChecked()
+
+/*----->>>>> static int GADReadDimLength();   ---------------------------------------------------------
+* Root-rank helper: look up the netCDF dimension "dimName" in the open file ncid and return its
+* length as an int. Any netCDF error is passed to the ERR macro.
+*/
+static int GADReadDimLength(int ncid, const char *dimName){
+  int ncStatus;
+  int dimid = -1;
+  size_t dimLen = 0;
+
+  if((ncStatus = nc_inq_dimid(ncid, dimName, &dimid))){
+    ERR(ncStatus);
+  }
+  if((ncStatus = nc_inq_dimlen(ncid, dimid, &dimLen))){
+    ERR(ncStatus);
+  }
+  return((int) dimLen);
+} //end GADReadDimLength()
+
+/*----->>>>> static void GADReadBcastFloatField();   --------------------------------------------------
+* Allocate *fldPtr on every rank for the product of the nDims (1 to 3) extents in dimLens, read the
+* whole netCDF variable "fldName" on the root rank, and broadcast the values to all ranks.
+* ncid is only used on the root rank. This is a collective call: every rank must make it with
+* identical nDims/dimLens.
+*/
+static void GADReadBcastFloatField(int ncid, const char *fldName, int nDims, const size_t *dimLens, float **fldPtr){
+  size_t readStart[3] = {0, 0, 0};   //the full variable is always read, starting at the origin
+  size_t nElems = 1;
+  int ncfldid = -1;
+  int ncStatus;
+  int iDim;
+
+  for(iDim = 0; iDim < nDims; iDim++){
+    nElems *= dimLens[iDim];
+  }
+  /*Allocate for the field*/
+  *fldPtr = (float*) GADAllocChecked(nElems, sizeof(float), fldName);
+  /* Read in netCDF file values on the root rank */
+  if(mpi_rank_world == 0){
+    if((ncStatus = nc_inq_varid(ncid, fldName, &ncfldid))){
+      //Name the missing field before ERR, in case ERR does not return
+      printf("Error GADConstructor(): field = %s was not found in this file,!\n",fldName);
+      fflush(stdout);
+      ERR(ncStatus);
+    } //if nc_inq_varid
+    if((ncStatus = nc_get_vara_float(ncid, ncfldid, readStart, dimLens, *fldPtr))){
+      ERR(ncStatus);
+    }
+  } //end if mpi_rank_world == 0
+  MPI_Bcast(*fldPtr, (int) nElems, MPI_FLOAT, 0, MPI_COMM_WORLD);
+} //end GADReadBcastFloatField()
+
+//#define DEBUG_GADCONSTRUCTOR
+#ifdef DEBUG_GADCONSTRUCTOR
+/*----->>>>> static void GADDebugPrintField();   ------------------------------------------------------
+* Debug-only: each rank, in rank order, prints the contents of a 1-, 2- or 3-dimensional field laid
+* out as [nRows][nSegs][nCols] (nSegs is ignored for nDims < 3, nCols for nDims == 1).
+*/
+static void GADDebugPrintField(const char *fldName, const float *fld, int nDims, int nRows, int nSegs, int nCols){
+  int iRank,i,j,ipoly;
+
+  for(iRank = 0; iRank < mpi_size_world; iRank++){
+    MPI_Barrier(MPI_COMM_WORLD);
+    if(iRank == mpi_rank_world){
+      printf("%d/%d %s:--------- \n",mpi_rank_world,mpi_size_world, fldName);
+      for(i = 0; i < nRows; i++){
+        if(nDims == 1){
+          printf("\t%d \t= \t%f\n",i,fld[i]);
+        }else if(nDims == 2){
+          printf("\t%d \t=\t",i);
+          for(ipoly = 0; ipoly < nCols; ipoly++){
+            printf("%f\t",fld[i*nCols+ipoly]);
+          }// end for ipoly...
+          printf("\n");
+        }else{
+          printf("\t i %d \t=\t",i);
+          for(j = 0; j < nSegs; j++){
+            printf("\n\t segment %d \t=\t",j);
+            for(ipoly = 0; ipoly < nCols; ipoly++){
+              printf("%g\t",fld[i*(nCols*nSegs)+j*nCols+ipoly]);
+            }// end for ipoly...
+          }// end for j...
+          printf("\n");
+        }
+      }// end for i...
+      printf("\n");
+      fflush(stdout);
+    } //end if iRank == mpi_rank_world
+    MPI_Barrier(MPI_COMM_WORLD);
+  }//end for iRank...
+} //end GADDebugPrintField()
+#endif
+
+/*----->>>>> int GADConstructor();   ----------------------------------------------------------------------
+* This function constructs the GAD sub-module instance by reading a GAD (netCDF) input configuration file,
+* allocating CPU-level memory for GAD arrays, and initializing these arrays with values specified in
+* the inputs file.
+*
+* The root rank opens turbineSpecsFile and reads the five dimension lengths (GADNumTurbines,
+* GADNumTurbineTypes, turbinePolyOrderMax, turbinePolyClCdrNormSegments, alphaBounds), which are then
+* broadcast. Every field array is allocated on all ranks, read on the root rank, and broadcast.
+* GAD_rotorTheta, GAD_turbineYawing, GAD_turbineRefMag, GAD_turbineRefDir, GAD_yawError and
+* GAD_anFactor are registered with the IO module. The turbine rank/index arrays and the registered
+* work arrays are allocated here but not initialized.
+*
+* A missing or unopenable turbineSpecsFile aborts the whole MPI job (the non-root ranks would
+* otherwise wait forever in the first broadcast).
+*
+* Returns the status of the final ioAddStandardAttrs() call; earlier statuses are overwritten.
+*/
+int GADConstructor(){
+  int errorCode = GAD_SUCCESS;
+  int ncid = -1;        //only meaningful on the root rank
+  int ncfldid = -1;
+  int iFld;
+  size_t typeStart = 0;
+  size_t typeCount;
+  size_t dimLens[3];
+  char fldName[64];
+  /* Per-turbine float fields, dimensioned [GADNumTurbines] */
+  const char *turbineFldNames[3] = {"GAD_Xcoords", "GAD_Ycoords", "GAD_rotorTheta"};
+  float **turbineFldPtrs[3] = {&GAD_Xcoords, &GAD_Ycoords, &GAD_rotorTheta};
+  /* Per-turbine-type float fields, dimensioned [GADNumTurbineTypes] */
+  const char *typeFldNames[3] = {"GAD_hubHeights", "GAD_rotorD", "GAD_nacelleD"};
+  float **typeFldPtrs[3] = {&GAD_hubHeights, &GAD_rotorD, &GAD_nacelleD};
+  /* Polynomial coefficient fields, dimensioned [GADNumTurbineTypes][turbinePolyOrderMax] */
+  const char *polyFldNames[4] = {"turbinePolyTwist", "turbinePolyChord", "turbinePolyPitch", "turbinePolyOmega"};
+  float **polyFldPtrs[4] = {&turbinePolyTwist, &turbinePolyChord, &turbinePolyPitch, &turbinePolyOmega};
+  /* Lift/drag fields, dimensioned [GADNumTurbineTypes][turbinePolyClCdrNormSegments][turbinePolyOrderMax] */
+  const char *clcdFldNames[2] = {"turbinePolyCl", "turbinePolyCd"};
+  float **clcdFldPtrs[2] = {&turbinePolyCl, &turbinePolyCd};
+
+  //Root-rank should read the netcdf turbineSpecsFile
+  if(mpi_rank_world == 0){
+    if(turbineSpecsFile == NULL){
+      printf("GADConstructor(): no turbineSpecsFile was specified, EXITING NOW!!!!\n");
+      fflush(stdout);
+      MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+#ifdef DEBUG_GADCONSTRUCTOR
+    printf("Attempting to open turbineSpecsFile = %s\n",turbineSpecsFile);
+    fflush(stdout);
+#endif
+
+    //Open the netcdf GAD inputs file
+    errorCode = ioOpenNetCDFinFile(turbineSpecsFile, &ncid);
+    if(errorCode > 0){
+      printf("Failed to open turbineSpecsFile = %s EXITING NOW!!!!\n",turbineSpecsFile);
+      fflush(stdout);
+      //Abort all ranks with a failure status rather than exiting the root rank alone with status 0
+      MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+#ifdef DEBUG_GADCONSTRUCTOR
+    printf("Opened turbineSpecsFile = %s with ncid = %d\n",turbineSpecsFile,ncid);
+    fflush(stdout);
+#endif
+
+    //Assign the fundamental turbine configuration parameters from the netCDF file dimensions
+    GADNumTurbines = GADReadDimLength(ncid, "GADNumTurbines");
+    GADNumTurbineTypes = GADReadDimLength(ncid, "GADNumTurbineTypes");
+    turbinePolyOrderMax = GADReadDimLength(ncid, "turbinePolyOrderMax");
+    turbinePolyClCdrNormSegments = GADReadDimLength(ncid, "turbinePolyClCdrNormSegments");
+    alphaBounds = GADReadDimLength(ncid, "alphaBounds");
+  } //end if(mpi_rank_world == 0)
+  MPI_Bcast(&GADNumTurbines, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&GADNumTurbineTypes, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&turbinePolyOrderMax, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&turbinePolyClCdrNormSegments, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&alphaBounds, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+#ifdef DEBUG_GADCONSTRUCTOR
+  printf("%d/%d GADNumTurbines = %d\n",mpi_rank_world,mpi_size_world, GADNumTurbines);
+  printf("%d/%d GADNumTurbineTypes = %d\n",mpi_rank_world,mpi_size_world, GADNumTurbineTypes);
+  printf("%d/%d turbinePolyOrderMax = %d\n",mpi_rank_world,mpi_size_world, turbinePolyOrderMax);
+  printf("%d/%d turbinePolyClCdrNormSegments = %d\n",mpi_rank_world,mpi_size_world, turbinePolyClCdrNormSegments);
+  printf("%d/%d alphaBounds = %d\n",mpi_rank_world,mpi_size_world, alphaBounds);
+  fflush(stdout);
+#endif
+
+  /*Allocate, read (root-rank only), and broadcast the integer GAD_turbineType array*/
+  snprintf(fldName, sizeof(fldName), "GAD_turbineType");
+  GAD_turbineType = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), fldName);
+  if(mpi_rank_world == 0){
+    typeCount = (size_t) GADNumTurbines;
+    if ( (errorCode = nc_inq_varid(ncid, fldName, &ncfldid)) ){
+       //Name the missing field before ERR, in case ERR does not return
+       printf("Error GADConstructor(): field = %s was not found in this file,!\n",fldName);
+       fflush(stdout);
+       ERR(errorCode);
+    } //if nc_inq_varid
+    if ((errorCode = nc_get_vara_int(ncid, ncfldid, &typeStart, &typeCount, GAD_turbineType )) ){
+       ERR(errorCode);
+    }
+  } //end if mpi_rank_world == 0
+  MPI_Bcast(GAD_turbineType, GADNumTurbines, MPI_INT, 0, MPI_COMM_WORLD);
+
+  /*Allocate, read in field values (root-rank only), and broadcast
+   * each of the turbine-general characteristics arrays */
+  dimLens[0] = (size_t) GADNumTurbines;
+  for(iFld = 0; iFld < 3; iFld++){
+    GADReadBcastFloatField(ncid, turbineFldNames[iFld], 1, dimLens, turbineFldPtrs[iFld]);
+    if(iFld == 2){
+      snprintf(fldName, sizeof(fldName), "%s", turbineFldNames[iFld]);
+      errorCode = ioRegisterVar(&fldName[0], "float", 2, dims1dTD_GAD, *turbineFldPtrs[iFld]);
+      errorCode = ioAddStandardAttrs("GAD_rotorTheta", "degrees", "rotor angle from west increasing counter-clockwise", NULL);
+      printf("%d/%d: GADConstructor()-- %s stored at %p, has been registered with IO.\n",
+             mpi_rank_world, mpi_size_world, &fldName[0], (void*) *turbineFldPtrs[iFld]);
+      fflush(stdout);
+    } //end if iFld ==2 ... GAD_rotorTheta
+#ifdef DEBUG_GADCONSTRUCTOR
+    GADDebugPrintField(turbineFldNames[iFld], *turbineFldPtrs[iFld], 1, GADNumTurbines, 1, 1);
+#endif
+  }// end for iFld...
+
+  /*Allocate, read in field values (root-rank only), and broadcast
+   * each of the turbine-type-specific characteristics arrays */
+  dimLens[0] = (size_t) GADNumTurbineTypes;
+  for(iFld = 0; iFld < 3; iFld++){
+    GADReadBcastFloatField(ncid, typeFldNames[iFld], 1, dimLens, typeFldPtrs[iFld]);
+#ifdef DEBUG_GADCONSTRUCTOR
+    GADDebugPrintField(typeFldNames[iFld], *typeFldPtrs[iFld], 1, GADNumTurbineTypes, 1, 1);
+#endif
+  }// end for iFld...
+
+  /*Allocate, read in field values (root-rank only), and broadcast
+   * each of the turbine-type-specific polynomial coefficients arrays */
+  dimLens[0] = (size_t) GADNumTurbineTypes;
+  dimLens[1] = (size_t) turbinePolyOrderMax;
+  for(iFld = 0; iFld < 4; iFld++){
+    GADReadBcastFloatField(ncid, polyFldNames[iFld], 2, dimLens, polyFldPtrs[iFld]);
+#ifdef DEBUG_GADCONSTRUCTOR
+    GADDebugPrintField(polyFldNames[iFld], *polyFldPtrs[iFld], 2, GADNumTurbineTypes, 1, turbinePolyOrderMax);
+#endif
+  }// end for iFld...
+
+  /* rnorm_vect: segment limits, one more entry per turbine type than there are segments */
+  dimLens[0] = (size_t) GADNumTurbineTypes;
+  dimLens[1] = (size_t) (turbinePolyClCdrNormSegments+1);
+  GADReadBcastFloatField(ncid, "rnorm_vect", 2, dimLens, &rnorm_vect);
+#ifdef DEBUG_GADCONSTRUCTOR
+  GADDebugPrintField("rnorm_vect", rnorm_vect, 2, GADNumTurbineTypes, 1, turbinePolyClCdrNormSegments+1);
+#endif
+
+  /* alpha_minmax_vect */
+  dimLens[0] = (size_t) GADNumTurbineTypes;
+  dimLens[1] = (size_t) alphaBounds;
+  GADReadBcastFloatField(ncid, "alpha_minmax_vect", 2, dimLens, &alpha_minmax_vect);
+#ifdef DEBUG_GADCONSTRUCTOR
+  GADDebugPrintField("alpha_minmax_vect", alpha_minmax_vect, 2, GADNumTurbineTypes, 1, alphaBounds);
+#endif
+
+  /* turbinePolyCl, turbinePolyCd */
+  dimLens[0] = (size_t) GADNumTurbineTypes;
+  dimLens[1] = (size_t) turbinePolyClCdrNormSegments;
+  dimLens[2] = (size_t) turbinePolyOrderMax;
+  for(iFld = 0; iFld < 2; iFld++){
+    GADReadBcastFloatField(ncid, clcdFldNames[iFld], 3, dimLens, clcdFldPtrs[iFld]);
+#ifdef DEBUG_GADCONSTRUCTOR
+    GADDebugPrintField(clcdFldNames[iFld], *clcdFldPtrs[iFld], 3, GADNumTurbineTypes,
+                       turbinePolyClCdrNormSegments, turbinePolyOrderMax);
+#endif
+  }// end for iFld...
+
+  //Done reading the turbineSpecs netCDF
+  if(mpi_rank_world == 0){
+    /* Close the file. */
+    if ((errorCode = nc_close(ncid))){
+     ERR(errorCode);
+    }
+  } //end if mpi_rank_world == 0
+
+  /*Allocate for other turbine-specific internal characteristics arrays*/
+  GAD_turbineRank = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), "GAD_turbineRank");
+  GAD_turbineRefi = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), "GAD_turbineRefi");
+  GAD_turbineRefj = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), "GAD_turbineRefj");
+  GAD_turbineRefk = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), "GAD_turbineRefk");
+
+  snprintf(fldName, sizeof(fldName), "GAD_turbineYawing");
+  GAD_turbineYawing = (int*) GADAllocChecked((size_t) GADNumTurbines, sizeof(int), fldName);
+  errorCode = ioRegisterVar(&fldName[0], "int", 2, dims1dTD_GAD, &GAD_turbineYawing[0]);
+  errorCode = ioAddStandardAttrs("GAD_turbineYawing", "-", "flag indicating turbine is in the process of yawing", NULL);
+
+  snprintf(fldName, sizeof(fldName), "GAD_turbineRefMag");
+  GAD_turbineRefMag = (float*) GADAllocChecked((size_t) GADNumTurbines, sizeof(float), fldName);
+  errorCode = ioRegisterVar(&fldName[0], "float", 2, dims1dTD_GAD, &GAD_turbineRefMag[0]);
+  errorCode = ioAddStandardAttrs("GAD_turbineRefMag", "m s-1", "turbine reference wind speed", NULL);
+
+  snprintf(fldName, sizeof(fldName), "GAD_turbineRefDir");
+  GAD_turbineRefDir = (float*) GADAllocChecked((size_t) GADNumTurbines, sizeof(float), fldName);
+  errorCode = ioRegisterVar(&fldName[0], "float", 2, dims1dTD_GAD, &GAD_turbineRefDir[0]);
+  errorCode = ioAddStandardAttrs("GAD_turbineRefDir", "degrees", "turbine reference wind direction", NULL);
+
+  snprintf(fldName, sizeof(fldName), "GAD_yawError");
+  GAD_yawError = (float*) GADAllocChecked((size_t) GADNumTurbines, sizeof(float), fldName);
+  errorCode = ioRegisterVar(&fldName[0], "float", 2, dims1dTD_GAD, &GAD_yawError[0]);
+  errorCode = ioAddStandardAttrs("GAD_yawError", "degrees2 s", "turbine-wind misalignment error for yaw-controller", NULL);
+
+  snprintf(fldName, sizeof(fldName), "GAD_anFactor");
+  GAD_anFactor = (float*) GADAllocChecked((size_t) GADNumTurbines, sizeof(float), fldName);
+  errorCode = ioRegisterVar(&fldName[0], "float", 2, dims1dTD_GAD, &GAD_anFactor[0]);
+  errorCode = ioAddStandardAttrs("GAD_anFactor", "-", "rotor-normal upstream wind speed induction factor", NULL);
+
+  return(errorCode);
+} //end GADConstructor()
+
+/*----->>>>> int GADInitTurbineRefChars();   ----------------------------------------------------------------------
+* This function initializes turbine reference location characteristic values (location mpi_rank and i,j,k indices).
+*
+* dt: model timestep; GADrefSampleWindow/dt sets the number of timesteps per sample window
+*     (presumably both in seconds -- confirm against the parameter definitions).
+*
+* Steps performed:
+*  1) Derive GADsamplingAvgLength/GADsamplingAvgWeight and GADrefSeriesWeight and broadcast the rank-0 values.
+*  2) For every turbine, scan all local cells (the loops include the Nh padding cells on each side) for the
+*     horizontally closest column and the vertical level whose height above topoPos brackets the hub height.
+*  3) Zero the per-turbine state of every turbine that was not claimed by this rank.
+*
+* Returns GAD_SUCCESS.
+*/
+int GADInitTurbineRefChars(float dt){
+  int errorCode = GAD_SUCCESS;
+  int iturb,i,j,k;
+  int ijk;
+  int ij;
+  float rVec;
+  float rVec0;
+  float deltaz, deltaz0;
+  double rVecMax;
+
+  /*Initialize requisite parameters for the RefMag and RefDir calculations*/
+  GADsamplingAvgLength = (int) floor(GADrefSampleWindow/dt);    //Determine the number of model timesteps in a sample window (high frequencies filter)
+  if(GADsamplingAvgLength < 1){
+    //A sample window shorter than one timestep would give zero samples and an infinite averaging weight below,
+    //so fall back to one timestep per sample window.
+    if(mpi_rank_world == 0){
+      printf("GADInitTurbineRefChars: WARNING GADrefSampleWindow/dt gives %d timesteps per sample window, using 1 instead.\n",
+             GADsamplingAvgLength);
+      fflush(stdout);
+    }
+    GADsamplingAvgLength = 1;
+  }
+  MPI_Bcast(&GADsamplingAvgLength, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  GADsamplingAvgWeight = 1.0/((float) GADsamplingAvgLength); //Precompute the averaging weight across instances in a sample window.
+  MPI_Bcast(&GADsamplingAvgWeight, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&GADrefSeriesLength, 1, MPI_INT, 0, MPI_COMM_WORLD); //Broadcast the read-in parameter for series length to all ranks
+  GADrefSeriesWeight = 1.0/((float) GADrefSeriesLength);  //Precompute (from the broadcast length) the averaging weight across the full reference averaging period series of sample average values
+  MPI_Bcast(&GADrefSeriesWeight, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+//#define DEBUG_TURBCHAR
+#ifdef DEBUG_TURBCHAR
+  printf("%d/%d: GADsamplingAvgLength=%d, GADsamplingAvgWeight=%f, GADrefSeriesLength=%d, GADrefSeriesWeight=%f\n",
+         mpi_rank_world,mpi_size_world,GADsamplingAvgLength,GADsamplingAvgWeight,GADrefSeriesLength,GADrefSeriesWeight);
+  fflush(stdout);
+#endif
+
+  //Horizontal search radius (one grid-cell diagonal); identical for every cell and turbine
+  rVecMax = sqrt(pow(dX,2.0)+pow(dY,2.0));
+
+  for(iturb = 0; iturb < GADNumTurbines; iturb++){
+    rVec0 = 99999.9999;
+    deltaz0 = 99999.9999;
+    GAD_turbineRank[iturb] = -999;  //Initialize to special "absent" value of -999
+    GAD_turbineRefi[iturb] = -999;  //Initialize to special "absent" value of -999
+    GAD_turbineRefj[iturb] = -999;  //Initialize to special "absent" value of -999
+    GAD_turbineRefk[iturb] = -999;  //Initialize to special "absent" value of -999
+    if(inFile == NULL){
+      //Only reset the per-turbine state when inFile is NULL (presumably no initial-conditions/restart file -- confirm)
+      GAD_turbineRefMag[iturb] = 0.0; //Standard initialization to zero
+      GAD_turbineRefDir[iturb] = 0.0; //Standard initialization to zero
+      GAD_turbineYawing[iturb] = 0;   //Initialize to all turbines not rotating
+      GAD_yawError[iturb] = 0.0;      //Initialize to zero yaw error
+      GAD_anFactor[iturb] = 0.0;      //Initialize to zero axial induction factor
+    }//end if inFile == NULL
+    for(i=iMin-Nh; i < iMax+Nh; i++){
+      for(j=jMin-Nh; j < jMax+Nh; j++){
+        for(k=kMin-Nh; k < kMax+Nh; k++){
+           ijk = i*(Nyp+2*Nh)*(Nzp+2*Nh)+j*(Nzp+2*Nh)+k;
+           ij = i*(Nyp+2*Nh)+j;
+           //Horizontal distance from the turbine location to this cell
+           rVec = sqrt( pow((GAD_Xcoords[iturb]-xPos[ijk]),2.0)
+                       +pow((GAD_Ycoords[iturb]-yPos[ijk]),2.0));
+           if(rVec <= rVecMax){ //Should be a candidate gridcell for (nacelle center) reference location
+             if(rVec <= rVec0){
+               if(rVec < rVec0){
+                 //Strictly closer column: this rank claims the turbine and records the column indices
+                 rVec0 = rVec;
+                 GAD_turbineRank[iturb] = mpi_rank_world;
+                 GAD_turbineRefi[iturb] = i;
+                 GAD_turbineRefj[iturb] = j;
+               }
+               //Absolute vertical offset between the hub height and this cell's height above topoPos
+               deltaz = sqrt(pow((GAD_hubHeights[GAD_turbineType[iturb]]-(zPos[ijk]-topoPos[ij])),2.0));
+#ifdef DEBUG_TURBCHAR
+               printf("%d/%d: deltaz = %f, 0.5/(J33[ijk]*dZi)) = %f\n",
+                      mpi_rank_world,mpi_size_world, deltaz, 0.5/(J33[ijk]*dZi));
+#endif
+               if(deltaz <= 0.5/(J33[ijk]*dZi)){         // 1/(J33[ijk]*dZi)) = dz of cell
+                 deltaz0 = deltaz;
+                 GAD_turbineRefk[iturb] = k;
+               }//end if vertical delta < dz ...
+             }//end if rVec < rVec0...
+           }//end if rVec...
+        } //end for(k...
+      } // end for(j...
+    } // end for(i...
+    //dummy update here making use of deltaz0 avoiding compiler warning for an unused variable
+    deltaz = deltaz0;
+#ifdef DEBUG_TURBCHAR
+    if(GAD_turbineRank[iturb] == mpi_rank_world){
+      printf("%d/%d: Turbine %d has determined nacelle center cell @ %d,%d,%d with rVec0 = %f and deltaz0 = %f\n",
+             mpi_rank_world,mpi_size_world,iturb,GAD_turbineRefi[iturb],GAD_turbineRefj[iturb],GAD_turbineRefk[iturb],rVec0, deltaz0);
+      fflush(stdout);
+    }
+#endif
+  }// end for iturb...
+
+  //Having determined the mpi_rank of each turbine, zero out the
+  //GAD_rotorTheta and other elements for any turbines which do NOT belong to a given mpi_rank_world
+  //This allows subsequent MPI_Reduce in IO using MPI_SUM op to update GAD_rotorTheta with time
+  for(iturb = 0; iturb < GADNumTurbines; iturb++){
+     if(GAD_turbineRank[iturb] != mpi_rank_world){
+       GAD_rotorTheta[iturb] = 0.0;
+       GAD_turbineRefMag[iturb] = 0.0;
+       GAD_turbineRefDir[iturb] = 0.0;
+       GAD_turbineYawing[iturb] = 0;
+       GAD_yawError[iturb] = 0.0;
+       GAD_anFactor[iturb] = 0.0;
+     }
+  }// end for iturb...
+
+  return(errorCode);
+}//end int GADInitTurbineRefChars()
+
+/*----->>>>> int GADCreateTurbineVolMask();   ----------------------------------------------------------------------
+* Allocates the swept-volume mask field, registers it with IO, and stamps each turbine's ID (iturb+1, stored as
+* a float) into every local cell lying within half a rotor diameter of that turbine's hub position.
+* When GADoutputForces == 1 the three GAD force fields are allocated and registered with IO as well.
+* Returns the code of the last IO registration call made.
+*/
+int GADCreateTurbineVolMask(){
+  int errorCode = GAD_SUCCESS;
+  char fldName[64];
+  int iturb,i,j,k;
+  int ij;
+  int ijk;
+  int turbType;
+  float turbineID;
+  float rVec;
+
+  /* Mask field: allocate, register, describe */
+  GAD_turbineVolMask = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "GAD_turbineVolMask");
+  sprintf(fldName,"GAD_turbineVolMask");
+  errorCode = ioRegisterVar(fldName, "float", 4, dims4d, GAD_turbineVolMask);
+  errorCode = ioAddStandardAttrs("GAD_turbineVolMask", "-", "Turbine Volume Mask", NULL);
+  if(mpi_rank_world == 0){
+    printf("GADCreateTurbineVolMask: %s stored at %p, has been registered with IO.\n",
+    fldName,GAD_turbineVolMask);
+    fflush(stdout);
+  }
+
+  /* Stamp turbine IDs; a later turbine overwrites an earlier one wherever their spheres overlap */
+  turbineID = 0.0;
+  for(iturb = 0; iturb < GADNumTurbines; iturb++){
+    turbType = GAD_turbineType[iturb];
+    turbineID = (float) (iturb+1.0);
+    for(i=iMin-Nh; i < iMax+Nh; i++){
+      for(j=jMin-Nh; j < jMax+Nh; j++){
+        ij = i*(Nyp+2*Nh)+j;               /* horizontal (2-d) index of this column */
+        for(k=kMin-Nh; k < kMax+Nh; k++){
+           ijk = ij*(Nzp+2*Nh)+k;          /* full 3-d index of this cell */
+           /* distance from the hub position, using height above topoPos for the vertical part */
+           rVec = sqrt( pow((GAD_Xcoords[iturb]-xPos[ijk]),2.0)
+                       +pow((GAD_Ycoords[iturb]-yPos[ijk]),2.0)
+                       +pow((GAD_hubHeights[turbType]-(zPos[ijk]-topoPos[ij])),2.0));
+           if(rVec <= (0.5*GAD_rotorD[turbType])){
+             GAD_turbineVolMask[ijk] = turbineID;
+           }
+        }
+      }
+    }
+  }
+
+  /* Optional force output fields */
+  if (GADoutputForces == 1){
+    GAD_forceX = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "GAD_forceX");
+    sprintf(fldName,"GAD_forceX");
+    errorCode = ioRegisterVar(fldName, "float", 4, dims4d, GAD_forceX);
+    errorCode = ioAddStandardAttrs("GAD_forceX", "N m-3", "Turbine Forces in X-Direction", NULL);
+
+    GAD_forceY = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "GAD_forceY");
+    sprintf(fldName,"GAD_forceY");
+    errorCode = ioRegisterVar(fldName, "float", 4, dims4d, GAD_forceY);
+    errorCode = ioAddStandardAttrs("GAD_forceY", "N m-3", "Turbine Forces in Y-Direction", NULL);
+
+    GAD_forceZ = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "GAD_forceZ");
+    sprintf(fldName,"GAD_forceZ");
+    errorCode = ioRegisterVar(fldName, "float", 4, dims4d, GAD_forceZ);
+    errorCode = ioAddStandardAttrs("GAD_forceZ", "N m-3", "Turbine Forces in Z-Direction", NULL);
+  }
+
+  return(errorCode);
+}//end GADCreateTurbineVolMask()
+
+/*----->>>>> int GADCreateTurbineRotorMask()   ----------------------------------------------------------------------
+* Allocates the rotor-disk mask field, registers it with IO, and fills it for the first time by delegating
+* to GADUpdateTurbineRotorMask(). Returns the code produced by that update call.
+*/
+int GADCreateTurbineRotorMask(){
+  char fldName[64];
+  int errorCode = GAD_SUCCESS;
+
+  /* Allocate the field and make it known to the IO layer */
+  GAD_turbineRotorMask = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "GAD_turbineRotorMask");
+  sprintf(fldName,"GAD_turbineRotorMask");
+  errorCode = ioRegisterVar(fldName, "float", 4, dims4d, GAD_turbineRotorMask);
+  errorCode = ioAddStandardAttrs("GAD_turbineRotorMask", "-", "Turbine Rotor-Disk Mask", NULL);
+
+  /* Only the root rank reports the registration */
+  if(mpi_rank_world == 0){
+    printf("GADCreateTurbineRotorMask: %s stored at %p, has been registered with IO.\n",
+    fldName,GAD_turbineRotorMask);
+    fflush(stdout);
+  }
+
+  /* Initial fill of the mask from the current GAD_rotorTheta values */
+  errorCode = GADUpdateTurbineRotorMask();
+  return(errorCode);
+}//end GADCreateTurbineRotorMask()
+
+/*----->>>>> int GADUpdateTurbineRotorMask()   ----------------------------------------------------------------------
+* This function updates (from GAD_rotorTheta) the yaw-specific rotor-disk mask for turbines in the simulation
+*
+* The whole mask is first reset to zero. Then, for every turbine and every local cell (the loops include the
+* Nh padding cells on each side), the cell is set to 1.0 when all of the following hold:
+*   - |perpDist| (distance along the rotor-normal direction) is below numgridCells_away*perpdx_rot,
+*   - rVec (in-plane radial distance from the hub) is at most half the rotor diameter,
+*   - rVec is greater than half the nacelle diameter.
+* Cells satisfying the test for any turbine are set; no per-turbine ID is stored in this mask.
+* Always returns GAD_SUCCESS.
+*/
+int GADUpdateTurbineRotorMask(){
+  int errorCode = GAD_SUCCESS;
+  int iturb,i,j,k;
+  int ijk;
+  int ij;
+  float mask_value;
+  float pi = 3.1415926535;
+  /* CELLINROTOR selects the vector-projection formulation below; being defined here unconditionally, the
+     #else branches are never compiled. The macro stays defined for the remainder of the translation unit. */
+#define CELLINROTOR
+#ifdef CELLINROTOR
+  float x_hat[3];
+  float dr[3];
+  float tiltAngle = 0.0;  /* rotor tilt in degrees; fixed at zero here */
+#else
+  float x1,x2,x3,y1,y2,y3;
+  float parallelDist;
+#endif
+  float perpdx_rot;
+  float perpDist;
+  float rVec;
+  void  *memsetReturnVal;
+  
+  /*Reset the mask to zero everywhere*/
+  memsetReturnVal = memset(GAD_turbineRotorMask,0,(Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh)*sizeof(float));
+  /* memset returns its destination pointer, so this warning only fires when GAD_turbineRotorMask itself is NULL */
+  if(memsetReturnVal == NULL){
+       fprintf(stderr, "Rank %d/%d GADUpdateTurbineRotorMask():WARNING memsetReturnVal == NULL!\n",
+               mpi_rank_world,mpi_size_world);
+  } 
+  for(iturb = 0; iturb < GADNumTurbines; iturb++){
+    for(i=iMin-Nh; i < iMax+Nh; i++){
+      for(j=jMin-Nh; j < jMax+Nh; j++){
+        for(k=kMin-Nh; k < kMax+Nh; k++){
+           /* ijk indexes the 3-d fields, ij the 2-d (horizontal) topoPos field */
+           ijk = i*(Nyp+2*Nh)*(Nzp+2*Nh)+j*(Nzp+2*Nh)+k;
+           ij = i*(Nyp+2*Nh)+j;
+           mask_value = 0.0;
+#ifdef CELLINROTOR
+	   //Unit vector normal to the rotor-disk plane (horizontal while tiltAngle is zero); depends only on iturb
+           x_hat[0] = cosf(tiltAngle*pi/180.0)*cosf(GAD_rotorTheta[iturb]*pi/180.0);
+           x_hat[1] = cosf(tiltAngle*pi/180.0)*sinf(GAD_rotorTheta[iturb]*pi/180.0);
+           x_hat[2] = -sinf(tiltAngle*pi/180.0);
+
+           //Vector from nacelle center to current grid point
+           dr[0] = xPos[ijk]-GAD_Xcoords[iturb];
+           dr[1] = yPos[ijk]-GAD_Ycoords[iturb];
+           dr[2] = (zPos[ijk]-topoPos[ij])-GAD_hubHeights[GAD_turbineType[iturb]];
+    
+           //Signed distance from nacelle-center to current grid point along the rotor-normal direction
+           perpDist = dr[0]*x_hat[0] + dr[1]*x_hat[1] + dr[2]*x_hat[2];
+
+           //Radial distance within the rotor plane: length of dr with its rotor-normal component removed
+	   rVec = sqrtf( powf(dr[0]-perpDist*x_hat[0],2.0)
+                        +powf(dr[1]-perpDist*x_hat[1],2.0)
+                        +powf(dr[2]-perpDist*x_hat[2],2.0) );
+#else
+           /* Define the rotor plane */
+           x1 = GAD_Xcoords[iturb] - 0.5*GAD_rotorD[GAD_turbineType[iturb]]*cos(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+           y1 = GAD_Ycoords[iturb] - 0.5*GAD_rotorD[GAD_turbineType[iturb]]*sin(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+           x2 = GAD_Xcoords[iturb] + 0.5*GAD_rotorD[GAD_turbineType[iturb]]*cos(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+           y2 = GAD_Ycoords[iturb] + 0.5*GAD_rotorD[GAD_turbineType[iturb]]*sin(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+           x3 = fabs(GAD_Xcoords[iturb]-xPos[ijk])*cos(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+           y3 = fabs(GAD_Ycoords[iturb]-yPos[ijk])*sin(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0);
+
+           /*Find the perpendicular distance from this i,j,k cell center to the rotor-plane*/
+           perpDist = fabs( (x2-x1)*(y1-yPos[ijk]) - (x1-xPos[ijk])*(y2-y1) )/sqrt(pow((x2-x1),2.0) + pow((y2-y1),2.0));
+           parallelDist = sqrt(pow(x3,2.0)+pow(y3,2.0)); 
+           /*Recalculate the radial vector of the yaw-projected rotor disk...*/
+           rVec = sqrt(pow(parallelDist,2.0) + pow((GAD_hubHeights[GAD_turbineType[iturb]]-(zPos[ijk]-topoPos[ij])),2.0));
+#endif
+           /*Define the perpendicular "dx" in the rotated "x-y" plane (depends only on iturb) */
+           perpdx_rot =  fabs(dX*cosf(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0))
+                       + fabs(dY*sinf(0.5*pi + GAD_rotorTheta[iturb]*pi/180.0));
+
+           /* Inside the disk slab: close enough to the rotor plane, within the rotor radius, outside the nacelle radius */
+           if(   (fabs(perpDist) < ((float) numgridCells_away)*perpdx_rot)
+              && rVec <= (0.5*GAD_rotorD[GAD_turbineType[iturb]])
+	      && rVec >  (0.5*GAD_nacelleD[GAD_turbineType[iturb]]) ){
+//#define DEBUG_GAD_UPDATEROTOR
+#ifdef DEBUG_GAD_UPDATEROTOR
+             //float m,b;
+             if((mpi_rank_world == 6) && (k==20) ){
+              // printf("GADCreateTurbineRotorMask: Turbine %d @ %d,%d,%d: perpDist = %f , perpdx_rot = %f, parallelDist = %f, zDist = %f, m = %f, b = %f.\n",
+              // iturb,i,j,k,perpDist, perpdx_rot, parallelDist, sqrt(pow((GAD_hubHeights[GAD_turbineType[iturb]]-(zPos[ijk]-topoPos[ij])),2.0)), m, b ) ;
+               printf("GADCreateTurbineRotorMask: Turbine %d @ %d,%d,%d: rVec = %f , perpDist = %f , perpdx_rot = %f, zDist = %f.\n",
+               iturb,i,j,k, rVec, perpDist, perpdx_rot, sqrtf(powf((GAD_hubHeights[GAD_turbineType[iturb]]-(zPos[ijk]-topoPos[ij])),2.0))) ;
+               fflush(stdout);
+             }
+#endif
+              mask_value = 1.0;
+              GAD_turbineRotorMask[ijk] = mask_value;
+           }
+        } //end for(k...
+      } // end for(j...
+    } // end for(i... 
+  }// end for iturb...
+  
+  return(errorCode);
+}//end GADUpdateTurbineRotorMask()
+
+/*----->>>>> int GADDestructor();   ----------------------------------------------------------------------
+ * Releases the host-side turbine characteristics arrays of the GAD module and returns GAD_SUCCESS.
+ * NOTE(review): rnorm_vect, alpha_minmax_vect, turbinePolyCl and turbinePolyCd are declared in GAD.h
+ * but are not released here -- confirm where (or whether) they are freed.
+*/
+int GADDestructor(){
+  int errorCode = GAD_SUCCESS;
+  int n;
+  int nOwned;
+  /* Every array released by this routine, in release order */
+  void *ownedArrays[] = {
+    GAD_turbineType,
+    GAD_turbineRank,
+    GAD_turbineRefi,
+    GAD_turbineRefj,
+    GAD_turbineRefk,
+    GAD_turbineYawing,
+    GAD_Xcoords,
+    GAD_Ycoords,
+    GAD_turbineRefMag,
+    GAD_turbineRefDir,
+    GAD_yawError,
+    GAD_anFactor,
+    GAD_rotorTheta,
+    GAD_hubHeights,
+    GAD_rotorD,
+    GAD_nacelleD,
+    turbinePolyTwist,
+    turbinePolyChord,
+    turbinePolyPitch,
+    turbinePolyOmega
+  };
+
+  nOwned = (int) (sizeof(ownedArrays)/sizeof(ownedArrays[0]));
+  for(n = 0; n < nOwned; n++){
+    free(ownedArrays[n]);
+  }
+
+  return(errorCode);
+}//end GADDestructor()
+
+/*----->>>>> int GADCleanup();  ----------------------------------------------------------------------
+* Used to free all malloced memory by the GAD module.
+* Does nothing unless GADSelector > 0. Otherwise it releases the turbine characteristics arrays through
+* GADDestructor(), the two mask fields, and (when GADoutputForces == 1) the three force fields that
+* GADCreateTurbineVolMask() allocates under that same condition.
+* Returns the code reported by GADDestructor(), or GAD_SUCCESS when the module is off.
+*/
+int GADCleanup(){
+   int errorCode = GAD_SUCCESS;
+
+   if(GADSelector > 0){
+     /* Free any GAD module arrays */
+     errorCode = GADDestructor();
+    
+     free(GAD_turbineVolMask);
+     free(GAD_turbineRotorMask);
+
+     /* The force fields only exist when force output was requested; free(NULL) is a no-op should the
+        allocation never have happened.
+        NOTE(review): assumes no other cleanup routine already releases these host fields -- confirm. */
+     if(GADoutputForces == 1){
+       free(GAD_forceX);
+       free(GAD_forceY);
+       free(GAD_forceZ);
+     }
+   } //end if GADSelector > 0
+
+   return(errorCode);
+}//end GADCleanup()
diff --git a/SRC/EXTENSIONS/GAD/GAD.h b/SRC/EXTENSIONS/GAD/GAD.h
new file mode 100644
index 0000000..5085842
--- /dev/null
+++ b/SRC/EXTENSIONS/GAD/GAD.h
@@ -0,0 +1,133 @@
+/* FastEddy®: SRC/EXTENSIONS/GAD/GAD.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifndef _GAD_H
+#define _GAD_H
+
+/*GAD return codes */
+#define GAD_SUCCESS    0
+#define GAD_FAIL       10
+
+/*---GAD parameters*/
+extern int GADSelector;         /* Generalized Actuator Disk Selector: 0=off, 1=on */
+extern char *turbineSpecsFile;  /* The path+filename to a turbine specifications file*/
+extern int GADoutputForces;     /* Flag to include GAD forces in the output: 0=off, 1=on */
+extern int GADofflineForces;    /* Flag to compute GAD forces in an offline mode: 0=off, 1=on */
+extern int GADaxialInduction;   /* Flag to compute axial induction factor: 0==off (uses prescribed GADaxialIndVal), 1==on */
+extern float GADaxialIndVal;    /* Prescribed constant axial induction factor when GADaxialInduction==0 */
+extern int GADrefSwitch;        /* Switch to use reference windspeed: 0=off, 1=on */
+extern float GADrefU;           /* Prescribed constant reference hub-height windspeed*/
+/* NOTE(review): GADInitTurbineRefChars() reads a variable spelled GADrefSampleWindow, while this declaration
+   is spelled GADrefSampWindow -- confirm which spelling matches the definition. */
+extern float GADrefSampWindow;  /* Sample duration (in seconds) over which to average per-timestep values (filtering out highest frequencies)*/
+extern int GADsamplingAvgLength;/* number of timesteps in the prescribed sample window */
+extern float GADsamplingAvgWeight;/* sample window averaging weight (1/GADsamplingAvgLength)*/
+extern int GADrefSeriesLength;  /* Number of sampling windows over which to average again for reference velocity magnitude and direction */
+extern float GADrefSeriesWeight;  /* ref Series averaging weight (1/GADrefSeriesLength) */
+extern int GADForcingSwitch;    /* Switch to use the GADrefU-based or local windspeed in computing GAD forces: 0=local, 1=ref */
+extern int GADNumTurbines;      /* Number of GAD Turbines */
+extern int GADNumTurbineTypes;  /* Number of GAD Turbine Types */
+extern int turbinePolyOrderMax; /* Maximum Polynomial order across all turbine types */
+extern int turbinePolyClCdrNormSegments; /* Number of segments in the normalized radius for the lift and drag coefficient polynomial */
+extern int alphaBounds;         /* Number of elements in the min/max angle of attack array for the lift/drag curves */
+
+extern int numgridCells_away; /*Halo-region of cells considered in rotor disk distance-wise smoothing function*/
+
+/*---GAD turbine characteristics arrays */
+extern int* GAD_turbineType;    /* Integer class-label for turbine type*/ 
+extern int* GAD_turbineRank;    /* Integer mpi-rank of nacelle center cell for each turbine reference velMag and velDir grid cell (-999 when absent)*/ 
+extern int* GAD_turbineRefi;    /* Integer i-index of nacelle center cell for each turbine reference velMag and velDir grid cell (-999 when absent)*/ 
+extern int* GAD_turbineRefj;    /* Integer j-index of nacelle center cell for each turbine reference velMag and velDir grid cell (-999 when absent)*/   
+extern int* GAD_turbineRefk;    /* Integer k-index of nacelle center cell for each turbine reference velMag and velDir grid cell (-999 when absent)*/    
+extern int* GAD_turbineYawing;  /* Integer flag, ==1 while a turbine is currently yawing*/
+extern float* GAD_Xcoords;      /* SW-corner (0,0)-relative x-coordinate of turbines [m]*/ 
+extern float* GAD_Ycoords;      /* SW-corner (0,0)-relative y-coordinate of turbines [m]*/
+extern float* GAD_turbineRefMag;/* Reference "ambient" velocity magnitude for yaw control and beta/omega [m/s]*/
+extern float* GAD_turbineRefDir;/* Reference "ambient" velocity direction (horizontal, met. standard orientation) for yaw control and beta/omega [degrees]*/
+extern float* GAD_yawError;     /* yaw error between the incoming wind and the turbine orientation */
+extern float* GAD_anFactor;     /* turbine axial induction factor at hub height*/
+extern float* GAD_rotorTheta;   /* rotor-normal horizontal angle from North [degrees]*/
+extern float* GAD_hubHeights;   /* Above-ground-level hub-heights of turbines [m]*/
+extern float* GAD_rotorD;       /* turbine-specific rotor diameters  [m]*/
+extern float* GAD_nacelleD;     /* turbine-specific nacelle diameters [m]*/
+extern float* turbinePolyTwist; /* turbine-type-specific twist polynomial coefficients*/
+extern float* turbinePolyChord; /* turbine-type-specific chord polynomial coefficients*/
+extern float* turbinePolyPitch; /* turbine-type-specific pitch polynomial coefficients*/
+extern float* turbinePolyOmega; /* turbine-type-specific omega polynomial coefficients*/
+extern float* rnorm_vect;       /* turbine-type-specific normalized radius segment limits*/
+extern float* alpha_minmax_vect;/* turbine-type-specific maximum and minimum angle of attack for the lift/drag curves*/
+extern float* turbinePolyCl;    /* turbine-type-specific lift coefficient polynomial coefficients*/
+extern float* turbinePolyCd;    /* turbine-type-specific drag coefficient polynomial coefficients*/
+
+
+extern float* GAD_turbineVolMask; /* turbine Volume mask (0 if turbine free cell in domain, else turbine ID of cell in turbine yaw-swept volume)*/
+extern float* GAD_turbineRotorMask; /* turbine Rotor-disk  mask (0 if turbine free cell in domain, else 1.0 in turbine yaw-centric disk)*/
+extern float* GAD_forceX;         /* turbine forces in the x-direction (allocated only when GADoutputForces==1) */
+extern float* GAD_forceY;         /* turbine forces in the y-direction (allocated only when GADoutputForces==1) */
+extern float* GAD_forceZ;         /* turbine forces in the z-direction (allocated only when GADoutputForces==1) */
+
+/*----->>>>> int GADGetParams();   ----------------------------------------------------------------------
+ * Obtain parameters for the GAD sub-module
+*/
+int GADGetParams();
+
+/*----->>>>> int GADPrintParams();   ----------------------------------------------------------------------
+* Print parameters for the GAD sub-module
+*/
+int GADPrintParams();
+
+/*----->>>>> int GADInit();   ----------------------------------------------------------------------
+ * Used to broadcast and print parameters, allocate memory, and initialize configuration settings 
+ * for the GAD sub-module.
+ */
+int GADInit();
+
+/*----->>>>> int GADConstructor();   ----------------------------------------------------------------------
+* This function constructs the GAD sub-module instance by reading a GAD (netCDF) input configuration file,
+* allocating CPU-level memory for GAD arrays, and initializing these arrays with values specified in 
+* the inputs file.
+*/
+int GADConstructor();
+
+/*----->>>>> int GADInitTurbineRefChars();   ----------------------------------------------------------------------
+* This function initializes turbine reference location characteristic values (location mpi_rank and i,j,k indices).
+* dt is the model timestep; the sample window duration divided by dt gives GADsamplingAvgLength.
+*/
+int GADInitTurbineRefChars(float dt);
+
+/*----->>>>> int GADCreateTurbineVolMask();   ----------------------------------------------------------------------
+* This function creates the swept-volume mask (of turbine IDs as floats) for the turbine array
+*/
+int GADCreateTurbineVolMask();
+
+/*----->>>>> int GADCreateTurbineRotorMask();   ----------------------------------------------------------------------
+* This function creates the yaw-specific rotor-disk mask for turbines in the simulation
+*/
+int GADCreateTurbineRotorMask();
+
+/*----->>>>> int GADUpdateTurbineRotorMask()   ----------------------------------------------------------------------
+* This function updates (from GAD_rotorTheta) the yaw-specific rotor-disk mask for turbines in the simulation
+*/
+int GADUpdateTurbineRotorMask();
+
+/*----->>>>> int GADDestructor();   ----------------------------------------------------------------------
+* This function frees allocated memory of turbine characteristics arrays in the GAD module
+*/
+int GADDestructor();
+
+/*----->>>>> int GADCleanup();  ----------------------------------------------------------------------
+* Used to free all malloced memory by the GAD module.
+*/
+int GADCleanup();
+
+#endif // _GAD_H
diff --git a/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice.cu b/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice.cu
new file mode 100644
index 0000000..25c355e
--- /dev/null
+++ b/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice.cu
@@ -0,0 +1,241 @@
+/* FastEddy®: SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice.cu
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/*---URBAN*/
+/*Parameters*/
+__constant__ int urbanSelector_d;          /* urban selector: 0=off; values 1 and 2 select the theta/rho/aux-scalar treatment used by the cudaDevice_UrbanDragMethod* routines */
+__constant__ float cd_build_d;             /* c_d coefficient (m-1) used by the drag-based building formulation: -c_d|u|u_i */
+__constant__ float ct_build_d;             /* c_t coefficient (s-1) used by the drag-based building formulation: -c_t(rho*theta-rho_b*theta_b) & -c_t(rho-rho_b) */
+__constant__ float delta_aware_bdg_d;      /* scale-aware correction for building forcing and limiters */
+float* building_mask_d;                    /* building mask field: 0 (atmosphere) or 1 (building); device pointer allocated in cuda_urbanDeviceSetup() */
+__constant__ int urban_heatRedis_d;        /* selector to activate surface heat redistribution (active when > 0) */
+float *urban_heat_redis_d;                 /* Base Address of memory containing 2d map of heat redistribution coefficient in urban areas */
+
+/*#################------------ URBAN submodule function definitions ------------------#############*/
+/*----->>>>> int cuda_urbanDeviceSetup();       ---------------------------------------------------------
+ * Used to cudaMalloc and cudaMemcpy parameters and array fields for the URBAN_CUDA submodule.
+ * Copies the scalar URBAN parameters into their __constant__ device symbols (including urban_heatRedis_d,
+ * which the device code reads to decide whether heat redistribution is active), allocates the device
+ * building mask and, when urban_heatRedis > 0, the 2-d heat redistribution map, and uploads both from the host.
+ * The return codes of the CUDA calls are not inspected; the function always returns CUDA_URBAN_SUCCESS.
+*/
+extern "C" int cuda_urbanDeviceSetup(){
+   int errorCode = CUDA_URBAN_SUCCESS;
+   int Nelems;
+
+   cudaMemcpyToSymbol(urbanSelector_d, &urbanSelector, sizeof(int));
+   cudaMemcpyToSymbol(cd_build_d, &cd_build, sizeof(float));
+   cudaMemcpyToSymbol(ct_build_d, &ct_build, sizeof(float));
+
+   /* 3-d building mask, sized including the Nh padding on every side */
+   Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
+   fecuda_DeviceMalloc(Nelems*sizeof(float), &building_mask_d);
+   cudaMemcpy(building_mask_d, building_mask, Nelems*sizeof(float), cudaMemcpyHostToDevice);
+
+   cudaMemcpyToSymbol(delta_aware_bdg_d, &delta_aware_bdg, sizeof(float));
+
+   /* Without this copy the device-side selector keeps its zero initial value and the
+      redistribution branches in cudaDevice_URBANinter never execute. */
+   cudaMemcpyToSymbol(urban_heatRedis_d, &urban_heatRedis, sizeof(int));
+
+   if(urban_heatRedis > 0){
+     /* 2-d (horizontal) redistribution coefficient map */
+     Nelems = (Nxp+2*Nh)*(Nyp+2*Nh);
+     fecuda_DeviceMalloc(Nelems*sizeof(float), &urban_heat_redis_d);
+     cudaMemcpy(urban_heat_redis_d, urban_heat_redis, Nelems*sizeof(float), cudaMemcpyHostToDevice);
+   }
+
+   return(errorCode);
+} //end cuda_urbanDeviceSetup()
+
+/*----->>>>> extern "C" int cuda_urbanDeviceCleanup();  -----------------------------------------------------------
+Releases the device arrays owned by the URBAN submodule: the building mask always, and the heat
+redistribution map only when urban_heatRedis > 0. Always returns CUDA_URBAN_SUCCESS.
+*/
+extern "C" int cuda_urbanDeviceCleanup(){
+   const int heatRedisActive = (urban_heatRedis > 0);
+   int errorCode = CUDA_URBAN_SUCCESS;
+
+   /* Building mask is allocated unconditionally by the setup routine */
+   cudaFree(building_mask_d);
+
+   /* The redistribution map exists only when the option is active */
+   if(heatRedisActive){
+     cudaFree(urban_heat_redis_d);
+   }
+
+   return(errorCode);
+}//end cuda_urbanDeviceCleanup()
+
+/* Kernel: intermediate-stage URBAN adjustments of surface-layer quantities.
+ * Thread indices i,j,k come from the x,y,z block/thread coordinates; only threads with i in [iMin_d,iMax_d),
+ * j in [jMin_d,jMax_d) and k == kMin_d do any work. For such a column the kernel
+ *   1) optionally calls cudaDevice_z0tdyn() (dynamic z0t), skipping columns flagged for heat redistribution,
+ *   2) zeroes selected tau-field entries and surface fluxes where bdg_mask > 0 at k == kMin_d,
+ *   3) scales htFlux (and qFlux when moisture is on) by urban_redis where that coefficient exceeds 1.
+ * sea_mask is read only when surflayer_offshore_d == 1 and urban_redis only when urban_heatRedis_d > 0.
+ */
+__global__ void cudaDevice_URBANinter(float* z0m, float* z0t, float* hydroTauFlds, float* moistTauFlds, 
+		                      float* fricVel, float* htFlux, float* qFlux, float* invOblen, 
+				      float* bdg_mask, float* sea_mask, float* urban_redis){
+
+   int cellsPerFld;
+   int cell3d, cell2d;
+   int wantDynZ0t;
+
+   /*Establish necessary indices for spatial locality*/
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+
+   /* Nothing to do outside the non-halo horizontal range or away from the k == kMin_d level */
+   if((i < iMin_d)||(i >= iMax_d)||
+      (j < jMin_d)||(j >= jMax_d)||
+      (k != kMin_d)){
+      return;
+   }
+
+   cellsPerFld = (Nx_d+2*Nh_d)*(Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   cell3d = i*((Ny_d+2*Nh_d)*(Nz_d+2*Nh_d)) + j*(Nz_d+2*Nh_d) + k;
+   cell2d = i*(Ny_d+2*Nh_d) + j; // 2-dimensional (horizontal index)
+
+   //Standard dynamic z0t or redistribution adjustment z0t as appropriate
+   wantDynZ0t = 0;
+   if(surflayer_z0tdyn_d > 0){
+      if(surflayer_offshore_d == 0){
+         wantDynZ0t = 1;
+      }else if(surflayer_offshore_d == 1){
+         wantDynZ0t = (sea_mask[cell2d] < 1e-4);
+      }
+   }
+   //With redistribution active, only compute where redistributed heat flux will NOT occur
+   if(wantDynZ0t && ((urban_heatRedis_d <= 0) || (urban_redis[cell2d] <= (1.0+1e-5)))){
+      cudaDevice_z0tdyn(&z0m[cell2d], &z0t[cell2d], &fricVel[cell2d]);
+   }
+
+   //Intermediate-timestep stage calculations inside the building mask
+   if(bdg_mask[cell3d] > 0.0){
+      hydroTauFlds[2*cellsPerFld+cell3d] = 0.0;
+      hydroTauFlds[3*cellsPerFld+cell3d] = 0.0;
+      hydroTauFlds[8*cellsPerFld+cell3d] = 0.0;
+      fricVel[cell2d] = 0.0;
+      htFlux[cell2d] = 0.0;
+      invOblen[cell2d] = 0.0;
+      if(moistureSelector_d > 0){
+         moistTauFlds[2*cellsPerFld+cell3d] = 0.0;
+         qFlux[cell2d] = 0.0;
+      }
+   }
+
+   //Urban heat redistribution: scale the surface fluxes where the coefficient exceeds one
+   if((urban_heatRedis_d > 0) && (urban_redis[cell2d] > (1.0+1e-5))){
+      htFlux[cell2d] = urban_redis[cell2d]*htFlux[cell2d];
+      if(moistureSelector_d > 0){
+         qFlux[cell2d] = urban_redis[cell2d]*qFlux[cell2d];
+      }
+   }
+
+} // end cudaDevice_URBANinter()
+
+/* Kernel: final-stage URBAN forcing of the tendencies (Frhs fields).
+ * Thread indices i,j,k come from the x,y,z block/thread coordinates; only threads inside
+ * [iMin_d,iMax_d) x [jMin_d,jMax_d) x [kMin_d,kMax_d) do any work. Each such thread applies
+ * cudaDevice_UrbanDragMethod() to the hydro tendencies of its cell, then the aux-scalar variant to each of the
+ * NhydroAuxScalars_d scalars, and, when moistureSelector_d > 0, the moisture variant to each of the
+ * moistureNvars_d moisture tendencies.
+ */
+__global__ void cudaDevice_URBANfinal(float* hydroFlds_d, float* hydroFldsFrhs_d, float* hydroBaseStateFlds_d, 
+		                      float* hydroAuxScalars_d, float* hydroAuxScalarsFrhs_d,
+				      float* hydroFldsFrhsMoist_d,
+				      float* building_mask_d){
+
+   int n;
+   int cellsPerFld;
+   int cell;
+   float* maskHere;
+
+   /*Establish necessary indices for spatial locality*/
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+
+   /* Skip threads outside the range of non-halo cells */
+   if((i < iMin_d)||(i >= iMax_d)||
+      (j < jMin_d)||(j >= jMax_d)||
+      (k < kMin_d)||(k >= kMax_d)){
+      return;
+   }
+
+   cellsPerFld = (Nx_d+2*Nh_d)*(Ny_d+2*Nh_d)*(Nz_d+2*Nh_d);
+   cell = i*((Ny_d+2*Nh_d)*(Nz_d+2*Nh_d)) + j*(Nz_d+2*Nh_d) + k;
+   maskHere = &building_mask_d[cell];
+
+   /* Momentum, theta and rho tendencies */
+   cudaDevice_UrbanDragMethod(&hydroFlds_d[cellsPerFld*RHO_INDX+cell],
+                              &hydroFlds_d[cellsPerFld*U_INDX+cell],
+                              &hydroFlds_d[cellsPerFld*V_INDX+cell],
+                              &hydroFlds_d[cellsPerFld*W_INDX+cell],
+                              &hydroFlds_d[cellsPerFld*THETA_INDX+cell],
+                              &hydroBaseStateFlds_d[cellsPerFld*THETA_INDX+cell],
+                              &hydroBaseStateFlds_d[cellsPerFld*RHO_INDX+cell],
+                              &hydroFldsFrhs_d[cellsPerFld*U_INDX+cell],
+                              &hydroFldsFrhs_d[cellsPerFld*V_INDX+cell],
+                              &hydroFldsFrhs_d[cellsPerFld*W_INDX+cell],
+                              &hydroFldsFrhs_d[cellsPerFld*THETA_INDX+cell],
+                              &hydroFldsFrhs_d[cellsPerFld*RHO_INDX+cell],
+                              maskHere);
+
+   /* Auxiliary scalar tendencies (loop body never runs when NhydroAuxScalars_d <= 0) */
+   for(n=0; n < NhydroAuxScalars_d; n++){
+      cudaDevice_UrbanDragMethodAuxScalar(&hydroAuxScalars_d[cellsPerFld*n+cell], &hydroAuxScalarsFrhs_d[cellsPerFld*n+cell], maskHere);
+   }
+
+   /* Moisture tendencies */
+   if(moistureSelector_d > 0){
+      for(n=0; n < moistureNvars_d; n++){
+         cudaDevice_UrbanDragMethodMoist(&hydroFldsFrhsMoist_d[cellsPerFld*n+cell], maskHere);
+      }
+   }
+
+} // end cudaDevice_URBANfinal()
+
+/*----->>>>> __device__ void  cudaDevice_UrbanDragMethod();  --------------------------------------------------
+* Applies the building forcing to the tendencies of a single cell.
+* Momentum: each Frhs_{u,v,w} is reduced by cd_build_d*delta_aware_bdg_d*|vel|*momentum*mask, with the magnitude
+* of the correction capped at 1.25 times the magnitude of the incoming tendency.
+* Theta/rho: urbanSelector_d==1 scales Frhs_th and Frhs_rho by (1-mask); urbanSelector_d==2 subtracts a
+* ct_build_d-based relaxation toward the base state, capped the same way; other values leave them untouched.
+* All inputs are read before any tendency is written.
+*/
+__device__ void cudaDevice_UrbanDragMethod(float* rho, float* u, float* v, float* w, float* th, float* th_base, float* rho_base, float* Frhs_u, float* Frhs_v, float* Frhs_w, float* Frhs_th, float* Frhs_rho, float* bdg_mask){
+  const float capFactor = 1.25;                             // caps are capFactor*|incoming tendency|
+  const float dragScale = cd_build_d*delta_aware_bdg_d;     // momentum drag prefactor
+  const float dampScale = ct_build_d*delta_aware_bdg_d;     // theta/rho damping prefactor
+  float velU, velV, velW;
+  float dragU, dragV, dragW;
+  float dampTh, dampRho;
+  float capU, capV, capW, capTh, capRho;
+
+  /* Velocities recovered from the density-weighted momenta */
+  velU = *u/ *rho;
+  velV = *v/ *rho;
+  velW = *w/ *rho;
+
+  /* Uncapped forcing terms */
+  dragU   = dragScale*fabsf(velU)*(*u)*(*bdg_mask);
+  dragV   = dragScale*fabsf(velV)*(*v)*(*bdg_mask);
+  dragW   = dragScale*fabsf(velW)*(*w)*(*bdg_mask);
+  dampTh  = dampScale*((*th)-(*th_base))*(*bdg_mask);
+  dampRho = dampScale*((*rho)-(*rho_base))*(*bdg_mask);
+
+  /* Caps derived from the tendencies as they arrive */
+  capU   = fabsf((*Frhs_u)*capFactor);
+  capV   = fabsf((*Frhs_v)*capFactor);
+  capW   = fabsf((*Frhs_w)*capFactor);
+  capTh  = fabsf((*Frhs_th)*capFactor);
+  capRho = fabsf((*Frhs_rho)*capFactor);
+
+  /* Momentum tendencies: subtract the capped drag, keeping the drag's sign */
+  *Frhs_u -= copysign(1.0,dragU)*fminf(fabsf(dragU),capU);
+  *Frhs_v -= copysign(1.0,dragV)*fminf(fabsf(dragV),capV);
+  *Frhs_w -= copysign(1.0,dragW)*fminf(fabsf(dragW),capW);
+
+  /* Theta and rho tendencies depend on the selected urban option */
+  switch(urbanSelector_d){
+    case 1:
+      *Frhs_th *= (1.0-(*bdg_mask));
+      *Frhs_rho *= (1.0-(*bdg_mask));
+      break;
+    case 2:
+      *Frhs_th -= copysign(1.0,dampTh)*fminf(fabsf(dampTh),capTh);
+      *Frhs_rho -= copysign(1.0,dampRho)*fminf(fabsf(dampRho),capRho);
+      break;
+    default:
+      break;
+  }
+
+} //end cudaDevice_UrbanDragMethod
+
+/* Scales one moisture tendency by (1 - building mask), i.e. it is unchanged where the mask is 0
+ * and removed where the mask is 1. */
+__device__ void cudaDevice_UrbanDragMethodMoist(float * Frhs_qMoistFld, float* bdg_mask){
+
+  *Frhs_qMoistFld *= (1.0-(*bdg_mask));
+
+} //end cudaDevice_UrbanDragMethodMoist
+
+/* Applies the building forcing to one auxiliary-scalar tendency.
+ * urbanSelector_d==1: the tendency is scaled by (1 - mask).
+ * urbanSelector_d==2: cd_build_d*delta_aware_bdg_d*|scalar|*mask is subtracted, with its magnitude capped at
+ *                     1.25 times the magnitude of the incoming tendency.
+ * Any other selector value leaves the tendency untouched. */
+__device__ void cudaDevice_UrbanDragMethodAuxScalar(float* AuxScalar, float* Frhs_AuxScalar, float* bdg_mask){
+  const float capFactor = 1.25;
+  float damping;   // uncapped damping term inside the building mask
+  float cap;       // limiter for damping forcing inside building mask
+
+  switch(urbanSelector_d){
+    case 1:
+      *Frhs_AuxScalar *= (1.0-(*bdg_mask));
+      break;
+    case 2:
+      damping = cd_build_d*delta_aware_bdg_d*fabsf(*AuxScalar)*(*bdg_mask);
+      cap = fabsf((*Frhs_AuxScalar)*capFactor);
+      *Frhs_AuxScalar -= copysign(1.0,damping)*fminf(fabsf(damping),cap);
+      break;
+    default:
+      break;
+  }
+
+} //end cudaDevice_UrbanDragMethodAuxScalar
diff --git a/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice_cu.h b/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice_cu.h
new file mode 100644
index 0000000..ae9347a
--- /dev/null
+++ b/SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice_cu.h
@@ -0,0 +1,60 @@
+/* FastEddy®: SRC/EXTENSIONS/URBAN/CUDA/cuda_urbanDevice_cu.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#ifndef _URBAN_CUDADEV_CU_H
+#define _URBAN_CUDADEV_CU_H
+
+/*urban_ return codes */
+#define CUDA_URBAN_SUCCESS               0
+
+/*##############------------------- URBAN submodule variable declarations ---------------------#################*/
+/* Parameters */
+extern __constant__ int urbanSelector_d;          /* urban selector: 0=off, 1=on, 2=on with thermal relaxation towards base state */
+extern __constant__ float cd_build_d;             /* c_d coefficient (m-1) used by the drag-based building formulation: -c_d|u_i|u_i */
+extern __constant__ float ct_build_d;             /* c_t coefficient (s-1) used by the drag-based building formulation: -c_t(rho*theta-rho_b*theta_b) & -c_t(rho-rho_b) */
+extern __constant__ float delta_aware_bdg_d;      /* scale-aware correction for building forcing and limiters */
+/* array fields */
+extern float* building_mask_d;                    /* Base Address of memory containing building mask field: 0 (atmosphere) or 1 (building) */
+extern __constant__ int urban_heatRedis_d;        /* selector to activate surface heat redistribution */
+extern float *urban_heat_redis_d;                 /* Base Address of memory containing 2d map of heat redistribution coefficient in urban areas */
+
+/*##############-------------- URBAN_CUDADEV submodule function declarations ------------------############*/
+
+/*----->>>>> int cuda_urbanDeviceSetup();      -----------------------------------------------------------------
+* Used to cudaMalloc and cudaMemcpy parameters and coordinate arrays for the URBAN_CUDADEV submodule.
+*/
+extern "C" int cuda_urbanDeviceSetup();
+
+/*----->>>>> int cuda_urbanDeviceCleanup();    ---------------------------------------------------------------
+* Used to free all malloced memory by the URBAN_CUDADEV submodule.
+*/
+extern "C" int cuda_urbanDeviceCleanup();
+
+__global__ void cudaDevice_URBANinter(float* z0m, float* z0t, float* hydroTauFlds, float* moistTauFlds,
+                                      float* fricVel, float* htFlux, float* qFlux, float* invOblen,
+                                      float* bdg_mask, float* sea_mask, float* urban_redis);
+__global__ void cudaDevice_URBANfinal(float* hydroFlds_d, float* hydroFldsFrhs_d, float* hydroBaseStateFlds_d,
+	                              float* hydroAuxScalars_d, float* hydroAuxScalarsFrhs_d,
+                                      float* hydroFldsFrhsMoist_d,
+			      	      float* building_mask_d);
+
+/*----->>>>> __device__ void  cudaDevice_UrbanDragMethod();  --------------------------------------------------
+* This CUDA device function applies the urban drag-based building forcing to the Frhs_* tendencies in place
+*/
+__device__ void cudaDevice_UrbanDragMethod(float* rho, float* u, float* v, float* w, float* th, float* th_base, float* rho_base, float* Frhs_u, float* Frhs_v, float* Frhs_w, float* Frhs_th, float* Frhs_rho, float* bdg_mask);
+__device__ void cudaDevice_UrbanDragMethodMoist(float* Frhs_qMoistFld, float* bdg_mask);
+__device__ void cudaDevice_UrbanDragMethodAuxScalar(float* AuxScalar, float* Frhs_AuxScalar, float* bdg_mask);
+
+#endif // _URBAN_CUDADEV_CU_H
diff --git a/SRC/EXTENSIONS/URBAN/urban.c b/SRC/EXTENSIONS/URBAN/urban.c
new file mode 100644
index 0000000..b69835c
--- /dev/null
+++ b/SRC/EXTENSIONS/URBAN/urban.c
@@ -0,0 +1,115 @@
+/* FastEddy®: SRC/EXTENSIONS/URBAN/urban.c 
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/*Urban parameters*/
+int urbanSelector;          /* urban selector: 0=off, 1=on, 2=on with thermal relaxation towards base state */
+float cd_build;             /* c_d coefficient (m-1) used by the drag-based building formulation: -c_d|u_i|u_i */
+float ct_build;             /* c_t coefficient (s-1) used by the drag-based building formulation: -c_t(rho*theta-rho_b*theta_b) & -c_t(rho-rho_b) */
+float *building_mask;       /* Base Address of memory containing building mask 0,1 field */
+float delta_aware_bdg;      /* scale-aware correction for building forcing and limiters */
+int urban_heatRedis;        /* selector to activate surface heat redistribution */
+float *urban_heat_redis;    /* Base Address of memory containing 2d map of heat redistribution coefficient in urban areas */
+
+/*----->>>>> int URBANGetParams();   ----------------------------------------------------------------------
+ * Obtain parameters for the URBAN sub-module
+*/
+int URBANGetParams(){
+   int status = URBAN_SUCCESS;
+
+   urbanSelector = 0; /* off unless the optional query below changes it */
+   status = queryIntegerParameter("urbanSelector", &urbanSelector, 0, 2, PARAM_OPTIONAL);
+   if(urbanSelector <= 0){
+     return(status); /* extension disabled: the remaining parameters are not queried */
+   }
+   cd_build = 100.0; /* value of 100.0 assigned ahead of the optional query */
+   status = queryFloatParameter("cd_build", &cd_build, 0.0, 1e+8, PARAM_OPTIONAL);
+   ct_build = 10.0; /* value of 10.0 assigned ahead of the optional query */
+   status = queryFloatParameter("ct_build", &ct_build, 0.0, 1e+8, PARAM_OPTIONAL);
+   urban_heatRedis = 0; /* heat redistribution off ahead of the optional query */
+   status = queryIntegerParameter("urban_heatRedis", &urban_heatRedis, 0, 1, PARAM_OPTIONAL);
+   return(status); /* code of the last query performed */
+} //end URBANGetParams()
+
+/*----->>>>> int URBANPrintParams();   ----------------------------------------------------------------------
+* Print parameters for the URBAN sub-module
+*/
+int URBANPrintParams(){
+   int status = URBAN_SUCCESS; /* this routine always reports success */
+   /* Only world rank 0 prints the parameter descriptions. */
+   if(mpi_rank_world != 0){ return(status); }
+   printParameter("urbanSelector", "urban selector: 0=off, 1=on, 2=on with thermal relaxation towards base state");
+   if(urbanSelector > 0){ /* dependent parameters are listed only when the extension is on */
+     printParameter("cd_build", "drag coefficient for buildings when urbanSelector > 0");
+     printParameter("ct_build", "temperature and density damping coefficient for buildings when urbanSelector > 0");
+     printParameter("urban_heatRedis", "selector to activate surface heat redistribution");
+   }
+   return(status);
+} //end URBANPrintParams()
+
+/*----->>>>> int URBANInit();   ----------------------------------------------------------------------
+ * Used to broadcast parameters, allocate memory, and initialize configuration settings 
+ * for the URBAN sub-module.
+*/
+int URBANInit(){
+   int errorCode = URBAN_SUCCESS;
+   int ioStatus; /* return code of the most recent IO registration call */
+   char fldName[MAX_HC_FLDNAME_LENGTH];
+
+   MPI_Bcast(&urbanSelector, 1, MPI_INT, 0, MPI_COMM_WORLD);
+   if(urbanSelector <= 0){ return(errorCode); } /* extension off: nothing else to broadcast, allocate or register */
+   MPI_Bcast(&cd_build, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+   MPI_Bcast(&ct_build, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
+   MPI_Bcast(&urban_heatRedis, 1, MPI_INT, 0, MPI_COMM_WORLD);
+   /* Returns URBAN_FAIL on a NULL allocation, otherwise the first non-zero IO code (URBAN_SUCCESS if none). */
+   delta_aware_bdg = 1.0/fmin(pow(d_xi*d_eta*d_zeta,1.0/3.0),1.0);
+   printf("urban:delta_aware_bdg = %f\n",delta_aware_bdg);
+   building_mask = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "building_mask");
+   if(building_mask == NULL){ return(URBAN_FAIL); } /* never hand an unallocated field to IO */
+   snprintf(fldName, sizeof(fldName), "BuildingMask"); /* character count is not an error code, so it is not stored */
+   ioStatus = ioRegisterVar(&fldName[0], "float", 4, dims4d, building_mask);
+   if(errorCode == URBAN_SUCCESS){ errorCode = ioStatus; } /* remember the first failure */
+   ioStatus = ioAddStandardAttrs("BuildingMask", "-", "Building Mask", NULL);
+   if(errorCode == URBAN_SUCCESS){ errorCode = ioStatus; }
+   printf("urban:Field = %s stored at %p, has been registered with IO.\n", &fldName[0], (void*)building_mask);
+   fflush(stdout);
+   if(urban_heatRedis > 0){ /* the 2-d redistribution map exists only when its selector is on */
+     urban_heat_redis = memAllocateFloat2DField(Nxp, Nyp, Nh, "urban_heat_redis");
+     if(urban_heat_redis == NULL){ return(URBAN_FAIL); }
+     snprintf(fldName, sizeof(fldName), "UrbanHeatRedis");
+     ioStatus = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, urban_heat_redis);
+     if(errorCode == URBAN_SUCCESS){ errorCode = ioStatus; }
+     ioStatus = ioAddStandardAttrs("UrbanHeatRedis", "-", "Urban Heat Redistribution Coefficient", NULL);
+     if(errorCode == URBAN_SUCCESS){ errorCode = ioStatus; }
+     printf("urban:Field = %s stored at %p, has been registered with IO.\n", &fldName[0], (void*)urban_heat_redis);
+     fflush(stdout);
+   }
+   return(errorCode);
+} //end URBANInit()
+
+/*----->>>>> int URBANCleanup();  ----------------------------------------------------------------------
+* Used to free all malloced memory by the URBAN module.
+*/
+int URBANCleanup(){
+   /* URBANInit allocates these fields only when urbanSelector > 0, so there is nothing to free otherwise. */
+   if(urbanSelector <= 0){
+     return(URBAN_SUCCESS);
+   }
+   free(building_mask);
+   /* the heat redistribution map is released only when its selector is on */
+   if(urban_heatRedis > 0){
+     free(urban_heat_redis);
+   }
+   return(URBAN_SUCCESS);
+}//end URBANCleanup()
diff --git a/SRC/EXTENSIONS/URBAN/urban.h b/SRC/EXTENSIONS/URBAN/urban.h
new file mode 100644
index 0000000..22df96b
--- /dev/null
+++ b/SRC/EXTENSIONS/URBAN/urban.h
@@ -0,0 +1,53 @@
+/* FastEddy®: SRC/EXTENSIONS/URBAN/urban.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#ifndef _URBAN_H
+#define _URBAN_H
+
+/*URBAN return codes */
+#define URBAN_SUCCESS    0
+#define URBAN_FAIL       10
+
+/*---URBAN parameters*/
+extern int urbanSelector;      /* urban selector: 0=off, 1=on, 2=on with thermal relaxation towards base state */
+extern float cd_build;         /* c_d coefficient used by the drag-based building formulation: -c_d|u_i|u_i */
+extern float ct_build;         /* c_t coefficient (s-1) used by the drag-based building formulation: -c_t(rho*theta-rho_b*theta_b) & -c_t(rho-rho_b) */
+extern float *building_mask;   /* Base Address of memory containing building mask 0,1 field */
+extern float delta_aware_bdg;  /* scale-aware correction for building forcing and limiters */
+extern int urban_heatRedis;        /* selector to activate surface heat redistribution */
+extern float *urban_heat_redis;    /* Base Address of memory containing 2d map of heat redistribution coefficient in urban areas */
+
+/*----->>>>> int URBANGetParams();   ----------------------------------------------------------------------
+ * Obtain parameters for the URBAN sub-module
+*/
+int URBANGetParams();
+
+/*----->>>>> int URBANPrintParams();   ----------------------------------------------------------------------
+* Print parameters for the URBAN sub-module
+*/
+int URBANPrintParams();
+
+/*----->>>>> int URBANInit();   ----------------------------------------------------------------------
+ * Used to broadcast parameters, allocate memory, and initialize configuration settings
+ * for the URBAN sub-module.
+*/
+int URBANInit();
+
+/*----->>>>> int URBANCleanup();  ----------------------------------------------------------------------
+* Used to free all malloced memory by the URBAN module.
+*/
+int URBANCleanup();
+
+#endif // _URBAN_H
diff --git a/SRC/FECUDA/fecuda.c b/SRC/FECUDA/fecuda.c
index ddb570a..9a6d446 100644
--- a/SRC/FECUDA/fecuda.c
+++ b/SRC/FECUDA/fecuda.c
@@ -39,6 +39,9 @@ int fecuda_GetParams(){
    int errorCode = FECUDA_SUCCESS;
 
    /*query for each FECUDA parameter */
+   tBx=1; // default to 1
+   tBy=8; // default to 8
+   tBz=32; // default to 32
    errorCode = queryIntegerParameter("tBx", &tBx, 1, INT_MAX, PARAM_MANDATORY);
    errorCode = queryIntegerParameter("tBy", &tBy, 1, INT_MAX, PARAM_MANDATORY);
    errorCode = queryIntegerParameter("tBz", &tBz, 1, INT_MAX, PARAM_MANDATORY);
diff --git a/SRC/FEMAIN/FastEddy.c b/SRC/FEMAIN/FastEddy.c
index 7cd0842..51d85ab 100644
--- a/SRC/FEMAIN/FastEddy.c
+++ b/SRC/FEMAIN/FastEddy.c
@@ -35,6 +35,11 @@
 #include <time_integration.h>
 #include <cuda_timeInt.h>
 
+/*Model-Extensions includes*/
+#ifdef GAD_EXT
+  #include <GAD.h>
+#endif
+
 /***    main.c    ***/
 int main(int argc, char **argv){
   int errorCode;
@@ -213,7 +218,11 @@ int main(int argc, char **argv){
 #endif
     printf("Reading coordinates and input conditions from %s\n",inFile);
     fflush(stdout);
+#ifdef GAD_EXT
+    errorCode = ioReadNetCDFinFileSingleTime(0, Nx, Ny, Nz, Nh, GADNumTurbines);
+#else
     errorCode = ioReadNetCDFinFileSingleTime(0, Nx, Ny, Nz, Nh);
+#endif
   }else{
 #ifdef DEBUG_INITIALIZATION 
     printf("mpi_rank_world--%d/%d inFile == NULL !!\n",mpi_rank_world,mpi_size_world);
@@ -236,14 +245,15 @@ int main(int argc, char **argv){
   printf("mpi_rank_world--%d/%d Beginning secondary and CUDA preparations!\n",mpi_rank_world,mpi_size_world);
   fflush(stdout);
 #endif
-  /*Allow for Secondary Preparations*/
+  /*Allow for GRID Module Secondary Preparations*/
   errorCode = gridSecondaryPreparations();
+
 #ifdef DEBUG_INITIALIZATION 
   printf("mpi_rank_world--%d/%d Setting hydro_core Base State!\n",mpi_rank_world,mpi_size_world);
   fflush(stdout);
 #endif
-  /*Now that the grid is definitely defined, setup the base state  */
-  errorCode = hydro_coreSetBaseState();
+  /*Now that the grid is definitely defined, perform any secondary HYDRO_CORE module preparations  */
+  errorCode = hydro_coreSecondaryPreparations(dt);
 
   /* inFile exists, allow HYDRO_CORE to preparations specifically from initial conditions */
   if(inFile != NULL){
@@ -383,14 +393,23 @@ int main(int argc, char **argv){
        /* Dump the root output file. */
 #ifndef IO_OFF
        if(ioOutputMode==0){
+#ifdef GAD_EXT
+         errorCode = ioWriteNetCDFoutFileSingleTime(it, Nx, Ny, Nz, Nh, GADNumTurbines);
+#else
          errorCode = ioWriteNetCDFoutFileSingleTime(it, Nx, Ny, Nz, Nh);
+#endif
        }else if(ioOutputMode==1){
+#ifdef GAD_EXT
+         errorCode = ioWriteBinaryoutFileSingleTime(it, Nxp, Nyp, Nzp, Nh, GADNumTurbines);
+#else
          errorCode = ioWriteBinaryoutFileSingleTime(it, Nxp, Nyp, Nzp, Nh);
+#endif
        }
 #endif
        mpi_t4 = MPI_Wtime();    //Mark the walltime to measure IO duration
        if(mpi_rank_world == 0){
          printf("Dumped state at timestep = %d...\n",it);
+         fflush(stdout);
        } //if mpi_rank_world
      } //end if (it%frqOutput == 0) ....   (We log summary info and dump outputs)
 #ifdef NOTCUDA 
@@ -398,6 +417,12 @@ int main(int argc, char **argv){
 #else  /* ---------------  CUDA FASTEDDY !!!!! -------------------------  */
      /*Launch the GPU batch timestep kernel*/
      errorCode = cuda_timeIntCommence(itTmp);
+#ifdef GAD_EXT
+     if(GADSelector > 0){
+	//Update the turbine rotor mask output array from new rotorTheta values updated at device-level
+        errorCode = GADUpdateTurbineRotorMask();
+     }
+#endif
            /*Build an Frhs*/
            /*Update the prognostic variables*/
            /*Do any necessary halo exchange*/
@@ -443,9 +468,17 @@ int main(int argc, char **argv){
   /*Dump the final timestep*/
 #ifndef IO_OFF
   if(ioOutputMode==0){
+#ifdef GAD_EXT
+    errorCode = ioWriteNetCDFoutFileSingleTime(it, Nx, Ny, Nz, Nh, GADNumTurbines);
+#else
     errorCode = ioWriteNetCDFoutFileSingleTime(it, Nx, Ny, Nz, Nh);
+#endif
   }else if(ioOutputMode==1){
+#ifdef GAD_EXT
+    errorCode = ioWriteBinaryoutFileSingleTime(it, Nxp, Nyp, Nzp, Nh, GADNumTurbines);
+#else
     errorCode = ioWriteBinaryoutFileSingleTime(it, Nxp, Nyp, Nzp, Nh);
+#endif
   }
 #endif
   MPI_Barrier(MPI_COMM_WORLD); 
diff --git a/SRC/FEMAIN/Makefile b/SRC/FEMAIN/Makefile
index b86fd83..5be253d 100644
--- a/SRC/FEMAIN/Makefile
+++ b/SRC/FEMAIN/Makefile
@@ -35,7 +35,6 @@ DEBUG_CFLAGS = -g
 
 DEFINES = -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DTHRUST_IGNORE_DEPRECATED_CPP_DIALECT
 
-
 TEST_CFLAGS = -Wall -m64 ${DEFINES} ${INCLUDES} ${OTHER_INCLUDES}
 ARCH_CU_FLAGS = -arch=sm_70
 TEST_CU_CFLAGS = ${ARCH_CU_FLAGS} -m64 -std=c++11 ${DEFINES} ${INCLUDES} ${OTHER_INCLUDES}
@@ -52,7 +51,6 @@ TEST_CU_LDFLAGS = -L.
 TEST_LIBS = -lm -lmpi -lstdc++ -lcurand -lcudart -lnetcdf
 TEST_CU_LIBS = -lm -lmpi -lcudart
 
-
 ################################################################################
 # Sub-directories to INCLUDE 
 # ################################################################################
@@ -118,6 +116,30 @@ TEST_OBJS = ./FastEddy.o \
 	../PARAMETERS/parameters.o \
 	../PARAMETERS/hashTable.o
 
+
+################################################################################
+# Extensions 
+################################################################################
+WITH_GAD = 0
+ifeq ($(WITH_GAD),1)
+ DEFINES+=-DGAD_EXT
+ INCLUDES+=-I../EXTENSIONS/GAD/ \
+	   -I../EXTENSIONS/GAD/CUDA/
+ $(info WITH_GAD=1)
+ $(info ${DEFINES})
+ $(info ${INCLUDES})
+endif
+
+WITH_URBAN = 0
+ifeq ($(WITH_URBAN),1)
+ DEFINES+=-DURBAN_EXT
+ INCLUDES+=-I../EXTENSIONS/URBAN/ \
+	   -I../EXTENSIONS/URBAN/CUDA/
+ $(info WITH_URBAN=1)
+ $(info ${DEFINES})
+ $(info ${INCLUDES})
+endif
+
 ################################################################################
 # Targets
 # ###############################################################################
@@ -128,7 +150,6 @@ all: FastEddy
 # Generic compile rules
 # ################################################################################
 %.o: %.cu
-	rm -rf ./FastEddy_devlink.o; \
 	$(TEST_CU_CC) $(TEST_CU_CFLAGS) -dc $< -o $@
 
 .c.o:
@@ -140,7 +161,6 @@ all: FastEddy
 
 ../TIME_INTEGBRATION/CUDA/cuda_timeIntDevice.o: ../CUDA/cuda_timeIntDevice.cu \
         ../TIME_INTEGRATION.CUDA/cuda_RKschemes.cu
-	rm -rf ./FastEddy_devlink.o; \
 	$(TEST_CU_CC) $(TEST_CU_CFLAGS) -dc $< -o $@
 
 ../IO/io.o: ../IO/io.c \
@@ -151,7 +171,6 @@ all: FastEddy
 ../FECUDA/fecuda_Device.o: ../FECUDA/fecuda_Device.cu \
 	../FECUDA/fecuda_Utils.cu \
 	../FECUDA/fecuda_PlugIns.cu 
-	rm -rf ./FastEddy_devlink.o; \
 	$(TEST_CU_CC) $(TEST_CU_CFLAGS) -dc $< -o $@
 
 ../HYDRO_CORE/CUDA/cuda_hydroCoreDevice.o: ../HYDRO_CORE/CUDA/cuda_hydroCoreDevice.cu \
@@ -171,7 +190,6 @@ all: FastEddy
         ../HYDRO_CORE/CUDA/cuda_moistureDevice.cu \
         ../HYDRO_CORE/CUDA/cuda_filtersDevice.cu \
 	../HYDRO_CORE/CUDA/cuda_cellpertDevice.cu
-	rm -rf ./FastEddy_devlink.o; \
 	$(TEST_CU_CC) $(TEST_CU_CFLAGS) -dc $< -o $@
 ################################################################################
 # Generic Executable
@@ -182,6 +200,7 @@ FastEddy: COMP_FLAGS = ${TEST_CC} ${TEST_CFLAGS}
 TEST_DEPENDENCIES = ${TEST_OBJS} ${TEST_HDRS} FastEddy_devlink.o
 
 FastEddy_devlink.o:
+	rm -rf ./FastEddy_devlink.o; \
 	$(TEST_CU_CC) ${ARCH_CU_FLAGS} ${TEST_OBJS} ${TEST_LDFLAGS} ${TEST_LIBS} -dlink -o FastEddy_devlink.o
 FastEddy: ${TEST_DEPENDENCIES}
 	${COMP_FLAGS} -o FastEddy ${TEST_OBJS} ./FastEddy_devlink.o ${TEST_LDFLAGS} ${TEST_LIBS}
diff --git a/SRC/FEMPI/fempi.c b/SRC/FEMPI/fempi.c
index 82c63d5..2fc8c54 100644
--- a/SRC/FEMPI/fempi.c
+++ b/SRC/FEMPI/fempi.c
@@ -81,6 +81,8 @@ int fempi_GetParams(){
    int errorCode = FEMPI_SUCCESS;
 
    /*query for each FEMPI parameter */
+   numProcsX=1; // default to 1
+   numProcsY=1; // default to 1
    errorCode = queryIntegerParameter("numProcsX", &numProcsX, 1, INT_MAX, PARAM_MANDATORY);
    errorCode = queryIntegerParameter("numProcsY", &numProcsY, 1, INT_MAX, PARAM_MANDATORY);
 
diff --git a/SRC/GRID/CUDA/cuda_gridDevice.cu b/SRC/GRID/CUDA/cuda_gridDevice.cu
index 042c810..2ede7ff 100644
--- a/SRC/GRID/CUDA/cuda_gridDevice.cu
+++ b/SRC/GRID/CUDA/cuda_gridDevice.cu
@@ -47,7 +47,7 @@ __constant__ int kMax_d;
 float *xPos_d;  // Cell-center position in x (meters) 
 float *yPos_d;  // Cell-center position in y (meters) 
 float *zPos_d;  // Cell-center position in z (meters) 
-float *topoPos_d; //Topography elevation (z in meters) at the cell center position in x and y. 
+float *topoPos_d; //Terrain elevation (z in meters) at the cell center position in x and y. 
 
 float *J13_d;      // dx/d_zeta
 float *J23_d;      // dy/d_zeta
diff --git a/SRC/GRID/CUDA/cuda_gridDevice_cu.h b/SRC/GRID/CUDA/cuda_gridDevice_cu.h
index 253f2f0..8e85ec6 100644
--- a/SRC/GRID/CUDA/cuda_gridDevice_cu.h
+++ b/SRC/GRID/CUDA/cuda_gridDevice_cu.h
@@ -44,7 +44,7 @@ extern __constant__ int kMax_d;
 extern float *xPos_d;  /* Cell-center position in x (meters) */
 extern float *yPos_d;  /* Cell-center position in y (meters) */
 extern float *zPos_d;  /* Cell-center position in z (meters) */
-extern float *topoPos_d; /*Topography elevation (z in meters) at the cell center position in x and y. */
+extern float *topoPos_d; /*Terrain elevation (z in meters) at the cell center position in x and y. */
 
 extern float *J13_d;      // dx/d_zeta
 extern float *J23_d;      // dy/d_zeta
diff --git a/SRC/GRID/grid.c b/SRC/GRID/grid.c
index 573c4de..d2f983d 100644
--- a/SRC/GRID/grid.c
+++ b/SRC/GRID/grid.c
@@ -29,20 +29,20 @@
 /*##################------------------- GRID module variable definitions ---------------------#################*/
 char *gridFile = NULL;
 char *topoFile = NULL;
-int Nx = 1;
-int Ny = 1;
-int Nz = 1;
-int Nh = 0;
-float d_xi = 1.0;
-float d_eta = 1.0;
-float d_zeta = 1.0;
-int coordHorizHalos = 1; //switch to setup coordiante halos as periodic, or gradient following
+int Nx = 128;
+int Ny = 122;
+int Nz = 122;
+int Nh = 3;
+float d_xi = 10.0;
+float d_eta = 10.0;
+float d_zeta = 10.0;
+int coordHorizHalos = 1; //switch to setup coordinate halos as periodic, or gradient following
 int iMin, iMax; //Constant min and max bounds of i-index accounting for only non-halos cells of the mpi_rank subdomain
 int jMin, jMax; //Constant min and max bounds of j-index accounting for only non-halos cells of the mpi_rank subdomain
 int kMin, kMax; //Constant min and max bounds of k-index accounting for only non-halos cells of the mpi_rank subdomain
 int verticalDeformSwitch; //switch to use vertical coordinate deformation
 float verticalDeformFactor; //factor to used under vertical deformation (0.0-1.0)
-float verticalDeformQuadCoeff; // quadratic term coefficient in the deformtion scheme (default = 0.0)
+float verticalDeformQuadCoeff; // quadratic term coefficient in the deformation scheme (default = 0.0)
 
 float dX, dY, dZ; //reference computational model coordinate resolution
 float dXi, dYi, dZi; //inverse of the reference computational model coordinate resolution
@@ -51,8 +51,8 @@ float dXi, dYi, dZi; //inverse of the reference computational model coordinate r
 float *xPos;  /* Cell-center position in x (meters) */
 float *yPos;  /* Cell-center position in y (meters) */
 float *zPos;  /* Cell-center position in z (meters) */
-float *topoPosGlobal; /*Topography elevation (z in meters) at the cell center position in x and y. (Global domain) */
-float *topoPos; /*Topography elevation (z in meters) at the cell center position in x and y. (per-rank domain) */
+float *topoPosGlobal; /*Terrain elevation (z in meters) at the cell center position in x and y. (Global domain) */
+float *topoPos; /*Terrain elevation (z in meters) at the cell center position in x and y. (per-rank domain) */
 
 //float *J11;      // dx/d_xi  -- assumed = 1.0
 //float *J12;      // dx/d_eta -- assumed = 0.0
@@ -108,7 +108,7 @@ int gridInit(){
    if(mpi_rank_world == 0){
       printComment("GRID parameters---");
       printParameter("gridFile", "A file containing a complete grid specification");
-      printParameter("topoFile", "A file containing topography (surface elevation in meters ASL)");
+      printParameter("topoFile", "A file containing terrain(surface elevation in meters ASL)");
       printParameter("Nx", "Number of discretised domain elements in the x (zonal) direction.");
       printParameter("Ny", "Number of discretised domain elements in the y (meridional) direction.");
       printParameter("Nz", "Number of discretised domain elements in the z (vertical) direction.");
@@ -116,7 +116,7 @@ int gridInit(){
       printParameter("d_xi", "Computational domain fixed resolution in the 'i' direction."); 
       printParameter("d_eta", "Computational domain fixed resolution in the 'j' direction."); 
       printParameter("d_zeta", "Computational domain fixed resolution in the 'k' direction."); 
-      printParameter("coordHorizHalos", "switch to setup coordiante halos as periodic=1 or gradient-following=0."); 
+      printParameter("coordHorizHalos", "switch to setup coordinate halos as periodic=1 or gradient-following=0."); 
       printParameter("verticalDeformSwitch", "switch to use vertical coordinate deformation 0=off, 1=on"); 
       printParameter("verticalDeformFactor", "deformation factor (0.0=max compression,  1.0=no compression)"); 
       printParameter("verticalDeformQuadCoeff", "deformation factor (0.0=max compression,  1.0=no compression)"); 
@@ -273,6 +273,21 @@ int gridInit(){
        fflush(stdout);
        errorCode = GRID_IO_CALL_FAIL;
      }
+
+   /* Add NetCDF attributes for coordinate variables */
+   if(ioerrorCode == GRID_SUCCESS){
+     ioerrorCode = ioAddStandardAttrs("xPos", "m", "x-coordinate of cell center", "projection_x_coordinate");
+     ioerrorCode = ioAddStandardAttrs("yPos", "m", "y-coordinate of cell center", "projection_y_coordinate");
+     ioerrorCode = ioAddStandardAttrs("zPos", "m", "z-coordinate of cell center", "height");
+     ioerrorCode = ioAddStandardAttrs("topoPos", "m", "Terrain elevation", "surface_altitude");
+
+     if(ioerrorCode != 0){
+       printf("Error adding standard attributes to GRID coordinate fields.\n");
+       fflush(stdout);
+       errorCode = GRID_IO_CALL_FAIL;
+     }
+   }
+     
 #ifdef DEBUG
 //#if 1
      errorCode = ioRegisterVar("D_Jac", "float", 4, dims4d, D_Jac);
@@ -282,9 +297,18 @@ int gridInit(){
      errorCode = ioRegisterVar("J31", "float", 4, dims4d, J31);
      errorCode = ioRegisterVar("J32", "float", 4, dims4d, J32);
      errorCode = ioRegisterVar("J33", "float", 4, dims4d, J33);
+
+     /* Add attributes for Jacobian and metric tensor fields */
+     ioerrorCode = ioAddStandardAttrs("D_Jac", "-", "Jacobian determinant", NULL);
+     ioerrorCode = ioAddStandardAttrs("invD_Jac", "-", "Inverse Jacobian determinant", NULL);
+     ioerrorCode = ioAddStandardAttrs("J13", "-", "Metric tensor component dx/d_zeta", NULL);
+     ioerrorCode = ioAddStandardAttrs("J23", "-", "Metric tensor component dy/d_zeta", NULL);
+     ioerrorCode = ioAddStandardAttrs("J31", "-", "Metric tensor component dz/d_xi", NULL);
+     ioerrorCode = ioAddStandardAttrs("J32", "-", "Metric tensor component dz/d_eta", NULL);
+     ioerrorCode = ioAddStandardAttrs("J33", "-", "Metric tensor component dz/d_zeta", NULL);
 #endif 
    } // end if errorCode indicates no errors thus far
-  
+
 #ifdef DEBUG
 //#if 1
    printf("mpi_rank_world %d/%d: Finished gridInit()!\n",mpi_rank_world,mpi_size_world);
diff --git a/SRC/GRID/grid.h b/SRC/GRID/grid.h
index adc9444..90f6bba 100644
--- a/SRC/GRID/grid.h
+++ b/SRC/GRID/grid.h
@@ -30,10 +30,10 @@ extern char *topoFile;  //A file containing a complete grid specification
 extern int Nh;          //Number of halo cells to be used (dependent on largest stencil extent)
 extern int Nx, Ny, Nz;  //Complete Cartesian Domain extents in the x, y, and z directions 
 extern float d_xi, d_eta, d_zeta; //Computational Domain fixed resolutions (i, j, k respectively)
-extern int coordHorizHalos; //switch to setup coordiante halos as periodic, or gradient following
+extern int coordHorizHalos; //switch to setup coordinate halos as periodic, or gradient following
 extern int verticalDeformSwitch; //switch to use vertical coordinate deformation
 extern float verticalDeformFactor; // factor used under vertical deformation (0.0-1.0)
-extern float verticalDeformQuadCoeff; // quadratic term coefficient in the deformtion scheme (default = 0.0)
+extern float verticalDeformQuadCoeff; // quadratic term coefficient in the deformation scheme (default = 0.0)
 extern int iMin, iMax; //Constant min and max bounds of i-index accounting for only non-halos cells of the mpi_rank subdomain
 extern int jMin, jMax; //Constant min and max bounds of j-index accounting for only non-halos cells of the mpi_rank subdomain
 extern int kMin, kMax; //Constant min and max bounds of k-index accounting for only non-halos cells of the mpi_rank subdomain
@@ -46,8 +46,8 @@ extern float dXi, dYi, dZi; //inverse of the reference computational model coord
 extern float *xPos;  /* Cell-center position in x (meters) */
 extern float *yPos;  /* Cell-center position in y (meters) */
 extern float *zPos;  /* Cell-center position in z (meters) */
-extern float *topoPos; /*Topography elevation (z in meters) at the cell center position in x and y. */
-extern float *topoPosGlobal; /*Topography elevation (z in meters) at the cell center position in x and y. (Global domain) */
+extern float *topoPos; /*Terrain elevation (z in meters) at the cell center position in x and y. */
+extern float *topoPosGlobal; /*Terrain elevation (z in meters) at the cell center position in x and y. (Global domain) */
 
 //extern float *J11;      // dx/d_xi  -- assumed = 1.0
 //extern float *J12;      // dx/d_eta -- assumed = 0.0
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice.cu
index 19d17a1..7de459d 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice.cu
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice.cu
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 /*Auxiliary Scalar Fields*/
 __constant__ int NhydroAuxScalars_d;       /*Number of prognostic auxiliary scalar variable fields */
 __constant__ int AuxScAdvSelector_d; /*adv. scheme for auxiliary scalar fields */
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice_cu.h b/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice_cu.h
index b2d36d2..347901b 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice_cu.h
+++ b/SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice_cu.h
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_auxScalarsDevice_cu.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 #ifndef _AUXSCALARS_CUDADEV_CU_H
 #define _AUXSCALARS_CUDADEV_CU_H
 
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice.cu
index 078ac30..7f30608 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice.cu
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_canopyDevice.cu
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 /*---CANOPY MODEL*/
 __constant__ int canopySelector_d;         /* canopy selector: 0=off, 1=on */
 __constant__ int canopySkinOpt_d;          /* canopy selector to use additional skin friction effect on drag coefficient: 0=off, 1=on */
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice_cu.h b/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice_cu.h
index d176615..0f0dd22 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice_cu.h
+++ b/SRC/HYDRO_CORE/CUDA/cuda_canopyDevice_cu.h
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_canopyDevice_cu.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 #ifndef _CANOPY_CUDADEV_CU_H
 #define _CANOPY_CUDADEV_CU_H
 
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice.cu
index 25f0640..81d9d56 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice.cu
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice.cu
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 /*---CELL PERTURBATION METHOD*/
 __constant__ int cellpertSelector_d;    /*CP method selector: 0= off, 1= on */
 __constant__ int cellpert_sw2b_d;       /* switch to do: 0= all four lateral boundaries, 1= only south & west boundaries, 2= only south boundary */
@@ -82,7 +97,15 @@ extern "C" int cuda_hydroCoreDeviceBuildCPmethod(int simTime_it){
    curandSetPseudoRandomGeneratorSeed(gen,simTime_it);
    curandGenerateUniform(gen,randcp_d,n_tot);
 
-   cudaDevice_hydroCoreCompleteCellPerturbation<<<grid, tBlock>>>(hydroFlds_d,randcp_d,mpi_rank_world,numProcsX,numProcsY);
+#ifdef URBAN_EXT
+   if(urbanSelector > 0){
+     cudaDevice_hydroCoreCompleteCellPerturbationMasked<<<grid, tBlock>>>(hydroFlds_d,randcp_d,mpi_rank_world,numProcsX,numProcsY,building_mask_d);
+   }else{
+     cudaDevice_hydroCoreCompleteCellPerturbation<<<grid, tBlock>>>(hydroFlds_d,randcp_d,mpi_rank_world,numProcsX,numProcsY);
+   }
+#else
+     cudaDevice_hydroCoreCompleteCellPerturbation<<<grid, tBlock>>>(hydroFlds_d,randcp_d,mpi_rank_world,numProcsX,numProcsY);
+#endif
 
 //#define TIMERS_LEVEL2
 #ifdef TIMERS_LEVEL2
@@ -139,6 +162,33 @@ __global__ void cudaDevice_hydroCoreCompleteCellPerturbation(float* hydroFlds, f
 
 } // end cudaDevice_hydroCoreCompleteCellPerturbation()
 
+__global__ void cudaDevice_hydroCoreCompleteCellPerturbationMasked(float* hydroFlds, float* randcp_d, int my_mpi, int numpx, int numpy, float* bdg_mask){
+   /*Global-entry kernel: one thread per (i,j,k) cell; in-range cells are forwarded, with their bdg_mask entry, to cudaDevice_CellPerturbationMasked()*/
+   /*Cell indices of this thread within the launch grid*/
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+
+   /*Halo-inclusive strides: k is contiguous, then j, then i; fldStride separates consecutive fields inside hydroFlds*/
+   const int jStride = (Nz_d+2*Nh_d);
+   const int iStride = (Ny_d+2*Nh_d)*jStride;
+   const int fldStride = (Nx_d+2*Nh_d)*(Ny_d+2*Nh_d)*jStride;
+
+   /*Nothing to do outside the [iMin_d,iMax_d) x [jMin_d,jMax_d) x [kMin_d,kMax_d) index box*/
+   if((i < iMin_d)||(i >= iMax_d) ||
+      (j < jMin_d)||(j >= jMax_d) ||
+      (k < kMin_d)||(k >= kMax_d) ){
+      return;
+   }
+   /*Only the k-levels between cellpert_kbottom_d and cellpert_ktop_d (shifted by Nh_d-1) are perturbed*/
+   if((k < (cellpert_kbottom_d+Nh_d-1))||(k > (cellpert_ktop_d+Nh_d-1))){
+      return;
+   }
+
+   const int ijk = i*iStride + j*jStride + k;
+   cudaDevice_CellPerturbationMasked(i,j,k,Nx_d,Ny_d,Nz_d,Nh_d,my_mpi,numpx,numpy,&hydroFlds[RHO_INDX*fldStride+ijk],&hydroFlds[THETA_INDX*fldStride+ijk],randcp_d,&bdg_mask[ijk]);
+} // end cudaDevice_hydroCoreCompleteCellPerturbationMasked()
+
 /*----->>>>> __device__ void  cudaDevice_CellPerturbation();  --------------------------------------------------
 */
 __device__ void cudaDevice_CellPerturbation(int i_ind, int j_ind, int k_ind, int Nx, int Ny, int Nz, int Nh, int my_mpi, int numpx, int numpy, float* rho, float* theta, float* rand_1darray){
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice_cu.h b/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice_cu.h
index ecb38f3..671e1d2 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice_cu.h
+++ b/SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice_cu.h
@@ -1,3 +1,18 @@
+/* FastEddy®: SRC/HYDRO_CORE/CUDA/cuda_cellpertDevice_cu.h
+* ©2016 University Corporation for Atmospheric Research
+* 
+* This file is licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 #ifndef _CELLPERT_CUDADEV_CU_H
 #define _CELLPERT_CUDADEV_CU_H
 
@@ -40,9 +55,11 @@ extern "C" int cuda_hydroCoreDeviceBuildCPmethod(int simTime_it);
 extern "C" int cuda_hydroCoreTVCP();
 
 __global__ void cudaDevice_hydroCoreCompleteCellPerturbation(float* hydroFlds, float* randcp_d, int my_mpi, int numpx, int numpy);
+__global__ void cudaDevice_hydroCoreCompleteCellPerturbationMasked(float* hydroFlds, float* randcp_d, int my_mpi, int numpx, int numpy, float* bdg_mask);
 
 /*----->>>>> __device__ void  cudaDevice_CellPerturbation();  --------------------------------------------------
  *  */ // This cuda kerne lsets up the cells and their id in the CP method
 __device__ void cudaDevice_CellPerturbation(int i_ind, int j_ind, int k_ind, int Nx, int Ny, int Nz, int Nh, int my_mpi, int numpx, int numpy, float* rho, float* theta, float *rand_1darray);
+__device__ void cudaDevice_CellPerturbationMasked(int i_ind, int j_ind, int k_ind, int Nx, int Ny, int Nz, int Nh, int my_mpi, int numpx, int numpy, float* rho, float* theta, float* rand_1darray,float* bdg_mask);
 
 #endif // _CELLPERT_CUDADEV_CU_H
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_coriolisDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_coriolisDevice.cu
index a7ab6de..c9b2327 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_coriolisDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_coriolisDevice.cu
@@ -27,6 +27,9 @@ __constant__ float corioLS_fact_d;            /*large-scale forcing factor on Co
 extern "C" int cuda_coriolisDeviceSetup(){
    int errorCode = CUDA_CORIOLIS_SUCCESS;
    cudaMemcpyToSymbol(coriolisSelector_d, &coriolisSelector, sizeof(int));
+   cudaMemcpyToSymbol(corioConstHorz_d, &corioConstHorz, sizeof(float)); /* host corioConstHorz -> device symbol corioConstHorz_d */
+   cudaMemcpyToSymbol(corioConstVert_d, &corioConstVert, sizeof(float)); /* host corioConstVert -> device symbol corioConstVert_d */
+   cudaMemcpyToSymbol(corioLS_fact_d, &corioLS_fact, sizeof(float)); /* host corioLS_fact -> __constant__ corioLS_fact_d; NOTE(review): none of these copies has its return code checked, so errorCode stays CUDA_CORIOLIS_SUCCESS */
 
    return(errorCode);
 } //end cuda_coriolisDeviceSetup()
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice.cu
index d19e8cc..d9c0e58 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice.cu
@@ -49,6 +49,13 @@
 #include "cuda_filtersDevice.cu" 
 #include "cuda_cellpertDevice.cu"
 
+#ifdef URBAN_EXT
+  #include "cuda_urbanDevice.cu"
+#endif
+#ifdef GAD_EXT
+  #include "cuda_GADDevice.cu"
+#endif
+
 /*#################------------- CUDA_HYDRO_CORE module variable definitions ------------------#############*/
 /*Parameters*/
 __constant__ int Nhydro_d;       // Number of hydro_core prognostic variable fields
@@ -221,13 +228,26 @@ extern "C" int cuda_hydroCoreDeviceSetup(){
      errorCode = cuda_filtersDeviceSetup();
    }
 
+#ifdef URBAN_EXT
+   /* URBAN */
+   if (urbanSelector > 0){
+     errorCode = cuda_urbanDeviceSetup();
+   }
+#endif
+
+#ifdef GAD_EXT
+   /* GAD */
+   if (GADSelector > 0){
+     errorCode = cuda_GADDeviceSetup();
+   }
+#endif 
+
    gpuErrchk( cudaPeekAtLastError() ); /*Check for errors in the cudaMalloc calls*/
    gpuErrchk( cudaDeviceSynchronize() );
    MPI_Barrier(MPI_COMM_WORLD);
    printf("cuda_hydroCoreDeviceSetup() complete.\n");
    MPI_Barrier(MPI_COMM_WORLD);
 
-
    /* Done */
    return(errorCode);
 } //end cuda_hydroCoreDeviceSetup()
@@ -291,6 +311,18 @@ extern "C" int cuda_hydroCoreDeviceCleanup(){
      errorCode = cuda_filtersDeviceCleanup();
    }
 
+#ifdef URBAN_EXT
+   /* URBAN */
+   if (urbanSelector > 0){
+     errorCode = cuda_urbanDeviceCleanup();
+   }
+#endif
+#ifdef GAD_EXT
+   if (GADSelector > 0){
+     errorCode = cuda_GADDeviceCleanup();
+   }
+#endif
+
    return(errorCode);
 
 }//end cuda_hydroCoreDeviceCleanup()
@@ -322,6 +354,7 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
    createAndStartEvent(&startE, &stopE);
 #endif
 
+/*********************************** build_Frhs "preparatory tasks" phase ************************************************************/
    fldStride = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
 //#define VERBOSE_HALO
 #ifdef VERBOSE_HALO
@@ -418,6 +451,7 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
    createAndStartEvent(&startE, &stopE);
 #endif
   
+/*********************************** build_Frhs "intermediate tasks" phase ************************************************************/
    /*Advecting Velocities*/ 
     cudaDevice_hydroCoreCalcFaceVelocities<<<grid, tBlock>>>(simTime, simTime_it, simTime_itRestart, dt, timeStage, numRKstages,
                                                             hydroFlds_d, hydroFldsFrhs_d,
@@ -429,7 +463,6 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
                                                             hydroRhoInv_d, hydroKappaM_d, sgstkeScalars_d, sgstke_ls_d,
                                                             dedxi_d, moistScalars_d, moistTauFlds_d, moistScalarsFrhs_d,
                                                             J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d);
-   gpuErrchk( cudaGetLastError() );
 #ifdef TIMERS_LEVEL2
    stopSynchReportDestroyEvent(&startE, &stopE, &elapsedTime);
    printf("cuda_hydroCoreCalcFaceVelocities()  Kernel execution time (ms): %12.8f\n", elapsedTime);
@@ -439,10 +472,45 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
    /*Calculate the Frhs contributions for the advection, buoyancy, and SGS-mixing terms on core+TKE+moistScalars fields*/
    createAndStartEvent(&startE, &stopE);
 #endif
+#ifdef URBAN_EXT
+    if(urbanSelector > 0 && ((physics_oneRKonly==0) || (timeStage==numRKstages))){
+      cudaDevice_URBANinter<<<grid, tBlock>>>(z0m_d, z0t_d, hydroTauFlds_d, moistTauFlds_d, 
+		                              fricVel_d, htFlux_d, qFlux_d, invOblen_d, 
+			      	              building_mask_d, sea_mask_d, urban_heat_redis_d);
+    }
+#else
+    if( (physics_oneRKonly==0) || (timeStage==numRKstages) ){
+      cudaDevice_dynamicz0tLand<<<grid, tBlock>>>(z0m_d, z0t_d, fricVel_d, sea_mask_d);
+    }
+#endif
+#ifdef GAD_EXT
+   if (GADSelector > 0 && ((physics_oneRKonly==0) || (timeStage==numRKstages))){
+     cudaDevice_GADinter<<<grid, tBlock>>>(xPos_d, yPos_d, zPos_d, topoPos_d,
+                                           simTime_it, timeStage, numRKstages, dt,
+                                           hydroFlds_d, GAD_turbineType_d, GAD_turbineVolMask_d,
+                                           GAD_Xcoords_d, GAD_Ycoords_d, GAD_rotorTheta_d,
+                                           GAD_hubHeights_d, GAD_rotorD_d, GAD_nacelleD_d,
+                                           turbinePolyTwist_d, turbinePolyChord_d,
+                                           turbinePolyPitch_d, turbinePolyOmega_d,
+                                           rnorm_vect_d, alpha_minmax_vect_d,
+                                           turbinePolyCl_d, turbinePolyCd_d,
+                                           GAD_turbineRank_d, GAD_turbineRefi_d, GAD_turbineRefj_d, GAD_turbineRefk_d,
+                                           u_sampAvg_d, v_sampAvg_d,
+                                           GAD_turbineUseries_d, GAD_turbineVseries_d,
+                                           GAD_turbineRefMag_d, GAD_turbineRefDir_d,
+                                           GAD_turbineYawing_d, GAD_yawError_d, GAD_anFactor_d);
+   }
+#endif
+   gpuErrchk( cudaGetLastError() );
+   gpuErrchk( cudaDeviceSynchronize() );
+   
+/*********************************** build_Frhs "final tasks" phase ************************************************************/
    cudaDevice_hydroCoreComplete<<<grid, tBlock>>>(simTime, simTime_it, dt, timeStage, numRKstages, hydroFlds_d, hydroFldsFrhs_d,
                                                           hydroFaceVels_d, hydroBaseStateFlds_d, hydroTauFlds_d,
                                                           sgstkeScalars_d, sgstkeScalarsFrhs_d, moistScalars_d, moistScalarsFrhs_d, moistTauFlds_d,
                                                           J13_d, J23_d, J31_d, J32_d, J33_d, invD_Jac_d, zPos_d);
+   gpuErrchk( cudaGetLastError() );
+   gpuErrchk( cudaDeviceSynchronize() );
 
    /*Calculate the Frhs contributions for the advection and SGS-mixing terms on Auxiliary scalar fields*/
    if(NhydroAuxScalars > 0){
@@ -460,52 +528,51 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
             cudaDevice_TausScalar<<<grid, tBlock>>>(iFld, hydroRhoInv_d, hydroFlds_d, hydroKappaM_d, sgstke_ls_d,
                                                     hydroAuxScalars_d, AuxScalarsTauFlds_d,
                                                     J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d); // compute taus
-            gpuErrchk( cudaGetLastError() );
-            gpuErrchk( cudaDeviceSynchronize() );
 
             cudaDevice_SGSforcing<<<grid, tBlock>>>(iFld, AuxScalarsTauFlds_d, hydroAuxScalarsFrhs_d,
                                                     J13_d, J23_d, J31_d, J32_d, J33_d); // compute/add SGS forcing
-            gpuErrchk( cudaGetLastError() );
-            gpuErrchk( cudaDeviceSynchronize() );
          } //end for iFld
        } // endif SGS turbulence is on
+       gpuErrchk( cudaGetLastError() );
+       gpuErrchk( cudaDeviceSynchronize() );
      } //end if either compute at all RK stages, or last RK stage
    } //end if NhydroAuxScalars > 0
 
    //Carry out the following section of calculations only if either explcitly requested at every RK stage (physics_oneRKonly==0), or at the last RK stage (timeStage==numRKstages)
    if ((physics_oneRKonly==0) || (timeStage==numRKstages)) {
+     //SGS-TKE forcings 
      if ((turbulenceSelector >0) && (TKESelector > 0)){
        cudaDevice_hydroCoreCompleteSGSTKE<<<grid, tBlock>>>(hydroFlds_d, hydroRhoInv_d, hydroTauFlds_d,
-                                                                    hydroKappaM_d, dedxi_d, sgstke_ls_d,
-                                                                    sgstkeScalars_d, sgstkeScalarsFrhs_d, canopy_lad_d,
-                                                                    J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d); //call to prognostic TKE equation
+                                                            hydroKappaM_d, dedxi_d, sgstke_ls_d,
+                                                            sgstkeScalars_d, sgstkeScalarsFrhs_d, canopy_lad_d,
+                                                            J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d); //call to prognostic TKE equation
        if (canopySelector==1){ // canopy drag term to forcing of momentum
          cudaDevice_hydroCoreCompleteCanopy<<<grid, tBlock>>>(hydroFlds_d, hydroRhoInv_d, canopy_lad_d, hydroFldsFrhs_d);
        }
      } // end if (turbSelector >0) && (TKESelector > 0)
+     gpuErrchk( cudaGetLastError() );
+     gpuErrchk( cudaDeviceSynchronize() );
      //Moist species microphysics forcings 
      if ((moistureSelector > 0)&&(moistureCond > 0)&&(moistureNvars > 1)){ // (moisture condensation forcing)
        temp_freq = roundf(fmaxf(moistureMPcallTscale,dt)/dt); // ensure minimum is time step
        mp_update = simTime_it%temp_freq;
        if (mp_update==0){
          cudaDevice_hydroCoreCompleteMP<<<grid, tBlock>>>(hydroFlds_d, hydroFldsFrhs_d, moistScalars_d, moistScalarsFrhs_d,
-                                                                    hydroRhoInv_d, hydroPres_d, fcond_d, dt, hydroBaseStateFlds_d);
+                                                          hydroRhoInv_d, hydroPres_d, fcond_d, dt, hydroBaseStateFlds_d);
        }
      }
+     gpuErrchk( cudaGetLastError() );
+     gpuErrchk( cudaDeviceSynchronize() );
      //Molecular diffusion
      if (diffusionSelector == 1){  
        cudaDevice_hydroCoreCompleteMolecularDiffusion<<<grid, tBlock>>>(hydroFlds_d, hydroFldsFrhs_d,
-                                                                              hydroNuGradXFlds_d,hydroNuGradYFlds_d,hydroNuGradZFlds_d,
-                                                                              J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d, invD_Jac_d); // call to div of nugrad
+                                                                        hydroNuGradXFlds_d,hydroNuGradYFlds_d,hydroNuGradZFlds_d,
+                                                                        J13_d, J23_d, J31_d, J32_d, J33_d, D_Jac_d, invD_Jac_d); // call to div of nugrad
      } // endif diffusionSelector == 1
-     //Auxiliary scalar  mixing (diffusion) from SGS-turbulence
-      
-
+     gpuErrchk( cudaGetLastError() );
+     gpuErrchk( cudaDeviceSynchronize() );
    } // endif ((physics_oneRKonly==0) || (timeStage==numRKstages))
 
-   gpuErrchk( cudaGetLastError() );
-   gpuErrchk( cudaDeviceSynchronize() );
-
    simTime_diff = simTime_it - simTime_itRestart;
    ldf_itNum = (int)roundf(lsf_freq/dt);
    if ((lsfSelector==1) && (timeStage==numRKstages) && (simTime_it > simTime_itRestart) && (simTime_diff >= ldf_itNum) && (simTime_it%(int)roundf(lsf_freq/dt)==0)){ // (large-scale forcing)
@@ -522,6 +589,30 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
                                                                    hydroBaseStatePres_d,timeStage);
    }
 
+#ifdef URBAN_EXT
+   if (urbanSelector > 0 && ((physics_oneRKonly==0) || (timeStage==numRKstages))){
+     cudaDevice_URBANfinal<<<grid, tBlock>>>(hydroFlds_d, hydroFldsFrhs_d, hydroBaseStateFlds_d, 
+		                             hydroAuxScalars_d, hydroAuxScalarsFrhs_d,
+					     moistScalarsFrhs_d,
+					     building_mask_d);
+   }
+#endif
+#ifdef GAD_EXT
+   if (GADSelector > 0 && ((physics_oneRKonly==0) || (timeStage==numRKstages))){
+     cudaDevice_GADfinal<<<grid, tBlock>>>(xPos_d, yPos_d, zPos_d, topoPos_d,
+                                           hydroFlds_d, hydroFldsFrhs_d,simTime_it,dt,
+                                           GAD_turbineType_d, GAD_turbineVolMask_d,
+                                           GAD_Xcoords_d, GAD_Ycoords_d, GAD_rotorTheta_d,
+                                           GAD_hubHeights_d, GAD_rotorD_d, GAD_nacelleD_d,
+                                           turbinePolyTwist_d, turbinePolyChord_d,
+                                           turbinePolyPitch_d, turbinePolyOmega_d,
+                                           rnorm_vect_d, alpha_minmax_vect_d,
+                                           turbinePolyCl_d, turbinePolyCd_d,
+					   GAD_turbineRefMag_d, GAD_anFactor_d,
+                                           GAD_forceX_d, GAD_forceY_d, GAD_forceZ_d);
+   }
+#endif 
+
 #ifdef TIMERS_LEVEL2
    printf("cuda_hydroCoreComplete()  Kernel execution time (ms): %12.8f\n", elapsedTime);
 #endif
@@ -537,7 +628,7 @@ extern "C" int cuda_hydroCoreDeviceBuildFrhs(float simTime, int simTime_it, int
 }//end cuda_hydroCoreDeviceBuildFrhs()
 
 /*----->>>>> __global__ void  cudaDevice_hydroCoreCommence(); ---------------------------------------
-* This is the gloabl-entry kernel routine used by the HYDRO_CORE module
+* This is the global-entry kernel routine used by the HYDRO_CORE module
 */
 __global__ void cudaDevice_hydroCoreCommence(int simTime_it, float* hydroFlds_d, float* hydroFldsFrhs_d, 
                                                      float* hydroBaseStateFlds_d, 
@@ -1159,3 +1250,129 @@ __device__ void cudaDevice_setToZero(float* fld){
   ijk = i*iStride + j*jStride + k*kStride;
   fld[ijk] = 0.0;
 } //end cudaDevice_setToZero
+
+/*----->>>>> extern "C" int cuda_hydroCoreInitFieldsDevice();  -----------------------------------------------------------
+* One-time upload of the host state fields to device (GPU) memory via cudaMemcpyHostToDevice transfers. Optional field groups are
+* copied only when their selector/count is > 0. Every transfer's return code is handed to gpuErrchk. Returns CUDA_HYDRO_CORE_SUCCESS.
+*/
+extern "C" int cuda_hydroCoreInitFieldsDevice(){
+   int errorCode = CUDA_HYDRO_CORE_SUCCESS;
+   int Nelems;
+   int Nelems2d;
+   /*Element counts (halos included) of one 3-d field and of one 2-d field*/
+   Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
+   Nelems2d = (Nxp+2*Nh)*(Nyp+2*Nh);
+   /*Copy the host hydroFlds to the device */
+   gpuErrchk( cudaMemcpy(hydroFlds_d, hydroFlds, Nelems*Nhydro*sizeof(float), cudaMemcpyHostToDevice) );
+   if(TKESelector > 0){ /*Copy any required SGS TKE equation fields to device */
+     gpuErrchk( cudaMemcpy(sgstkeScalars_d, sgstkeScalars, Nelems*TKESelector*sizeof(float), cudaMemcpyHostToDevice) );
+   }
+   if(moistureSelector > 0){ /*Copy any required moisture fields to device */
+     gpuErrchk( cudaMemcpy(moistScalars_d, moistScalars, Nelems*moistureNvars*sizeof(float), cudaMemcpyHostToDevice) );
+   }
+   if(surflayerSelector > 0){ /*Copy the surface-layer 2-d fields to the device */
+     gpuErrchk( cudaMemcpy(tskin_d, tskin, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     gpuErrchk( cudaMemcpy(fricVel_d, fricVel, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     gpuErrchk( cudaMemcpy(htFlux_d, htFlux, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     gpuErrchk( cudaMemcpy(z0m_d, z0m, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     gpuErrchk( cudaMemcpy(z0t_d, z0t, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     if (moistureSelector > 0){ /*moist surface-layer fields*/
+       gpuErrchk( cudaMemcpy(qskin_d, qskin, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+       gpuErrchk( cudaMemcpy(qFlux_d, qFlux, Nelems2d*sizeof(float), cudaMemcpyHostToDevice) );
+     }
+   }// end if surflayerSelector > 0; NOTE(review): invOblen is copied device-to-host in cuda_hydroCoreSynchFieldsFromDevice() but is not uploaded here - confirm that is intended
+   if(NhydroAuxScalars > 0){ /*Copy any required host auxiliary scalar fields to the device */
+     gpuErrchk( cudaMemcpy(hydroAuxScalars_d, hydroAuxScalars, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyHostToDevice) );
+   }// end if NhydroAuxScalars > 0
+   gpuErrchk( cudaPeekAtLastError() ); /*Final check for any error still pending after the transfers*/
+   gpuErrchk( cudaDeviceSynchronize() );
+   return(errorCode);
+}//end cuda_hydroCoreInitFieldsDevice()
+
+/*----->>>>> extern "C" int cuda_hydroCoreSynchFieldsFromDevice();  --------------------------------------------------
+* Brings the device (GPU) copies of the HYDRO_CORE fields back to their host arrays through a sequence of
+* cudaMemcpyDeviceToHost transfers; optional field groups are transferred only when their selector is enabled.
+*/
+extern "C" int cuda_hydroCoreSynchFieldsFromDevice(){
+   int errorCode = CUDA_HYDRO_CORE_SUCCESS;
+   /*Element counts (halos included) of one 3-d field and one 2-d field, and the matching single-field byte counts*/
+   const int Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
+   const int Nelems2d = (Nxp+2*Nh)*(Nyp+2*Nh);
+   const size_t bytes3d = Nelems*sizeof(float);
+   const size_t bytes2d = Nelems2d*sizeof(float);
+   /*Frhs (forcing) fields are only sent up when forcing output or forcing logging is switched on*/
+   const int sendFrhs = (hydroForcingWrite==1)||(hydroForcingLog==1);
+
+   /*Pressure and the core hydro fields are always sent up*/
+   gpuErrchk( cudaMemcpy(hydroPres, hydroPres_d, bytes3d, cudaMemcpyDeviceToHost) );
+   gpuErrchk( cudaMemcpy(hydroFlds, hydroFlds_d, Nelems*Nhydro*sizeof(float), cudaMemcpyDeviceToHost) );
+   if(sendFrhs){
+     gpuErrchk( cudaMemcpy(hydroFldsFrhs, hydroFldsFrhs_d, Nelems*Nhydro*sizeof(float), cudaMemcpyDeviceToHost) );
+   }
+   if (TKESelector > 0){ /*SGS TKE fields*/
+     gpuErrchk( cudaMemcpy(sgstkeScalars, sgstkeScalars_d, Nelems*TKESelector*sizeof(float), cudaMemcpyDeviceToHost) );
+     if (sendFrhs){
+       gpuErrchk( cudaMemcpy(sgstkeScalarsFrhs, sgstkeScalarsFrhs_d, Nelems*TKESelector*sizeof(float), cudaMemcpyDeviceToHost) );
+     }
+   }
+   if (moistureSelector > 0){ /*moisture fields*/
+     gpuErrchk( cudaMemcpy(moistScalars, moistScalars_d, Nelems*moistureNvars*sizeof(float), cudaMemcpyDeviceToHost) );
+     if (sendFrhs){
+       gpuErrchk( cudaMemcpy(moistScalarsFrhs, moistScalarsFrhs_d, Nelems*moistureNvars*sizeof(float), cudaMemcpyDeviceToHost) );
+     }
+   }
+   /*Surface-layer 2-d fields*/
+   if(surflayerSelector > 0){
+     gpuErrchk( cudaMemcpy(fricVel, fricVel_d, bytes2d, cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(htFlux, htFlux_d, bytes2d, cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(tskin, tskin_d, bytes2d, cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(invOblen, invOblen_d, bytes2d, cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(z0m, z0m_d, bytes2d, cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(z0t, z0t_d, bytes2d, cudaMemcpyDeviceToHost) );
+     if (moistureSelector > 0){
+       gpuErrchk( cudaMemcpy(qFlux, qFlux_d, bytes2d, cudaMemcpyDeviceToHost) );
+       gpuErrchk( cudaMemcpy(qskin, qskin_d, bytes2d, cudaMemcpyDeviceToHost) );
+     }
+   }//endif surflayerSelector > 0
+   if(NhydroAuxScalars > 0){ /*auxiliary scalar fields*/
+     gpuErrchk( cudaMemcpy(hydroAuxScalars, hydroAuxScalars_d, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyDeviceToHost) );
+     if(sendFrhs){
+       gpuErrchk( cudaMemcpy(hydroAuxScalarsFrhs, hydroAuxScalarsFrhs_d, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyDeviceToHost) );
+     }
+   } //end if NhydroAuxScalars > 0
+   if((hydroSubGridWrite==1)&&(turbulenceSelector > 0)){
+     // The 6 Tau_i-j and 3 Tau_TH,j fields
+     gpuErrchk( cudaMemcpy(hydroTauFlds, hydroTauFlds_d, Nelems*9*sizeof(float), cudaMemcpyDeviceToHost) );
+   }
+   if((hydroSubGridWrite==1)&&(moistureSGSturb==1)){
+     // The moistureNvars*3 tau moisture fields (3 spatial components per moist species)
+     gpuErrchk( cudaMemcpy(moistTauFlds, moistTauFlds_d, Nelems*moistureNvars*3*sizeof(float), cudaMemcpyDeviceToHost) );
+   }
+
+#ifdef GAD_EXT
+   if (GADSelector > 0){ /*per-turbine arrays, then (optionally) the 3-d GAD force fields*/
+     gpuErrchk( cudaMemcpy(GAD_turbineYawing, GAD_turbineYawing_d, GADNumTurbines*sizeof(int), cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(GAD_turbineRefMag, GAD_turbineRefMag_d, GADNumTurbines*sizeof(float), cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(GAD_turbineRefDir, GAD_turbineRefDir_d, GADNumTurbines*sizeof(float), cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(GAD_yawError, GAD_yawError_d, GADNumTurbines*sizeof(float), cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(GAD_anFactor, GAD_anFactor_d, GADNumTurbines*sizeof(float), cudaMemcpyDeviceToHost) );
+     gpuErrchk( cudaMemcpy(GAD_rotorTheta, GAD_rotorTheta_d, GADNumTurbines*sizeof(float), cudaMemcpyDeviceToHost) );
+     if (GADoutputForces == 1){
+       gpuErrchk( cudaMemcpy(GAD_forceX, GAD_forceX_d, bytes3d, cudaMemcpyDeviceToHost) );
+       gpuErrchk( cudaMemcpy(GAD_forceY, GAD_forceY_d, bytes3d, cudaMemcpyDeviceToHost) );
+       gpuErrchk( cudaMemcpy(GAD_forceZ, GAD_forceZ_d, bytes3d, cudaMemcpyDeviceToHost) );
+     }
+   }
+#endif
+   gpuErrchk( cudaPeekAtLastError() ); /*Check for any error still pending after the transfers*/
+//#ifdef DEBUG  (currently forced on by the '#if 1' below)
+#if 1
+   MPI_Barrier(MPI_COMM_WORLD);
+   printf("Rank %d/%d: Batch complete results sent via cudaMemcpyDeviceToHost.\n",mpi_rank_world, mpi_size_world);
+   fflush(stdout);
+   MPI_Barrier(MPI_COMM_WORLD);
+#endif
+
+   return(errorCode);
+}//end cuda_hydroCoreSynchFieldsFromDevice()
+
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice_cu.h b/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice_cu.h
index de46099..f6f67c1 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice_cu.h
+++ b/SRC/HYDRO_CORE/CUDA/cuda_hydroCoreDevice_cu.h
@@ -80,6 +80,15 @@ extern float *hydroRhoInv_d;   //storage for 1.0/rho
 /*---CELL PERTURBATION METHOD*/
 #include <cuda_cellpertDevice_cu.h>
 
+#ifdef URBAN_EXT
+  /*URBAN */
+  #include <cuda_urbanDevice_cu.h>
+#endif
+#ifdef GAD_EXT
+  /*GENERALIZED ACTUATOR DISK */
+  #include <cuda_GADDevice_cu.h>
+#endif
+
 /*Switch for Last-RK stage physics */
 extern __constant__ int physics_oneRKonly_d; /* selector to apply physics RHS forcing only at the latest RK stage: 0= off, 1= on */
 
@@ -164,4 +173,16 @@ __device__ void cudaDevice_SetRhoInv(float* hydroFlds, float* hydroRhoInv);
 */
 __device__ void cudaDevice_setToZero(float* fld);
 
+/*----->>>>> extern "C" int cuda_hydroCoreInitFieldsDevice();  -----------------------------------------------------------
+* This function handles the one-time initializations of state fields on-device (GPU) memory by executing the appropriate sequence
+* of cudaMemcpyHostToDevice data transfers.
+*/
+extern "C" int cuda_hydroCoreInitFieldsDevice();
+
+/*----->>>>> extern "C" int cuda_hydroCoreSynchFieldsFromDevice();  --------------------------------------------------
+* This function handles the synchronization to host of on-device (GPU) fields  by executing the appropriate sequence
+* of cudaMemcpyDeviceToHost data transfers.
+*/
+extern "C" int cuda_hydroCoreSynchFieldsFromDevice();
+
 #endif // _HYDRO_CORE_CUDADEV_CU_H
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice.cu b/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice.cu
index d5f4e2d..47f0218 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice.cu
+++ b/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice.cu
@@ -392,6 +392,7 @@ __device__ void cudaDevice_SurfaceLayerMOSTdry(int ijk, float* u, float* v, floa
    float z0temp;
    float it_max;
    float constant_1;
+   float ci_ulim = 1.0;
 
    if (surflayer_stab_d==0){
      it_max = 1;
@@ -446,6 +447,13 @@ __device__ void cudaDevice_SurfaceLayerMOSTdry(int ijk, float* u, float* v, floa
    } while(it_n<=it_max);
    // end of iterative process
 
+   cd_i = fmaxf(fminf(cd_i,ci_ulim),0.0);
+   ch_i = fmaxf(fminf(ch_i,ci_ulim),0.0);
+   *cd_iter = cd_i;
+   *ch_iter = ch_i;
+   if (surflayerSelector_d > 1){
+        *htFlux = ch_i*U1*(th0-th1);
+   }//endif surflayerSelector_d==2
    *cd_iter = cd_i;
    *ch_iter = ch_i;
    tauxz = -cd_i*sqrtf(powf(*u/ *rho,2.0)+powf(*v/ *rho,2.0))*(*u);
@@ -461,10 +469,6 @@ __device__ void cudaDevice_SurfaceLayerMOSTdry(int ijk, float* u, float* v, floa
       cudaDevice_offshoreRoughness(z0m, z0t, fricVel, u1, v1, sea_mask);
    }
 
-   if ( (surflayer_z0tdyn_d>0) && ((surflayer_offshore_d==0) || ((surflayer_offshore_d==1) && (*sea_mask<1e-4))) ){ // dynamic z0t calculation
-      cudaDevice_z0tdyn(z0m, z0t, fricVel);
-   }
-
 } //end cudaDevice_SurfaceLayerMOSTdry(...
 
 /*----->>>>> __device__ void cudaDevice_SurfaceLayerMOSTmoist();  --------------------------------------------------
@@ -494,6 +498,7 @@ __device__ void cudaDevice_SurfaceLayerMOSTmoist(int ijk, float* u, float* v, fl
    float q0,q1,cq_i,psi_q,tauqz;
    int it_max;
    float constant_1;
+   float ci_ulim = 1.0;
 
    if (surflayer_stab_d==0){
      it_max = 1;
@@ -554,9 +559,16 @@ __device__ void cudaDevice_SurfaceLayerMOSTmoist(int ijk, float* u, float* v, fl
    } while(it_n<=it_max);
    // end of iterative process
 
+   cd_i = fmaxf(fminf(cd_i,ci_ulim),0.0);
+   ch_i = fmaxf(fminf(ch_i,ci_ulim),0.0);
+   cq_i = fmaxf(fminf(cq_i,ci_ulim),0.0);
    *cd_iter = cd_i;
    *ch_iter = ch_i;
    *cq_iter = cq_i;
+   if (surflayerSelector_d > 1){
+        *htFlux = ch_i*U1*(th0-th1);
+        *qFlux = cq_i*U1*(q0-q1);
+   }//endif surflayerSelector_d==2
    tauxz = -cd_i*sqrtf(powf(*u/ *rho,2.0)+powf(*v/ *rho,2.0))*(*u);
    tauyz = -cd_i*sqrtf(powf(*u/ *rho,2.0)+powf(*v/ *rho,2.0))*(*v);
    *tau31 = tauxz;
@@ -572,10 +584,6 @@ __device__ void cudaDevice_SurfaceLayerMOSTmoist(int ijk, float* u, float* v, fl
       cudaDevice_offshoreRoughness(z0m, z0t, fricVel, u1, v1, sea_mask);
    }
 
-   if ( (surflayer_z0tdyn_d>0) && ((surflayer_offshore_d==0) || ((surflayer_offshore_d==1) && (*sea_mask<1e-4))) ){ // dynamic z0t calculation
-      cudaDevice_z0tdyn(z0m, z0t, fricVel);
-   }
-
 } //end cudaDevice_SurfaceLayerMOSTmoist(...
 
 /*----->>>>> __device__ void cudaDevice_offshoreRoughness();  --------------------------------------------------
@@ -657,6 +665,32 @@ __device__ void cudaDevice_offshoreRoughness(float* z0m, float* z0t, float* fric
 
 } // cudaDevice_offshoreRoughness()
 
+/*----->>>>> __global__ void cudaDevice_dynamicz0tLand();  --------------------------------------------------
+* Kernel that calls cudaDevice_z0tdyn() on the z0m/z0t/fricVel entries of each non-halo horizontal cell when
+* surflayer_z0tdyn_d > 0 and either surflayer_offshore_d == 0, or surflayer_offshore_d == 1 with
+* sea_mask < 1e-4 at that cell. Only threads whose k index equals kMin_d do any work.
+*/
+__global__ void cudaDevice_dynamicz0tLand(float* z0m, float* z0t, float* fricVel, float* sea_mask){
+   /*Establish necessary indices for spatial locality*/
+   const int i = (blockIdx.x)*blockDim.x + threadIdx.x;
+   const int j = (blockIdx.y)*blockDim.y + threadIdx.y;
+   const int k = (blockIdx.z)*blockDim.z + threadIdx.z;
+
+   /*Nothing to do outside the k == kMin_d layer of non-halo cells*/
+   if((i < iMin_d)||(i >= iMax_d)||(j < jMin_d)||(j >= jMax_d)||(k != kMin_d)){
+      return;
+   }
+   if (surflayer_z0tdyn_d <= 0){ // dynamic z0t calculation is switched off
+      return;
+   }
+
+   const int ij = i*(Ny_d+2*Nh_d) + j; // 2-dimensional (horizontal index), unit stride in j
+   /*sea_mask is only read when surflayer_offshore_d == 1*/
+   if ((surflayer_offshore_d==0) || ((surflayer_offshore_d==1) && (sea_mask[ij]<1e-4))){
+      cudaDevice_z0tdyn(&z0m[ij], &z0t[ij], &fricVel[ij]);
+   }
+} // end cudaDevice_dynamicz0tLand()
+
 /*----->>>>> __device__ void cudaDevice_z0tdyn();  --------------------------------------------------
 */
 __device__ void cudaDevice_z0tdyn(float* z0m, float* z0t, float* fricVel){
diff --git a/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice_cu.h b/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice_cu.h
index 042d6cf..56e87dc 100644
--- a/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice_cu.h
+++ b/SRC/HYDRO_CORE/CUDA/cuda_surfaceLayerDevice_cu.h
@@ -104,6 +104,7 @@ __device__ void cudaDevice_SurfaceLayerMOSTmoist(int ijk, float* u, float* v, fl
 
 __device__ void cudaDevice_offshoreRoughness(float* z0m, float* z0t, float* fricVel, float u_1, float v_1, float* sea_mask);
 
+__global__ void cudaDevice_dynamicz0tLand(float* z0m, float* z0t, float* fricVel, float* sea_mask);
 __device__ void cudaDevice_z0tdyn(float* z0m, float* z0t, float* fricVel);
 
 #endif // _SURFLAYER_CUDADEV_CU_H
diff --git a/SRC/HYDRO_CORE/hydro_core.c b/SRC/HYDRO_CORE/hydro_core.c
index f105cb9..33084d3 100644
--- a/SRC/HYDRO_CORE/hydro_core.c
+++ b/SRC/HYDRO_CORE/hydro_core.c
@@ -26,13 +26,20 @@
 #include <grid.h>
 #include <hydro_core.h>
 
+/*Model-Extensions includes*/
+#ifdef URBAN_EXT
+  #include <urban.c>
+#endif
+#ifdef GAD_EXT
+  #include <GAD.c>
+#endif
 
 /*##################------------------- HYDRO_CORE module variable definitions ---------------------#################*/
 int Nhydro = 5;              /*Number of prognostic variable fields under hydro_core */
 int hydroBCs;          /*selector for hydro BC set. 1= LAD, Dirichlet lateral, ceiling and surface boundary conditions,
 			                            2= periodicHorizBSVertical */
 
-int hydroForcingWrite;     /*switching for dumping forcing fields of prognostic variables. 0-off (default), 1= on*/
+int hydroForcingWrite;     /*switching for writing output of forcing fields of prognostic variables. 0-off (default), 1= on*/
 int hydroForcingLog;     /*switch for logging Frhs summary metrics. 0-off (default), 1= on*/
 int hydroSubGridWrite;   /*switch for SGS fields 0-off (default), 1= on*/
 float *hydroFlds;        /*Base Adress of memory containing all prognostic variable fields under hydro_core */
@@ -92,7 +99,7 @@ int buoyancySelector;     /*buoyancy Force selector: 0=off, 1=on*/
 
 /*----Coriolis*/ 
 int coriolisSelector;   /* Coriolis selector, (0 = none, 1 = horizontal terms only, 2 = horizontal and vertical terms*/
-float coriolisLatitude; /*Charactersitc latitude in degrees from equator of the LES domain*/
+float coriolisLatitude; /*Characteristic latitude in degrees from equator of the LES domain*/
 float corioConstHorz;   /*Latitude dependent horizontal Coriolis term constant */
 float corioConstVert;   /*Latitude dependent Vertical Coriolis term constant */
 int coriolis_LAD = 0;       /*Coriolis force selector for LAD BC cases (hydroBCs==1): 0=off, 1=on*/
@@ -100,7 +107,7 @@ float corioLS_fact;     /*large-scale factor on Coriolis term*/
 
 /*----Turbulence*/ 
 int turbulenceSelector;         /*turbulence scheme selector: 0= none, 1= Lilly/Smagorinsky */
-int TKESelector;                /* Prognostic TKE selector: 0= none, 1= Prognostic */
+int TKESelector;                /* Prognostic TKE selector: 0= none, 1= Prognostic, 2= requires canopySelector=1 */
 int TKEAdvSelector;             /* SGSTKE advection scheme selector */
 float TKEAdvSelector_b_hyb;     /*hybrid advection scheme parameter */
 float c_s;                      /* Smagorinsky turbulence model constant used for turbulenceSelector = 1 with TKESelector = 0 */
@@ -109,11 +116,11 @@ float *sgstkeScalars;     /* Base Adress of memory containing all prognostic "sg
 float *sgstkeScalarsFrhs; /* Base Adress of memory containing all prognostic "sgstke" RHS forcing fields */ 
 
 /*----Advection*/ 
-int advectionSelector;    /*advection scheme selector: 0= 1st-order upwind, 1= 3rd-order QUICK, 
-                                              2= hybrid 3rd-4th order, 3= hybrid 5th-6th order */
+int advectionSelector;    /*advection scheme selector: 0=1st-order upwind, 1=3rd-order QUICK, 2=hybrid 3rd-4th order,
+			    3=hybrid 5th-6th order, 4=3rd-order WENO, 5=5th-order WENO, 6=2nd-order centered */
 int ceilingAdvectionBC;   /*selector to allow advection through the domain ceiling 1=on, 0=off (w-ceiling = 0)*/
 float b_hyb;      /*hybrid advection scheme parameter: 0.0= lower-order upwind,
-                                          1.0=higher-order cetered, 0.0 < b_hyb < 1.0 = hybrid */
+                                          1.0=higher-order centered, 0.0 < b_hyb < 1.0 = hybrid */
 
 /*----Diffusion*/ 
 int diffusionSelector;    /*diffusion Term-type selector: 0= none, 1= constant, 2= scalar turbulent-diffusivity*/
@@ -125,11 +132,11 @@ float* hydroDiffTauYFlds; /*Base adress for diffusion TauY arrays for all progno
 float* hydroDiffTauZFlds; /*Base adress for diffusion TauZ arrays for all prognostic fields*/
 
 /*---Monin-Obukhov surface layer---*/ 
-int surflayerSelector;    /*Monin-Obukhov surface layer selector: 0= off, 1= on */
+int surflayerSelector;    /*Monin-Obukhov surface layer selector: 0=off, 1=surface kinematic heat flux (surflayer_wth), 2=skin temperature rate (surflayer_tr) */
 float surflayer_z0;       /* roughness length (momentum) */
 float surflayer_z0t;      /* roughness length (temperature) */
 float surflayer_wth;      /* kinematic sensible heat flux at the surface */
-float surflayer_tr;       /* surface temperature rate in K h-1 */
+float surflayer_tr;       /* surface temperature rate in K h-1 when surflayerSelector == 2*/
 float surflayer_wq;       /* kinematic latent heat flux at the surface */
 float surflayer_qr;       /* surface water vapor rate (kg/kg) h-1 */
 int surflayer_qskin_input;/* selector to use file input (restart) value for qskin under surflayerSelector == 2 */
@@ -199,7 +206,7 @@ int moistureNvars;           /* number of moisture species */
 int moistureAdvSelectorQv;     /* water vapor advection scheme selector */
 float moistureAdvSelectorQv_b; /*hybrid advection scheme parameter */
 int moistureSGSturb;         /* selector to apply sub-grid scale diffusion to moisture fields */
-int moistureCond;            /* selector to apply condensation to mositure fields */
+int moistureCond;            /* selector to apply condensation to moisture fields */
 float *moistScalars;         /*Base address for moisture field arrays*/
 float *moistScalarsFrhs;     /*Base address for moisture forcing field arrays*/
 float *moistTauFlds;         /*Base address for SGS moisture field arrays*/
@@ -275,7 +282,7 @@ float thetaHeight; /* Initial theta perturbations maximum height*/
 float thetaAmplitude; /* Initial theta perturbation (maximum amplitude in K)*/
 
 int physics_oneRKonly; /* selector to apply physics RHS forcing only at the latest RK stage */
- 
+
 /*###################------------------- HYDRO_CORE module function definitions ---------------------#################*/
 
 /*----->>>>> int hydro_coreGetParams();   ----------------------------------------------------------------------
@@ -285,10 +292,10 @@ int hydro_coreGetParams(){
    int errorCode = HYDRO_CORE_SUCCESS;
 
    /*query for each HYDRO_CORE parameter */
-   hydroBCs = 0; //Default to triply-periodic
+   hydroBCs = 2; //Default to periodicHorizVerticalAbl
    errorCode = queryIntegerParameter("hydroBCs", &hydroBCs, 1, 2, PARAM_MANDATORY);
    if(hydroBCs==1){
-     errorCode = queryFileParameter("hydroBndysFileBase", &hydroBndysFileBase, PARAM_OPTIONAL);  
+     errorCode = queryStringParameter("hydroBndysFileBase", &hydroBndysFileBase, PARAM_MANDATORY);  
      hydroBndysFileStart = 0;
      errorCode = queryIntegerParameter("hydroBndysFileStart", &hydroBndysFileStart, 0, 500000, PARAM_MANDATORY);
      hydroBndysFileEnd = 0;
@@ -297,7 +304,7 @@ int hydro_coreGetParams(){
      errorCode = queryFloatParameter("dtBdyPlaneBCs", &dtBdyPlaneBCs, 0.0, 6e5, PARAM_MANDATORY);
    }
    hydroForcingWrite = 0; //Default to off
-   errorCode = queryIntegerParameter("hydroForcingWrite", &hydroForcingWrite, 0, 1, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("hydroForcingWrite", &hydroForcingWrite, 0, 1, PARAM_OPTIONAL);
    hydroForcingLog = 0; //Default to off
    errorCode = queryIntegerParameter("hydroForcingLog", &hydroForcingLog, 0, 1, PARAM_MANDATORY);
    hydroSubGridWrite = 0; //Default to off
@@ -313,37 +320,50 @@ int hydro_coreGetParams(){
    turbulenceSelector = 0; //Default to off
    errorCode = queryIntegerParameter("turbulenceSelector", &turbulenceSelector, 0, 1, PARAM_MANDATORY);
    TKESelector = 0; //Default to none
-   errorCode = queryIntegerParameter("TKESelector", &TKESelector, 0, 2, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("TKESelector", &TKESelector, 0, 2, PARAM_OPTIONAL);
    TKEAdvSelector = 0; //Default to 0 for monotonic 1st-order upstream
-   errorCode = queryIntegerParameter("TKEAdvSelector", &TKEAdvSelector, 0, 6, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("TKEAdvSelector", &TKEAdvSelector, 0, 6, PARAM_OPTIONAL);
    TKEAdvSelector_b_hyb = 0.0; //Default to 0.0
-   errorCode = queryFloatParameter("TKEAdvSelector_b_hyb", &TKEAdvSelector_b_hyb, 0.0, 1.0, PARAM_MANDATORY);
-   c_s = 0.18; //Default to 0.18
-   errorCode = queryFloatParameter("c_s", &c_s, 1e-6, 1e6, PARAM_MANDATORY);
-   c_k = 0.10; //Default to 0.1
-   errorCode = queryFloatParameter("c_k", &c_k, 1e-6, 1e6, PARAM_MANDATORY);
-   advectionSelector = 0; //Default to 0
-   errorCode = queryIntegerParameter("advectionSelector", &advectionSelector, 0, 6, PARAM_MANDATORY);
+   errorCode = queryFloatParameter("TKEAdvSelector_b_hyb", &TKEAdvSelector_b_hyb, 0.0, 1.0, PARAM_OPTIONAL);
+   if (turbulenceSelector == 1){
+      if (TKESelector == 0){
+         c_s = 0.18; //Default to 0.18
+         errorCode = queryFloatParameter("c_s", &c_s, 1e-6, 1e6, PARAM_OPTIONAL);
+      }else if (TKESelector > 0){
+         c_k = 0.10; //Default to 0.1
+         errorCode = queryFloatParameter("c_k", &c_k, 1e-6, 1e6, PARAM_OPTIONAL);
+      }
+   }
+   advectionSelector = 3; //Default to 3
+   errorCode = queryIntegerParameter("advectionSelector", &advectionSelector, 0, 6, PARAM_OPTIONAL);
    ceilingAdvectionBC = 0;
    errorCode = queryIntegerParameter("ceilingAdvectionBC", &ceilingAdvectionBC, 0, 1, PARAM_OPTIONAL);
-   b_hyb = 0.8; //Default to 0.8
-   errorCode = queryFloatParameter("b_hyb", &b_hyb, 0.0, 1.0, PARAM_MANDATORY);
+   if ((advectionSelector == 2) || (advectionSelector == 3)){
+     b_hyb = 0.8; //Default to 0.8
+     errorCode = queryFloatParameter("b_hyb", &b_hyb, 0.0, 1.0, PARAM_OPTIONAL);
+   }
    diffusionSelector = 0; //Default to off
-   errorCode = queryIntegerParameter("diffusionSelector", &diffusionSelector, 0, 1, PARAM_MANDATORY);
-   nu_0 = 1.0; //Default to 1.0 m/s^2
-   errorCode = queryFloatParameter("nu_0", &nu_0, 0, FLT_MAX, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("diffusionSelector", &diffusionSelector, 0, 1, PARAM_OPTIONAL);
+   if (diffusionSelector==1){
+     nu_0 = 1.0; //Default to 1.0 m/s^2
+     errorCode = queryFloatParameter("nu_0", &nu_0, 0, FLT_MAX, PARAM_OPTIONAL);
+   }
    surflayerSelector = 0; // Default to off
    errorCode = queryIntegerParameter("surflayerSelector", &surflayerSelector, 0, 3, PARAM_MANDATORY);
    surflayer_z0 = 0.1; // Default to 0.1 m 
    errorCode = queryFloatParameter("surflayer_z0", &surflayer_z0, 1e-12, 1e+0, PARAM_MANDATORY);
    surflayer_z0t = 0.1; // Default to 0.1 m 
    errorCode = queryFloatParameter("surflayer_z0t", &surflayer_z0t, 1e-6, 1e+1, PARAM_MANDATORY);
-   surflayer_tr = 0.0; // Default to 0.0 K h-1 
-   errorCode = queryFloatParameter("surflayer_tr", &surflayer_tr, -1e+1, 1e+1, PARAM_MANDATORY);
-   surflayer_wth = 0.0; // Default to 0.0 K m s-1 
-   errorCode = queryFloatParameter("surflayer_wth", &surflayer_wth, -5e+0, 5e+0, PARAM_MANDATORY);
+   if (surflayerSelector == 2){
+     surflayer_tr = 0.0; // Default to 0.0 K h-1 
+     errorCode = queryFloatParameter("surflayer_tr", &surflayer_tr, -1e+1, 1e+1, PARAM_MANDATORY);
+   }
+   if (surflayerSelector == 1){
+     surflayer_wth = 0.0; // Default to 0.0 K m s-1 
+     errorCode = queryFloatParameter("surflayer_wth", &surflayer_wth, -5e+0, 5e+0, PARAM_MANDATORY);
+   }
    surflayer_idealsine = 0; //Default to off 
-   errorCode = queryIntegerParameter("surflayer_idealsine", &surflayer_idealsine, 0, 1, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("surflayer_idealsine", &surflayer_idealsine, 0, 1, PARAM_OPTIONAL);
    surflayer_ideal_ts = 0.0; // Default to 0.0 s
    surflayer_ideal_te = 0.0; // Default to 0.0 s
    surflayer_ideal_amp = 0.1; // Default to 0.1
@@ -363,18 +383,18 @@ int hydro_coreGetParams(){
    errorCode = queryIntegerParameter("surflayer_stab", &surflayer_stab, 0, 1, PARAM_OPTIONAL);
    surflayer_z0tdyn = 1; // Default to option 1
    errorCode = queryIntegerParameter("surflayer_z0tdyn", &surflayer_z0tdyn, 0, 2, PARAM_OPTIONAL);
-   surflayer_offshore = 0; // Default to off
-   surflayer_offshore_opt = 0;
+   surflayer_offshore = 1; // Default to on
+   surflayer_offshore_opt = 4; // Default to 4
    surflayer_offshore_dyn = 1;
    surflayer_offshore_hs = 0.0;
    surflayer_offshore_lp = 0.1;
    surflayer_offshore_cp = 0.1;
    surflayer_offshore_theta = 0.0;
    surflayer_offshore_visc = 1;
-   errorCode = queryIntegerParameter("surflayer_offshore", &surflayer_offshore, 0, 1, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("surflayer_offshore", &surflayer_offshore, 0, 1, PARAM_OPTIONAL);
    errorCode = queryIntegerParameter("surflayer_offshore_visc", &surflayer_offshore_visc, 0, 1, PARAM_OPTIONAL);
    if (surflayer_offshore > 0){
-     errorCode = queryIntegerParameter("surflayer_offshore_opt", &surflayer_offshore_opt, 0, 5, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("surflayer_offshore_opt", &surflayer_offshore_opt, 0, 5, PARAM_OPTIONAL);
      errorCode = queryIntegerParameter("surflayer_offshore_dyn", &surflayer_offshore_dyn, 0, 1, PARAM_OPTIONAL);
      if (surflayer_offshore_dyn == 0){
        if (surflayer_offshore_opt == 2){
@@ -413,25 +433,28 @@ int hydro_coreGetParams(){
    cellpert_nts = 500; // Default to 500 time steps
    errorCode = queryIntegerParameter("cellpert_nts", &cellpert_nts, 0, 1e+6, PARAM_OPTIONAL);
    if (cellpertSelector > 0){
-     errorCode = queryIntegerParameter("cellpertSelector", &cellpertSelector, 0, 1, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpertSelector", &cellpertSelector, 0, 1, PARAM_OPTIONAL);
      cellpert_sw2b = 0; // Default to 0
-     errorCode = queryIntegerParameter("cellpert_sw2b", &cellpert_sw2b, 0, 3, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_sw2b", &cellpert_sw2b, 0, 3, PARAM_OPTIONAL);
      cellpert_amp = 0.5; // Default to 0.5 K
-     errorCode = queryFloatParameter("cellpert_amp", &cellpert_amp, 0.0, 20.0, PARAM_MANDATORY);
+     errorCode = queryFloatParameter("cellpert_amp", &cellpert_amp, 0.0, 20.0, PARAM_OPTIONAL);
      cellpert_gppc = 8; // Default to 8 grid points per cell
-     errorCode = queryIntegerParameter("cellpert_gppc", &cellpert_gppc, 0, 50, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_gppc", &cellpert_gppc, 0, 50, PARAM_OPTIONAL);
      cellpert_ndbc = 3; // Default to 3 cells
-     errorCode = queryIntegerParameter("cellpert_ndbc", &cellpert_ndbc, 0, 10, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_ndbc", &cellpert_ndbc, 0, 10, PARAM_OPTIONAL);
      cellpert_kbottom = 1; // Default to 1st grid point above surface
-     errorCode = queryIntegerParameter("cellpert_kbottom", &cellpert_kbottom, 1, 10, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_kbottom", &cellpert_kbottom, 1, 10, PARAM_OPTIONAL);
      cellpert_ktop = 20; // Default to 20th grid point above surface
-     errorCode = queryIntegerParameter("cellpert_ktop", &cellpert_ktop, 0, 200, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_ktop", &cellpert_ktop, 0, 200, PARAM_OPTIONAL);
+     if (cellpert_ktop > Nz){
+       cellpert_ktop = Nz-10;
+     }
      cellpert_tvcp = 0; // Default to off 
-     errorCode = queryIntegerParameter("cellpert_tvcp", &cellpert_tvcp, 0, 1, PARAM_MANDATORY);
+     errorCode = queryIntegerParameter("cellpert_tvcp", &cellpert_tvcp, 0, 1, PARAM_OPTIONAL);
      cellpert_eckert = 0.2; // Default to Ec = 0.2
-     errorCode = queryFloatParameter("cellpert_eckert", &cellpert_eckert, 0.0, 10.0, PARAM_MANDATORY);
+     errorCode = queryFloatParameter("cellpert_eckert", &cellpert_eckert, 0.0, 10.0, PARAM_OPTIONAL);
      cellpert_tsfact = 1.0; // Default to cellpert_tsfact = 1.0
-     errorCode = queryFloatParameter("cellpert_tsfact", &cellpert_tsfact, 0.0, 10.0, PARAM_MANDATORY);
+     errorCode = queryFloatParameter("cellpert_tsfact", &cellpert_tsfact, 0.0, 10.0, PARAM_OPTIONAL);
    }
    //
    lsfSelector = 0; // Default to off 
@@ -505,8 +528,10 @@ int hydro_coreGetParams(){
      errorCode = queryIntegerParameter("moistureCondBasePres", &moistureCondBasePres, 0, 1, PARAM_MANDATORY);
      errorCode = queryFloatParameter("moistureMPcallTscale", &moistureMPcallTscale, 1e-4, 1000.0, PARAM_MANDATORY);
      errorCode = queryFloatParameter("surflayer_wq", &surflayer_wq, -5e+0, 5e+0, PARAM_MANDATORY);
-     errorCode = queryFloatParameter("surflayer_qr", &surflayer_qr, -1e+1, 1e+1, PARAM_MANDATORY);
-     errorCode = queryIntegerParameter("surflayer_qskin_input", &surflayer_qskin_input, 0, 1, PARAM_OPTIONAL);
+     if (surflayerSelector == 2){
+       errorCode = queryFloatParameter("surflayer_qr", &surflayer_qr, -1e+1, 1e+1, PARAM_MANDATORY);
+       errorCode = queryIntegerParameter("surflayer_qskin_input", &surflayer_qskin_input, 0, 1, PARAM_OPTIONAL);
+     }
      if (surflayer_idealsine > 0){
        errorCode = queryFloatParameter("surflayer_ideal_qts", &surflayer_ideal_qts, 0, 1e+5, PARAM_MANDATORY);
        errorCode = queryFloatParameter("surflayer_ideal_qte", &surflayer_ideal_qte, 0, 1e+5, PARAM_MANDATORY);
@@ -519,7 +544,7 @@ int hydro_coreGetParams(){
    filter_6thdiff_hori = 0; // Default to off
    filter_6thdiff_hori_coeff = 0.03; // Default to 0.03
    filter_divdamp = 0; // Default to off
-   errorCode = queryIntegerParameter("filterSelector", &filterSelector, 0, 1, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("filterSelector", &filterSelector, 0, 1, PARAM_OPTIONAL);
    if (filterSelector == 1){
      errorCode = queryIntegerParameter("filter_6thdiff_vert", &filter_6thdiff_vert, 0, 1, PARAM_OPTIONAL);
      errorCode = queryIntegerParameter("filter_6thdiff_hori", &filter_6thdiff_hori, 0, 1, PARAM_OPTIONAL);
@@ -531,10 +556,16 @@ int hydro_coreGetParams(){
        errorCode = queryFloatParameter("filter_6thdiff_hori_coeff", &filter_6thdiff_hori_coeff, 0.0, 1.0, PARAM_MANDATORY);
      }
    }
+#ifdef GAD_EXT
+   /*New EXTENSIONS sub-module style call to get parameters for the GAD sub-module*/
+   errorCode = GADGetParams();
+#endif
    dampingLayerSelector = 0; // Default to off 
-   errorCode = queryIntegerParameter("dampingLayerSelector", &dampingLayerSelector, 0, 1, PARAM_MANDATORY);
-   dampingLayerDepth = 100.0; //Default to 100.0 (meters)  
-   errorCode = queryFloatParameter("dampingLayerDepth", &dampingLayerDepth, 0.0, FLT_MAX, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("dampingLayerSelector", &dampingLayerSelector, 0, 1, PARAM_OPTIONAL);
+   if(dampingLayerSelector == 1){
+     dampingLayerDepth = 100.0; //Default to 100.0 (meters)  
+     errorCode = queryFloatParameter("dampingLayerDepth", &dampingLayerDepth, 0.0, FLT_MAX, PARAM_OPTIONAL);
+   }
    /*Auxiliary scalar parameters*/
    NhydroAuxScalars = 0; // Default to zero auxiliary scalars
    errorCode = queryIntegerParameter("NhydroAuxScalars", &NhydroAuxScalars, 0, MAX_AUXSC_SRC, PARAM_OPTIONAL);
@@ -583,8 +614,8 @@ int hydro_coreGetParams(){
                                         0.0, FLT_MAX, PARAM_MANDATORY);
      } //endif srcAuxScFile == NULL...
    }// endif NhydroAuxScalars > 0
-   stabilityScheme = 0; //Default to constant rho & theta
-   errorCode = queryIntegerParameter("stabilityScheme", &stabilityScheme, 0, 4, PARAM_MANDATORY);
+   stabilityScheme = 2; //Default to 2
+   errorCode = queryIntegerParameter("stabilityScheme", &stabilityScheme, 2, 2, PARAM_MANDATORY);
    temp_grnd = 300.0; //Default to 300.0-(Kelvin) = 80.33-(Fahrenheit) = 26.85-(Celsius) 
    errorCode = queryFloatParameter("temp_grnd", &temp_grnd, FLT_MIN, FLT_MAX, PARAM_MANDATORY);
    pres_grnd = 1.0e5; //Default to refPressure 100,000-(pascals) = 1000-(millibars)
@@ -623,6 +654,11 @@ int hydro_coreGetParams(){
    physics_oneRKonly = 1; //Default 1 (physics only at the last stage of RK scheme)
    errorCode = queryIntegerParameter("physics_oneRKonly", &physics_oneRKonly, 0, 1, PARAM_OPTIONAL);
 
+#ifdef URBAN_EXT
+   /*New EXTENSIONS sub-module style call to get parameters for the URBAN sub-module*/
+   errorCode = URBANGetParams();
+#endif
+
    return(errorCode);
 } //end hydro_coreGetParams()
 
@@ -662,8 +698,8 @@ int hydro_coreInit(){
         printParameter("hydroBndysFileEnd", "end counter value for BdyPlane sets");
         printParameter("dtBdyPlaneBCs", "delta in time (seconds) between BdyPlane sets (default = 0.0)");
       }
-      printParameter("hydroForcingWrite", "Switch for dumping hydroFldsFrhs for prognositic fields. 0 = off, 1=on");
-      printParameter("hydroSubGridWrite", "Switch for dumping Tauij fields. 0 = off, 1=on");
+      printParameter("hydroForcingWrite", "Switch for writing output of hydroFldsFrhs for prognostic fields. 0 = off, 1=on");
+      printParameter("hydroSubGridWrite", "Switch for writing output of Tauij fields. 0 = off, 1=on");
       printParameter("hydroForcingLog", "Switch for logging Frhs summary metrics. 0 = off, 1=on");
       printComment("----------: PRESSURE GRADIENT FORCE ---");
       printParameter("pgfSelector", "Pressure Gradient Force (pgf) selector: 0=off, 1=on");
@@ -671,23 +707,23 @@ int hydro_coreInit(){
       printParameter("buoyancySelector", "Buoyancy force  selector: 0=off, 1=on");
       printComment("----------: CORIOLIS ---");
       printParameter("coriolisSelector", "Corilis force selector: 0= none, 1= horiz. terms, 2= horiz. & vert. terms");
-      printParameter("coriolisLatitude", "Charactersitc latitude in degrees from equator of the LES domain");
+      printParameter("coriolisLatitude", "Characteristic latitude in degrees from equator of the LES domain");
       printComment("----------: TURBULENCE ---");
       printParameter("turbulenceSelector", "turbulence scheme selector: 0= none, 1= Lilly/Smagorinsky ");
-      printParameter("TKESelector", "Prognostic TKE selector: 0= none, 1= Prognostic");
+      printParameter("TKESelector", "Prognostic TKE selector: 0= none, 1= Prognostic, 2= requires canopySelector=1");
       printParameter("TKEAdvSelector", "advection scheme for SGSTKE equation");
       printParameter("TKEAdvSelector_b_hyb","hybrid advection scheme parameter");
       printParameter("c_s", "Smagorinsky model constant used for turbulenceSelector = 1 and TKESelector = 0");
       printParameter("c_k", "Lilly model constant used for turbulenceSelector = 1 and TKESelector > 0");
       printComment("----------: ADVECTION ---");
-      printParameter("advectionSelector", "advection scheme selector: 0= 1st-order upwind, 1= 3rd-order QUICK, 2= hybrid 3rd-4th order, 3= hybrid 5th-6th order");
+      printParameter("advectionSelector", "advection scheme selector: 0=1st-order upwind, 1=3rd-order QUICK, 2=hybrid 3rd-4th order, 3=hybrid 5th-6th order, 4=3rd-order WENO, 5=5th-order WENO, 6=2nd-order centered");
       printParameter("ceilingAdvectionBC", "selector to allow advection through the domain ceiling 1=on, 0=off (w-ceiling = 0)");
-      printParameter("b_hyb", "hybrid advection scheme parameter: 0.0= lower-order upwind, 1.0=higher-order cetered, 0.0 < b_hyb < 1.0 = hybrid");
+      printParameter("b_hyb", "hybrid advection scheme parameter: 0.0= lower-order upwind, 1.0=higher-order centered, 0.0 < b_hyb < 1.0 = hybrid");
       printComment("----------: DIFFUSION ---");
       printParameter("diffusionSelector", "diffusivity selector: 0= none, 1= const.");
       printParameter("nu_0", "constant diffusivity used when diffusionSelector = 1");
       printComment("----------: SURFACE LAYER ---"); 
-      printParameter("surflayerSelector", "surfacelayer selector: 0= off, 1,2= on");
+      printParameter("surflayerSelector", "surfacelayer selector: 0=off, 1=surface kinematic heat flux (surflayer_wth), 2=skin temperature rate (surflayer_tr)");
       printParameter("surflayer_z0", "roughness length (momentum) when surflayerSelector > 0");
       printParameter("surflayer_z0t", "roughness length (temperature) when surflayerSelector > 0");
       printParameter("surflayer_wth", "kinematic sensible heat flux at the surface when surflayerSelector = 1");
@@ -790,9 +826,9 @@ int hydro_coreInit(){
                          "Source start time in seconds from start of simulation (i.e. time = 0.0)");
           printParameter("srcAuxScDurationSeconds", "Source duration in seconds from srcAuxScStartSeconds");
           printParameter("srcAuxScGeometryType", "0 = point (single cell volume), 1 = line (line of surface cells)");
-          printParameter("srcAuxScLocation_X", "Source geometry centroid postion in x (west-east)");
-          printParameter("srcAuxScLocation_Y", "Source geometry centroid postion in y (south-north)");
-          printParameter("srcAuxScLocation_Z", "Source geometry centroid postion in z (vertical above the surface)");
+          printParameter("srcAuxScLocation_X", "Source geometry centroid position in x (west-east)");
+          printParameter("srcAuxScLocation_Y", "Source geometry centroid position in y (south-north)");
+          printParameter("srcAuxScLocation_Z", "Source geometry centroid position in z (vertical above the surface)");
           printParameter("srcAuxScMassSpecType",
                          "Source mass specification type 0 = mass in kg, 1 = mass source rate in kg/s");
           printParameter("srcAuxScMassSpecValue",
@@ -824,7 +860,15 @@ int hydro_coreInit(){
       printParameter("thetaAmplitude", "Maximum amplitude for theta perturbations: thetaAmplitude*[-1,+1] K");
       printParameter("physics_oneRKonly", "selector to apply physics RHS forcing only at the latest RK stage: 0= off, 1= on");
    } //end if(mpi_rank_world == 0)
-
+#ifdef URBAN_EXT
+   /*New EXTENSIONS sub-module style call to print parameters for the URBAN sub-module*/
+   printComment("----------: URBAN MODEL ---");
+   errorCode = URBANPrintParams();
+#endif
+#ifdef GAD_EXT
+   /*New EXTENSIONS sub-module style call to print parameters for the GAD sub-module*/
+   errorCode = GADPrintParams();
+#endif
    /*Broadcast the parameters across mpi_ranks*/
    MPI_Bcast(&hydroBCs, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if(hydroBCs==1){  // Using LAD BCs
@@ -1040,6 +1084,8 @@ int hydro_coreInit(){
    for(iFld = 0; iFld < Nhydro; iFld ++){
      errorCode = hydro_coreGetFieldName( &fldName[0], iFld);
      errorCode = ioRegisterVar(&fldName[0], "float", 4, dims4d, &hydroFlds[iFld*fldStride]);
+     // Add NetCDF attributes based on field type
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0); // 0 = not forcing
      printf("hydro_coreInit:hydroFlds[%d] = %s stored at %p, has been registered with IO.\n",
             iFld,&fldName[0],&hydroFlds[iFld*fldStride]);
      fflush(stdout);
@@ -1049,6 +1095,8 @@ int hydro_coreInit(){
    hydroPres = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "hydroPres");
    errorCode = sprintf(&fldName[0],"pressure");
    errorCode = ioRegisterVar(&fldName[0], "float", 4, dims4d, hydroPres);
+   // Add attributes for pressure field
+   errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0); // 0 = not forcing
    printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
           &fldName[0],hydroPres);
    
@@ -1059,6 +1107,8 @@ int hydro_coreInit(){
        errorCode = hydro_coreGetFieldName( &fldName[0], iFld);
        sprintf(&frhsName[0],"F_%s",&fldName[0]);
        errorCode = ioRegisterVar(&frhsName[0], "float", 4, dims4d, &hydroFldsFrhs[iFld*fldStride]);
+       // Add attributes for forcing fields
+       errorCode = hydro_coreAddFieldAttributes(&frhsName[0], 1); // 1 = is forcing
        printf("hydro_coreInit:hydroFldsFrhs[%d] = %s stored at %p, has been registered with IO.\n",
               iFld,&frhsName[0],&hydroFldsFrhs[iFld*fldStride]);
      }
@@ -1071,6 +1121,8 @@ int hydro_coreInit(){
      for(iFld = 0; iFld < NhydroAuxScalars; iFld ++){
         sprintf(&AuxScName[0],"AuxScalar_%d",iFld);
         errorCode = ioRegisterVar(&AuxScName[0], "float", 4, dims4d, &hydroAuxScalars[iFld*fldStride]);
+	// Add attributes for auxiliary scalar
+        errorCode = hydro_coreAddFieldAttributes(&AuxScName[0], 0);
         printf("hydro_coreInit:hydroAuxScalars[%d] = %s stored at %p, has been registered with IO.\n",
                iFld,&AuxScName[0],&hydroAuxScalars[iFld*fldStride]);
         fflush(stdout);
@@ -1086,12 +1138,16 @@ int hydro_coreInit(){
         errorCode = ioRegisterVar(&sgstkeScName[0], "float", 4, dims4d, &sgstkeScalars[iFld*fldStride]);
         printf("hydro_coreInit:sgstkeScalars[%d] = %s stored at %p, has been registered with IO.\n",
                iFld,&sgstkeScName[0],&sgstkeScalars[iFld*fldStride]);
+	// Add attributes for TKE scalar
+	errorCode = hydro_coreAddFieldAttributes(&sgstkeScName[0], 0);
         fflush(stdout);
      } //end for iFld...
      if(hydroForcingWrite == 1){ // add rhs forcing of SGSTKE equation
        for(iFld = 0; iFld < TKESelector; iFld ++){
          sprintf(&sgstkeScName[0],"F_TKE%d",iFld);
          errorCode = ioRegisterVar(&sgstkeScName[0], "float", 4, dims4d, &sgstkeScalarsFrhs[iFld*fldStride]);
+         // Add attributes for TKE forcing field
+         errorCode = hydro_coreAddFieldAttributes(&sgstkeScName[0], 1);
          printf("hydro_coreInit:sgstkeScalarsFrhs[%d] = %s stored at %p, has been registered with IO.\n",
                 iFld,&sgstkeScName[0],&sgstkeScalarsFrhs[iFld*fldStride]);
        }
@@ -1114,12 +1170,16 @@ int hydro_coreInit(){
      errorCode = hydro_coreGetFieldName( &fldName[0], iFld);
      errorCode = sprintf(&fldName[0],"BS_%d",iFld);
      errorCode = ioRegisterVar(&fldName[0], "float", 4, dims4d, &hydroBaseStateFlds[iFld*fldStride]);
+     // Add NetCDF attributes after registration
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0); // 0 = not a forcing field
      printf("hydro_coreInit:hydroBaseStateFlds[%d] = %s stored at %p, has been registered with IO.\n",
             iFld,&fldName[0],&hydroBaseStateFlds[iFld*fldStride]);
      fflush(stdout);
    } //end for iFld...
    errorCode = sprintf(&fldName[0],"BS_pressure");
    errorCode = ioRegisterVar(&fldName[0], "float", 4, dims4d, hydroBaseStatePres);
+   // Add NetCDF attributes for pressure field
+   errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0); // 0 = not a forcing field
    printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
           &fldName[0],hydroBaseStatePres);
 #endif
@@ -1173,20 +1233,28 @@ int hydro_coreInit(){
            break;
         }//end switch(iFld)
         errorCode = ioRegisterVar(&TauScName[0], "float", 4, dims4d, &hydroTauFlds[iFld*fldStride]);
+	// Add NetCDF attributes for the registered variable
+	errorCode = hydro_coreAddFieldAttributes(&TauScName[0], 0);
         printf("hydro_coreInit:hydroTauFlds[%d] = %s stored at %p, has been registered with IO.\n",
                iFld,&TauScName[0],&hydroTauFlds[iFld*fldStride]);
         fflush(stdout);
      } //end for iFld...
      sprintf(&TauScName[0],"TauTH%d",1);
      errorCode = ioRegisterVar(&TauScName[0], "float", 4, dims4d, &hydroTauFlds[6*fldStride]);
+     // Add NetCDF attributes for TauTH1
+     errorCode = hydro_coreAddFieldAttributes(&TauScName[0], 0);
      printf("hydro_coreInit:hydroTauFlds[6] = %s stored at %p, has been registered with IO.\n",
              &TauScName[0],&hydroTauFlds[6*fldStride]);
      sprintf(&TauScName[0],"TauTH%d",2);
      errorCode = ioRegisterVar(&TauScName[0], "float", 4, dims4d, &hydroTauFlds[7*fldStride]);
+     // Add NetCDF attributes for TauTH2
+     errorCode = hydro_coreAddFieldAttributes(&TauScName[0], 0);
      printf("hydro_coreInit:hydroTauFlds[7] = %s stored at %p, has been registered with IO.\n",
              &TauScName[0],&hydroTauFlds[7*fldStride]);
      sprintf(&TauScName[0],"TauTH%d",3);
      errorCode = ioRegisterVar(&TauScName[0], "float", 4, dims4d, &hydroTauFlds[8*fldStride]);
+     // Add NetCDF attributes for TauTH3
+     errorCode = hydro_coreAddFieldAttributes(&TauScName[0], 0);
      printf("hydro_coreInit:hydroTauFlds[8] = %s stored at %p, has been registered with IO.\n",
              &TauScName[0],&hydroTauFlds[8*fldStride]);
      fflush(stdout);
@@ -1210,47 +1278,70 @@ int hydro_coreInit(){
 
      errorCode = sprintf(&fldName[0],"tskin");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, tskin);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],tskin);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"fricVel");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, fricVel);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],fricVel);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"htFlux");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, htFlux);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],htFlux);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"invOblen");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, invOblen);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],invOblen);
      fflush(stdout);
      if (moistureSelector > 0){
        errorCode = sprintf(&fldName[0],"qskin");
        errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, qskin);
+       // Add NetCDF attributes for the registered variable
+       errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
        printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
                &fldName[0],qskin);
        fflush(stdout);
        errorCode = sprintf(&fldName[0],"qFlux");
        errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, qFlux);
+       // Add NetCDF attributes for the registered variable
+       errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
        printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
                &fldName[0],qFlux);
        fflush(stdout);
      }
      errorCode = sprintf(&fldName[0],"z0m");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, z0m);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],z0m);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"z0t");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, z0t);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("hydro_coreInit:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],z0t);
      fflush(stdout);
-
+#ifdef URBAN_EXT
+     /*New sub-module style Init() call for URBAN initialization.*/
+     errorCode=URBANInit();
+#endif
+#ifdef GAD_EXT
+     /*New sub-module style Init() call for GAD initialization.*/
+     errorCode=GADInit();
+#endif
      MPI_Barrier(MPI_COMM_WORLD);   
      
      /* Provide intial approximation for the momentum and heat exchange coefficient at all surface locations*/
@@ -1291,6 +1382,8 @@ int hydro_coreInit(){
      sea_mask = memAllocateFloat2DField(Nxp, Nyp, Nh, "sea_mask");
      errorCode = sprintf(&fldName[0],"SeaMask");
      errorCode = ioRegisterVar(&fldName[0], "float", 3, dims2dTD, sea_mask);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("surflayer_offshore:Field = %s stored at %p, has been registered with IO.\n",
              &fldName[0],sea_mask);
      fflush(stdout);
@@ -1299,16 +1392,22 @@ int hydro_coreInit(){
    if(cellpertSelector>0){ //Cell Perturbation parameters (time-varying when cellpert_tvcp == 1 and hydroBCs==1)
      errorCode = sprintf(&fldName[0],"cellpert_amp");
      errorCode = ioRegisterVar(&fldName[0], "float", 1, dims1dTD, &cellpert_amp);
+     // Add NetCDF attributes for the registered variable
+     errorCode = ioAddStandardAttrs(&fldName[0], "K", "Cell perturbation amplitude", NULL);
      printf("cellpert:Variable = %s stored at %p, has been registered with IO.\n",
             &fldName[0],&cellpert_amp);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"cellpert_nts");
      errorCode = ioRegisterVar(&fldName[0], "int", 1, dims1dTD, &cellpert_nts);
+     // Add NetCDF attributes for the registered variable
+     errorCode = ioAddStandardAttrs(&fldName[0], "-", "Cell perturbation refresh rate in time steps", NULL);
      printf("cellpert:Variable = %s stored at %p, has been registered with IO.\n",
             &fldName[0],&cellpert_nts);
      fflush(stdout);
      errorCode = sprintf(&fldName[0],"cellpert_ktop");
      errorCode = ioRegisterVar(&fldName[0], "int", 1, dims1dTD, &cellpert_ktop);
+     // Add NetCDF attributes for the registered variable
+     errorCode = ioAddStandardAttrs(&fldName[0], "-", "Cell perturbation uppermost vertical grid level", NULL);
      printf("cellpert:Variable = %s stored at %p, has been registered with IO.\n",
             &fldName[0],&cellpert_ktop);
      fflush(stdout);
@@ -1318,6 +1417,8 @@ int hydro_coreInit(){
      canopy_lad = memAllocateFloat3DField(Nxp, Nyp, Nzp, Nh, "canopy_lad");
      errorCode = sprintf(&fldName[0],"CanopyLAD");
      errorCode = ioRegisterVar(&fldName[0], "float", 4, dims4d, canopy_lad);
+     // Add NetCDF attributes for the registered variable
+     errorCode = hydro_coreAddFieldAttributes(&fldName[0], 0);
      printf("canopy:Field = %s stored at %p, has been registered with IO.\n",
             &fldName[0],canopy_lad);
      fflush(stdout);
@@ -1333,6 +1434,8 @@ int hydro_coreInit(){
           sprintf(&moistName[0],"ql");
         }
         errorCode = ioRegisterVar(&moistName[0], "float", 4, dims4d, &moistScalars[iFld*fldStride]);
+	// Add NetCDF attributes for the registered variable
+	errorCode = hydro_coreAddFieldAttributes(&moistName[0], 0);
         printf("hydro_coreInit:moistScalars[%d] = %s stored at %p, has been registered with IO.\n",
                iFld,&moistName[0],&moistScalars[iFld*fldStride]);
         fflush(stdout);
@@ -1345,6 +1448,8 @@ int hydro_coreInit(){
            sprintf(&moistName[0],"F_ql");
          }
          errorCode = ioRegisterVar(&moistName[0], "float", 4, dims4d, &moistScalarsFrhs[iFld*fldStride]);
+	 // Add NetCDF attributes for the registered variable
+	 errorCode = hydro_coreAddFieldAttributes(&moistName[0], 1);
          printf("hydro_coreInit:moistScalarsFrhs[%d] = %s stored at %p, has been registered with IO.\n",
                 iFld,&moistName[0],&moistScalarsFrhs[iFld*fldStride]);
        }
@@ -1385,6 +1490,20 @@ int hydro_coreInit(){
                 break;
              }//end switch(iFld)
              errorCode = ioRegisterVar(&moistName[0], "float", 4, dims4d, &moistTauFlds[(iFld*3+iFld2)*fldStride]);
+	     // Add NetCDF attributes for the moisture SGS field
+             char longName[256];
+             char *direction[] = {"x", "y", "z"};
+             if (iFld == 0) { // TauQv (water vapor)
+                 sprintf(longName, "Subgrid-%s water vapor flux in %s direction", direction[iFld2], direction[iFld2]);
+                 errorCode = ioAddStandardAttrs(&moistName[0], "kg kg-1 m s-1", longName, NULL);
+             } else if (iFld == 1) { // TauQl (liquid water)
+                 sprintf(longName, "Subgrid-%s liquid water flux in %s direction", direction[iFld2], direction[iFld2]);
+                 errorCode = ioAddStandardAttrs(&moistName[0], "kg kg-1 m s-1", longName, NULL);
+             } else {
+                 // Generic moisture SGS field for other moisture species
+                 sprintf(longName, "Subgrid-%s moisture flux in %s direction", direction[iFld2], direction[iFld2]);
+                 errorCode = ioAddStandardAttrs(&moistName[0], "kg kg-1 m s-1", longName, NULL);
+             }
              printf("hydro_coreInit:moistTauFlds[%d] = %s stored at %p, has been registered with IO.\n",
                     iFld*3+iFld2,&moistName[0],&moistTauFlds[(iFld*3+iFld2)*fldStride]);
              fflush(stdout);
@@ -1473,6 +1592,30 @@ int hydro_coreInit(){
    return(errorCode);
 } //end hydro_coreInit()
 
+/*----->>>>> int hydro_coreSecondaryPreparations();   -------------------------------------------------
+* Second-stage setup for the HYDRO_CORE module, meant to follow the secondary GRID module
+* preparations (definition of the domain coordinate system and Jacobians) and the
+* TIME_INTEGRATION module initialization. dt is passed unchanged to GADInitTurbineRefChars()
+* in GAD_EXT builds. Returns the code of the last call made here; earlier codes are overwritten.
+*/
+int hydro_coreSecondaryPreparations(float dt){
+  int status;
+
+  /*The grid module is completely defined at this point, so the base state can be set up*/
+  status = hydro_coreSetBaseState();
+
+#ifdef GAD_EXT
+  /*With GAD switched on: turbine reference characteristics first, then the swept-volume and rotor masks*/
+  if(GADSelector > 0){
+    status = GADInitTurbineRefChars(dt);
+    status = GADCreateTurbineVolMask();
+    status = GADCreateTurbineRotorMask();
+  }//end if GADSelector > 0
+#endif
+
+  return(status);
+} //end hydro_coreSecondaryPreparations()
+
 /*----->>>>> int hydro_corePrepareFromInitialConditions();   -------------------------------------------------
 * Used to undertake the sequence of steps to build the Frhs of all hydro_core prognostic variable fields.
 */
@@ -3146,6 +3289,12 @@ int hydro_coreCleanup(){
      memReleaseFloat(hydroAuxScalarsFrhs);
    } //end if NhydroAuxScalars
 
+#ifdef GAD_EXT
+   if(GADSelector > 0){
+     errorCode = GADCleanup();
+   }
+#endif
+
    if(hydroBCs==1){
      free(hydroBndysFile);
      free(XZBdyPlanesGlobal);
@@ -3164,5 +3313,184 @@ int hydro_coreCleanup(){
      }
    } //end if hydroBCs==1
 
+#ifdef URBAN_EXT
+   if(urbanSelector > 0){
+     errorCode = URBANCleanup();
+   }
+#endif
+
    return(errorCode);
 }//end hydro_coreCleanup()
+
+/*----->>>>> helper functions to create forcing strings --------------------------------------------------*/
+
+// Build the units string for the time tendency (forcing) of a quantity whose units are `units`.
+//   - If units contains "s-N" with N > 0, that term becomes "s-(N+1)" and the text on both
+//     sides of it is kept, e.g. "m s-1" -> "m s-2" and "kg m-1 s-2 K-1" -> "kg m-1 s-3 K-1".
+//   - Otherwise " s-1" is appended, e.g. "K" -> "K s-1".
+// Only the first "s-" in the string is examined; when no positive exponent follows it, the
+// append rule applies.
+// Parameters:
+//   units - NUL-terminated units string; may be NULL
+// Returns: a malloc'ed string that the caller must free(), or NULL when units is NULL or the
+//          allocation fails
+static char* make_forcing_units(const char *units) {
+    if(units == NULL) return NULL;
+
+    // One buffer serves both outcomes: the result is at most the input plus " s-1" or plus one
+    // extra exponent digit, so 16 spare bytes cover it together with the terminator.
+    size_t buf_len = strlen(units) + 16;
+    char *result = (char*)malloc(buf_len);
+    if(!result) return NULL;
+
+    const char *s_ptr = strstr(units, "s-");
+    if(s_ptr) {
+        // Parse the exponent after "s-"; strtol also reports where the number ends, so the
+        // rest of the string can be carried over instead of being dropped.
+        char *tail;
+        long exponent = strtol(s_ptr + 2, &tail, 10); // 0 when no number follows "s-"
+
+        if(exponent > 0) {
+            int prefix_len = (int)(s_ptr - units) + 2; // everything up to and including "s-"
+
+            // prefix, incremented exponent, then whatever followed the old exponent
+            snprintf(result, buf_len, "%.*s%ld%s", prefix_len, units, exponent + 1, tail);
+            return result;
+        }
+    }
+
+    // If no "s-" pattern or exponent, just append " s-1"
+    snprintf(result, buf_len, "%s s-1", units);
+    return result;
+}//end make_forcing_units()
+
+// Returns a malloc'ed "<long_name> forcing" for the caller to free(); NULL if long_name is NULL or malloc fails
+static char* make_forcing_long_name(const char *long_name) {
+    static const char tag[] = " forcing";
+    if(long_name == NULL) return NULL;
+
+    const size_t base_len = strlen(long_name);
+    char *joined = (char*)malloc(base_len + sizeof(tag)); // sizeof(tag) includes the '\0'
+    if(joined == NULL) return NULL;
+
+    memcpy(joined, long_name, base_len);
+    memcpy(joined + base_len, tag, sizeof(tag)); // copies the suffix together with its terminator
+    return joined;
+}//end make_forcing_long_name()
+
+/*----->>>>> int hydro_coreAddFieldAttributes();  -----------------------------------------------
+* Utility function to add NetCDF attributes to hydro core fields based on field name
+* Parameters:
+*   fieldName - name of the registered field; passed unchanged to ioAddStandardAttrs()
+*   isForcing - non-zero for a forcing field: a leading "F_" is skipped for the table lookup, the
+*               units gain one inverse-second power, the long name gains " forcing", and no
+*               standard name is attached
+* Returns: the code returned by ioAddStandardAttrs()
+*/
+int hydro_coreAddFieldAttributes(char *fieldName, int isForcing) {
+    int errorCode = 0;
+    char *baseFieldName = fieldName;
+
+    // If this is a forcing field, skip the "F_" prefix to get the base field name
+    if(isForcing && strncmp(fieldName, "F_", 2) == 0) {
+        baseFieldName = fieldName + 2; // Skip "F_" prefix
+    }
+
+    // Define field metadata structure
+    typedef struct {
+        char *pattern;
+        char *units;
+        char *long_name;
+        char *standard_name;
+    } field_metadata_t;
+
+    // Field metadata lookup table. Patterns are matched as prefixes of the base field name and
+    // the first hit wins, so a longer pattern must precede any shorter one it starts with.
+    // static: the contents never change, so the table is built once rather than on every call.
+    static field_metadata_t field_metadata[] = {
+        {"BS_pressure", "Pa",            "Base state pressure",                                           "air_pressure"},
+        {"TauTH1",      "K m s-1",       "Subgrid-x turbulent flux of potential temperature",             NULL},
+        {"TauTH2",      "K m s-1",       "Subgrid-y turbulent flux of potential temperature",             NULL},
+        {"TauTH3",      "K m s-1",       "Subgrid-z turbulent flux of potential temperature",             NULL},
+        {"Tau11",       "m2 s-2",        "Subgrid-xx stress tensor component",                            NULL},
+        {"Tau21",       "m2 s-2",        "Subgrid-yx stress tensor component",                            NULL},
+        {"Tau31",       "m2 s-2",        "Subgrid-zx stress tensor component",                            NULL},
+        {"Tau32",       "m2 s-2",        "Subgrid-zy stress tensor component",                            NULL},
+        {"Tau22",       "m2 s-2",        "Subgrid-yy stress tensor component",                            NULL},
+        {"Tau33",       "m2 s-2",        "Subgrid-zz stress tensor component",                            NULL},
+        {"rho",         "kg m-3",        "Air density",                                                   "air_density"},
+        {"u",           "m s-1",         "Zonal wind velocity",                                           "eastward_wind"},
+        {"v",           "m s-1",         "Meridional wind velocity",                                      "northward_wind"},
+        {"w",           "m s-1",         "Vertical wind velocity",                                        "upward_air_velocity"},
+        {"theta",       "K",             "Potential temperature",                                         "air_potential_temperature"},
+        {"pressure",    "Pa",            "Perturbation pressure",                                         NULL},
+        {"TKE_0",       "m2 s-2",        "Subgrid turbulent kinetic energy of air at grid-filter scale",  NULL},
+        {"TKE_1",       "m2 s-2",        "Subgrid turbulent kinetic energy of air at canopy leaf scale",  NULL},
+        // TKE forcing fields are registered as "F_TKE<n>" (no underscore), so their base names need their own entries
+        {"TKE0",        "m2 s-2",        "Subgrid turbulent kinetic energy of air at grid-filter scale",  NULL},
+        {"TKE1",        "m2 s-2",        "Subgrid turbulent kinetic energy of air at canopy leaf scale",  NULL},
+        {"AuxScalar",   "-",             "Auxiliary scalar",                                              NULL},
+        {"moisture",    "kg kg-1",       "Water vapor mixing ratio",                                      "humidity_mixing_ratio"},
+        {"qv",          "g kg-1",        "Water vapor mixing ratio",                                      "humidity_mixing_ratio"},
+        {"ql",          "g kg-1",        "Cloud liquid water mixing ratio",                               "cloud_liquid_water_mixing_ratio"},
+        {"fricVel",     "m s-1",         "Surface friction velocity",                                     "surface_friction_velocity"},
+        {"htFlux",      "K m s-1",       "Surface sensible heat flux",                                    "surface_upward_sensible_heat_flux"},
+        {"qFlux",       "kg kg-1 m s-1", "Surface latent heat flux",                                      "surface_upward_latent_heat_flux"},
+        {"tskin",       "K",             "Surface skin temperature",                                      "surface_temperature"},
+        {"qskin",       "kg kg-1",       "Surface skin water vapor mixing ratio",                         NULL},
+        {"z0m",         "m",             "Roughness length for momentum",                                 "surface_roughness_length_for_momentum_in_air"},
+        {"z0t",         "m",             "Roughness length for heat",                                     "surface_roughness_length_for_heat_in_air"},
+        {"invOblen",    "m-1",           "Inverse Obukhov length",                                        NULL}, // not a boundary-layer thickness, so no standard name
+        {"CanopyLAD",   "m-1",           "Leaf area density",                                             "leaf_area_density"},
+        {"SeaMask",     "-",             "Sea mask",                                                      "sea_area_fraction"},
+        {NULL, NULL, NULL, NULL} // End marker
+    };
+
+    // Search for matching field pattern
+    for(int i = 0; field_metadata[i].pattern != NULL; i++) {
+        if(strncmp(baseFieldName, field_metadata[i].pattern, strlen(field_metadata[i].pattern)) != 0) continue;
+
+        if(isForcing) {
+            // The helpers hand back malloc'ed strings (NULL on allocation failure); free(NULL) is a no-op
+            char *forcing_units = make_forcing_units(field_metadata[i].units);
+            char *forcing_long_name = make_forcing_long_name(field_metadata[i].long_name);
+            errorCode = ioAddStandardAttrs(fieldName, forcing_units, forcing_long_name, NULL); // No standard name for forcing fields
+            free(forcing_units);
+            free(forcing_long_name);
+        } else {
+            errorCode = ioAddStandardAttrs(fieldName, field_metadata[i].units,
+                                           field_metadata[i].long_name, field_metadata[i].standard_name);
+        }
+        return errorCode;
+    }
+
+    // Handle special case for BS_ fields with numeric identifiers
+    if(strncmp(baseFieldName, "BS_", 3) == 0) {
+        char *endptr;
+        long fieldIndex = strtol(baseFieldName + 3, &endptr, 10);
+        // Require at least one digit and nothing after the number; a bare "BS_" must not be read as index 0
+        if(endptr != baseFieldName + 3 && *endptr == '\0') {
+            if(fieldIndex == RHO_INDX_BS) { // 0 = rho base state
+                errorCode = ioAddStandardAttrs(fieldName, "kg m-3", "Base state air density", "air_density");
+            }
+            else if(fieldIndex == THETA_INDX_BS) { // 1 = theta base state
+                errorCode = ioAddStandardAttrs(fieldName, "K", "Base state potential temperature", "air_potential_temperature");
+            }
+            else {
+                errorCode = ioAddStandardAttrs(fieldName, "-", "Base state field", NULL);
+            }
+            return errorCode;
+        }
+    }
+
+    // For unrecognized fields, add generic attributes
+    printf("Warning: Unrecognized field '%s' in hydro_coreAddFieldAttributes, adding generic attributes\n", fieldName);
+    if(isForcing) {
+        errorCode = ioAddStandardAttrs(fieldName, "s-1", "Generic field forcing", NULL);
+    } else {
+        errorCode = ioAddStandardAttrs(fieldName, "-", "Generic field", NULL);
+    }
+
+    return errorCode;
+}//end hydro_coreAddFieldAttributes()
+
diff --git a/SRC/HYDRO_CORE/hydro_core.h b/SRC/HYDRO_CORE/hydro_core.h
index 1d17df0..b24f239 100644
--- a/SRC/HYDRO_CORE/hydro_core.h
+++ b/SRC/HYDRO_CORE/hydro_core.h
@@ -16,6 +16,14 @@
 #ifndef _HYDRO_CORE_H
 #define _HYDRO_CORE_H
 
+/*Model-Extensions includes*/
+#ifdef URBAN_EXT
+  #include <urban.h>
+#endif
+#ifdef GAD_EXT
+  #include <GAD.h>
+#endif
+
 /*hydro_core_ return codes */
 #define HYDRO_CORE_SUCCESS    0
 
@@ -29,14 +37,14 @@
 #define THETA_INDX_BS         1
 
 #define MAX_HC_FLDNAME_LENGTH 256
-#define MAX_AUXSC_SRC         20
+#define MAX_AUXSC_SRC         256
 /*#################------------------- HYDRO_CORE module variable declarations ---------------------#################*/
 /* Parameters */
 extern int Nhydro;          /*Number of prognostic variable fields under hydro_core */
 extern int hydroBCs;          /*selector for hydro BC set. 1= Dirichlet lateral, ceiling and surface boundary conditions (Limited Area Domain -- LAD),
 			                                   2= periodicHorizVerticalAbl */
 
-extern int hydroForcingWrite;   /*switch for dumping forcing fields of prognostic variables. 0-off (default), 1= on*/
+extern int hydroForcingWrite;   /*switch for writing output of forcing fields of prognostic variables. 0-off (default), 1= on*/
 extern int hydroForcingLog;     /*switch for logging Frhs summary metrics. 0-off (default), 1= on*/
 extern int hydroSubGridWrite;   /*switch for SGS fields 0-off (default), 1= on*/
 extern float *hydroFlds;        /*Base Adress of memory containing all prognostic variable fields under hydro_core */
@@ -97,7 +105,7 @@ extern int buoyancySelector;     /*buoyancy Force selector: 0=off, 1=on*/
 
 /*---CORIOLIS*/
 extern int coriolisSelector;     /*coriolis Force selector: 0= none, 1= Horiz.-only, 2=Horz. & Vert.*/
-extern float coriolisLatitude;   /*Charactersitc latitude in degrees from equator of the LES domain*/
+extern float coriolisLatitude;   /*Characteristic latitude in degrees from equator of the LES domain*/
 extern float corioConstHorz;     /*Latitude dependent horizontal Coriolis term constant */
 extern float corioConstVert;     /*Latitude dependent Vertical Coriolis term constant */
 extern int coriolis_LAD;         /*Coriolis force selector for LAD BC cases (hydroBCs==1): 0=off, 1=on*/
@@ -105,7 +113,7 @@ extern float corioLS_fact;       /*large-scale factor on Coriolis term*/
 
 /*---TURBULENCE*/
 extern int turbulenceSelector;    /*turbulence scheme selector: 0= none, 1= Lilly/Smagorinsky */
-extern int TKESelector;           /* Prognostic TKE selector: 0= none, 1= Prognostic */
+extern int TKESelector;           /* Prognostic TKE selector: 0= none, 1= Prognostic, 2= requires canopySelector=1 */
 extern int TKEAdvSelector;        /* SGSTKE advection scheme selector */
 extern float TKEAdvSelector_b_hyb;     /*hybrid advection scheme parameter */
 extern float c_s;     /* Smagorinsky turbulence model constant used for turbulenceSelector = 1 with TKESelector = 0 */
@@ -122,13 +130,13 @@ extern float *hydroDiffTauYFlds; /*Base address for diffusion TauY arrays for al
 extern float *hydroDiffTauZFlds; /*Base address for diffusion TauZ arrays for all prognostic fields*/
 
 /*---ADVECTION*/
-extern int advectionSelector;    /*advection scheme selector: 0= 1st-order upwind, 2= 3rd-order QUICK */
+extern int advectionSelector;    /*advection scheme selector: 0=1st-order upwind, 1=3rd-order QUICK, 2=hybrid 3rd-4th order, 3=hybrid 5th-6th order, 4=3rd-order WENO, 5=5th-order WENO, 6=2nd-order centered */
 extern int ceilingAdvectionBC;   /*selector to allow advection through the domain ceiling 1=on, 0=off (w-ceiling = 0)*/
 extern float b_hyb; /*hybrid advection scheme parameter: 0.0= lower-order upwind, 
-                             1.0=higher-order cetered, 0.0 < b_hyb < 1.0 = hybrid */
+                             1.0=higher-order centered, 0.0 < b_hyb < 1.0 = hybrid */
 
 /*---SURFACE LAYER*/
-extern int surflayerSelector;    /*Monin-Obukhov surface layer selector: 0= off, 1= on */
+extern int surflayerSelector;    /*Monin-Obukhov surface layer selector: 0=off, 1=surface kinematic heat flux (surflayer_wth), 2=skin temperature rate (surflayer_tr) */
 extern float surflayer_z0;       /* roughness length (momentum) */
 extern float surflayer_z0t;      /* roughness length (temperature) */
 extern float surflayer_wth;      /* kinematic sensible heat flux at the surface */
@@ -199,7 +207,7 @@ extern int moistureNvars;           /* number of moisture species */
 extern int moistureAdvSelectorQv;     /* water vapor advection scheme selector */
 extern float moistureAdvSelectorQv_b; /*hybrid advection scheme parameter */
 extern int moistureSGSturb;         /* selector to apply sub-grid scale diffusion to moisture fields */
-extern int moistureCond;            /* selector to apply condensation to mositure fields */
+extern int moistureCond;            /* selector to apply condensation to moisture fields */
 extern float *moistScalars;         /*Base address for moisture field arrays*/
 extern float *moistScalarsFrhs;     /*Base address for moisture forcing field arrays*/
 extern float *moistTauFlds;         /*Base address for moisture SGS field arrays*/
@@ -289,6 +297,13 @@ int hydro_coreGetParams();
 */
 int hydro_coreInit();
 
+/*----->>>>> int hydro_coreSecondaryPreparations();   -------------------------------------------------
+* Secondary preparations (initializations) in the HYDRO_CORE module following secondary
+* GRID module preparations i.e. definition of the domain coordinate system and Jacobians
+* and TIME_INTEGRATION module initialization
+*/
+int hydro_coreSecondaryPreparations(float dt);
+
 /*----->>>>> int hydro_corePrepareFromInitialConditions();   -------------------------------------------------
 * Used to undertake the sequence of steps to build the Frhs of all hydro_core prognostic variable fields.
 */
@@ -365,5 +380,14 @@ int srcAuxScConstructor();
 */
 int hydro_coreCleanup();
 
+/*----->>>>> int hydro_coreAddFieldAttributes();  --------------------------------------------------------
+* Utility function to add NetCDF attributes to hydro core fields based on field name
+* Parameters:
+*   fieldName - name of the field to add attributes to  
+*   isForcing - flag indicating if this is a forcing field (0=regular field, 1=forcing field)
+* Returns: error code (0 = success, non-zero = error)
+*/
+int hydro_coreAddFieldAttributes(char *fieldName, int isForcing);
+
 
 #endif // _HYDRO_CORE_H
diff --git a/SRC/IO/io.c b/SRC/IO/io.c
index 7d0335e..76b3ebf 100644
--- a/SRC/IO/io.c
+++ b/SRC/IO/io.c
@@ -34,12 +34,12 @@ size_t start2dTD[MAXDIMS];
 
 /*######################------------------- IO module variable definitions ---------------------#################*/
 /* Parameters */
-int ioOutputMode;  /*0: N-to-1 gather and write to a netcdf file, 1:N-to-N writes of FastEddy binary files*/
+int ioOutputMode;  /*0: N-to-1 gather and write to a netCDF file, 1: N-to-N writes of FastEddy binary files*/
 char *outPath;     /* Directory Path where output files are to be written */
 char *outFileBase; /* Base name of the output file series as in (outFileBase).element-in-series */
 char *inPath;      /* Directory Path where input files are to be read from */
 char *inFile;      /* Name of the input file */
-int frqOutput;     /*frequency (in timesteps) at which to produce output*/
+int frqOutput;     /*frequency (in timesteps) at which to produce output; should be an even multiple of NtBatch*/
 
 /*static Variables*/
 char *outSubString; /*subString portion of outFile holding element-in-series as in path/base.substring */
@@ -51,6 +51,7 @@ float *ioBuffField;
 float *ioBuffFieldTransposed;
 float *ioBuffFieldRho;
 float *ioBuffFieldTransposed2D;
+int *ioBuffFieldInt;
 
 int nz_varid;
 int ny_varid;
@@ -66,7 +67,7 @@ int ioGetParams(){
    int errorCode = IO_SUCCESS;
 
    /*query for each IO parameter */
-   ioOutputMode=0;
+   ioOutputMode=0; //default  = 0
    errorCode = queryIntegerParameter("ioOutputMode", &ioOutputMode, 0, 1, PARAM_OPTIONAL);
    errorCode = queryPathParameter("inPath", &inPath, PARAM_OPTIONAL);
    errorCode = queryStringParameter("inFile", &inFile, PARAM_OPTIONAL);
@@ -85,12 +86,12 @@ int ioInit(){
  
    if(mpi_rank_world == 0){
       printComment("IO parameters---");
-      printParameter("ioOutputMode", "0: N-to-1 gather and write to a netcdf file, 1:N-to-N writes of FastEddy binary files");
+      printParameter("ioOutputMode", "0: N-to-1 gather and write to a netCDF file, 1:N-to-N writes of FastEddy binary files");
       printParameter("inPath", "Path where initial/restart file is read in from");
       printParameter("inFile", "name of the input file for coordinate system and initial or restart conditions");
       printParameter("outPath", "Path where output files are to be written");
       printParameter("outFileBase", "Base name of the output file series as in (outFileBase).element-in-series");
-      printParameter("frqOutput", "frequency (in timesteps) at which to produce output");
+      printParameter("frqOutput", "frequency (in timesteps) at which to produce output; should be an even multiple of NtBatch");
    } //end if(mpi_rank_world == 0) 
 
    /*Broadcast the parameters across mpi_ranks*/
@@ -190,8 +191,8 @@ int ioAllocateBuffers(int globalNx, int globalNy, int globalNz){
      ioBuffFieldTransposed = (float *) malloc(numElems*sizeof(float));
      ioBuffFieldRho = (float *) malloc(numElems*sizeof(float));
      ioBuffFieldTransposed2D = (float *) malloc(numElems2D*sizeof(float));
+     ioBuffFieldInt = (int *) malloc(numElems*sizeof(int));
    } //endif mpi_Rank_world==0
-
    return(errorCode);
 } //end ioAllocateBuffers()
 
@@ -208,11 +209,13 @@ int ioCleanup(){
    int errorCode = IO_SUCCESS;
 
    /*free the io-buffers*/
-   free(ioBuffField);
-   free(ioBuffFieldTransposed);
-   free(ioBuffFieldRho);
-   free(ioBuffFieldTransposed2D);
-
+   if(mpi_rank_world == 0){
+     free(ioBuffField);
+     free(ioBuffFieldTransposed);
+     free(ioBuffFieldRho);
+     free(ioBuffFieldTransposed2D);
+     free(ioBuffFieldInt);
+   } //end if mpi_rank_world == 0
    /*free the registry list*/
    destroyList();
 
@@ -242,3 +245,48 @@ int ioRegisterVar(char *name, char *type, int nDims, int *dimids, void *varMemAd
     return(errorCode);
 } //end ioRegisterVar()
 
+/*----->>>>> int ioAddVarAttr(); -------------------------------------------------------------------
+* Add a single attribute to an already registered variable.
+* varName: registered variable name; attrName/attrType/attrValue: strings forwarded unchanged to addAttrToVar().
+* Returns IO_SUCCESS, IO_ERROR_VAR_NOT_FOUND (varName not in the registry) or IO_ERROR_ATTR_ADD (addAttrToVar() failed).
+*/
+int ioAddVarAttr(char *varName, char *attrName, char *attrType, char *attrValue){
+    int errorCode = IO_SUCCESS;
+    int tmperrorCode = 0;
+
+    /* Check if variable exists first, so a missing variable is reported with its own error code */
+    if(getNamedVarFromList(varName) == NULL){
+        printf("ERROR: Variable %s not found in registry...\n", varName);
+        return IO_ERROR_VAR_NOT_FOUND;
+    }
+    /* Add the attribute; any non-zero status from the list layer is mapped to IO_ERROR_ATTR_ADD */
+    tmperrorCode = addAttrToVar(varName, attrName, attrType, attrValue);
+    if(tmperrorCode != 0){
+        printf("ERROR = %d returned by addAttrToVar() for attribute %s...\n", tmperrorCode, attrName);
+        return IO_ERROR_ATTR_ADD;
+    }
+    return errorCode;
+} //end ioAddVarAttr()
+
+/*----->>>>> int ioAddStandardAttrs(); -------------------------------------------------------------
+* Add standard CF convention attributes to a variable (units, long_name, standard_name).
+* varName: registered variable name; units/longName/standardName: strings forwarded unchanged to addStandardAttrsToVar().
+* Returns IO_SUCCESS, IO_ERROR_VAR_NOT_FOUND (varName not in the registry) or IO_ERROR_ATTR_ADD (addStandardAttrsToVar() failed).
+*/
+int ioAddStandardAttrs(char *varName, char *units, char *longName, char *standardName){
+    int errorCode = IO_SUCCESS;
+    int tmperrorCode = 0;
+
+    /* Check if variable exists first, so a missing variable is reported with its own error code */
+    if(getNamedVarFromList(varName) == NULL){
+        printf("ERROR: Variable %s not found in registry...\n", varName);
+        return IO_ERROR_VAR_NOT_FOUND;
+    }
+    /* Add the standard attributes using the ioVarsList function; any non-zero status maps to IO_ERROR_ATTR_ADD */
+    tmperrorCode = addStandardAttrsToVar(varName, units, longName, standardName);
+    if(tmperrorCode != 0){
+        printf("ERROR = %d returned by addStandardAttrsToVar()...\n", tmperrorCode);
+        return IO_ERROR_ATTR_ADD;
+    }
+    return errorCode;
+} //end ioAddStandardAttrs()
diff --git a/SRC/IO/io.h b/SRC/IO/io.h
index 0c23229..2f99651 100644
--- a/SRC/IO/io.h
+++ b/SRC/IO/io.h
@@ -22,19 +22,23 @@
 #define MAXDIMS     16   //used for static allocation of dimids length. Could be made dynamic.
 
 #define IO_ERROR_DIMLEN          200
+#define IO_ERROR_ATTR_ADD        201
+#define IO_ERROR_VAR_NOT_FOUND   202
+/*#define IO_ERROR_ATTR_LIMIT      203*/
 
 /*io includes*/
 #include <io_netcdf.h>
 #include <io_binary.h>
+#include <ioVarsList.h>
 
 /*######################------------------- IO module variable declarations ---------------------#################*/
 /* Parameters */
-extern int ioOutputMode;  /*0: N-to-1 gather and write to a netcdf file, 1:N-to-N writes of FastEddy binary files*/
+extern int ioOutputMode;  /*0: N-to-1 gather and write to a netCDF file, 1: N-to-N writes of FastEddy binary files*/
 extern char *outPath;     /* Directory Path where output files are to be written */
 extern char *outFileBase; /* Base name of the output file series as in (outFileBase).element-in-series */ 
 extern char *inPath;      /* Directory Path where input files are to be read from */
 extern char *inFile;      /* Name of the input file */ 
-extern int frqOutput;     /*frequency in timesteps to produce output*/
+extern int frqOutput;     /*frequency in timesteps to produce output; should be an even multiple of NtBatch*/
 
 /*static Variables*/
 extern char *outSubString; /*subString portion of outFile holding element-in-series as in path/base.substring */
@@ -77,4 +81,14 @@ int ioCleanup();
 */
 int ioRegisterVar(char *name, char *type, int nDims, int *dimids, void *varMemAddress);
 
+/*----->>>>> int ioAddVarAttr(); -------------------------------------------------------------------
+* Add a single attribute to an already registered variable; returns IO_SUCCESS, IO_ERROR_VAR_NOT_FOUND or IO_ERROR_ATTR_ADD
+*/
+int ioAddVarAttr(char *varName, char *attrName, char *attrType, char *attrValue);
+
+/*----->>>>> int ioAddStandardAttrs(); -------------------------------------------------------------
+* Add standard CF convention attributes to a variable (units, long_name, standard_name); returns IO_SUCCESS, IO_ERROR_VAR_NOT_FOUND or IO_ERROR_ATTR_ADD
+*/
+int ioAddStandardAttrs(char *varName, char *units, char *longName, char *standardName);
+
 #endif // _IO_H
diff --git a/SRC/IO/ioVarsList.c b/SRC/IO/ioVarsList.c
index 7019f86..94f8edd 100644
--- a/SRC/IO/ioVarsList.c
+++ b/SRC/IO/ioVarsList.c
@@ -32,6 +32,7 @@ ioVar_t *createList(){
       exit(0);
    }
    ptr->next = NULL;
+   ptr->nAttrs = 0;  /* Initialize attribute count */
    head = ptr;
    curr = ptr;
 
@@ -80,6 +81,7 @@ int addVarToList(char *name, char *type, int nDims, int *dimids, void *varMemAdd
        ptr->dimids[i] = dimids[i];       
     }//end for i
     ptr->varMemAddress = varMemAddress;
+    ptr->nAttrs = 0;  /* Initialize attribute count */
     ptr->next = NULL;
     curr->next = ptr;
     curr = ptr;
@@ -88,8 +90,47 @@ int addVarToList(char *name, char *type, int nDims, int *dimids, void *varMemAdd
     return(0);
 } //end addVarToList
 
+/* Append one attribute to registered variable varName. Returns 0 on success, or -1 if the variable is unknown, its attribute table is full, or a string is NULL or too long for its fixed-size ioAttr_t field (checked up front because strcpy() is unbounded). */
+int addAttrToVar(char *varName, char *attrName, char *attrType, char *attrValue){
+    ioVar_t *ptr = getNamedVarFromList(varName);
+    if(ptr == NULL){
+        printf("addAttrToVar: Variable %s not found.\n", varName);
+        return(-1);
+    }
+    if(ptr->nAttrs >= MAX_ATTRS){
+        printf("addAttrToVar: Maximum attributes (%d) reached for variable %s.\n", MAX_ATTRS, varName);
+        return(-1);
+    }
+    if(attrName == NULL || attrType == NULL || attrValue == NULL || strlen(attrName) >= MAX_NAME_LENGTH || strlen(attrType) >= MAX_TYPE_LENGTH || strlen(attrValue) >= MAX_ATTR_LENGTH){
+        printf("addAttrToVar: NULL or over-length attribute string rejected for variable %s.\n", varName);
+        return(-1);
+    }
+    strcpy(ptr->attrs[ptr->nAttrs].name, attrName);
+    strcpy(ptr->attrs[ptr->nAttrs].type, attrType);
+    strcpy(ptr->attrs[ptr->nAttrs].value, attrValue);
+    ptr->nAttrs++;
+    return(0);
+} //end addAttrToVar
+/* Attach the "units", "long_name" and "standard_name" text attributes to varName, in that order.
+ * Arguments that are NULL or empty are skipped; the statuses returned by addAttrToVar() are OR-ed together. */
+int addStandardAttrsToVar(char *varName, char *units, char *longName, char *standardName){
+    char *attrNames[3] = {"units", "long_name", "standard_name"};
+    char *attrValues[3];
+    int status = 0;
+    int k;
+    attrValues[0] = units;
+    attrValues[1] = longName;
+    attrValues[2] = standardName;
+    for(k = 0; k < 3; k++){
+        if(attrValues[k] != NULL && attrValues[k][0] != '\0'){  /* only non-empty values become attributes */
+            status |= addAttrToVar(varName, attrNames[k], "text", attrValues[k]);
+        }
+    }
+    return status;
+} //end addStandardAttrsToVar
+
 int printList(){
-   int i;
+  int i, j;
    ioVar_t *tmp;
    /*print the contents of the list from beginning to end*/
    i = 0;
@@ -98,29 +139,35 @@ int printList(){
    while(tmp != NULL){
      switch (tmp->nDims){
        case 1:
-         printf("%d: %s, %s, %d, [%d]\n",
-                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0]);
+         printf("%d: %s, %s, %d, [%d], %d attrs\n",
+                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->nAttrs);
          break;
        case 2:
-         printf("%d: %s, %s, %d, [%d %d]\n",
-                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1]);
+         printf("%d: %s, %s, %d, [%d %d], %d attrs\n",
+                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->nAttrs);
          break;
        case 3:
-         printf("%d: %s, %s, %d, [%d %d %d]\n",
-                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2]);
+         printf("%d: %s, %s, %d, [%d %d %d], %d attrs\n",
+                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2],tmp->nAttrs);
          break;
        case 4:
-         printf("%d: %s, %s, %d, [%d %d %d %d]\n",
-                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2],tmp->dimids[3]);
+         printf("%d: %s, %s, %d, [%d %d %d %d], %d attrs\n",
+                 i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2],tmp->dimids[3],tmp->nAttrs);
          break;
        case 5:
-         printf("%d: %s, %s, %d, [%d %d %d %d %d]\n",
-           i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2],tmp->dimids[3],tmp->dimids[4]);
+         printf("%d: %s, %s, %d, [%d %d %d %d %d], %d attrs\n",
+           i,tmp->name,tmp->type,tmp->nDims,tmp->dimids[0],tmp->dimids[1],tmp->dimids[2],tmp->dimids[3],tmp->dimids[4],tmp->nAttrs);
          break;
        default:
           printf("%d has nDims< 1 or nDims >5, no printing...\n", i);
          break;
       }//end switch tmp->nDims
+
+      /* Print attributes */
+      for(j = 0; j < tmp->nAttrs; j++){
+          printf("    Attr %d: %s (%s) = %s\n", j, tmp->attrs[j].name, tmp->attrs[j].type, tmp->attrs[j].value);
+      }
+           
       tmp = tmp->next;
       i++;
    }// end while
diff --git a/SRC/IO/ioVarsList.h b/SRC/IO/ioVarsList.h
index a15cd82..a72b457 100644
--- a/SRC/IO/ioVarsList.h
+++ b/SRC/IO/ioVarsList.h
@@ -18,7 +18,16 @@
 
 #define MAXDIMS         16   //used for static allocation of dimids array. Could be made dynamic.
 #define MAX_NAME_LENGTH 128  //used for static allocation of name char array. Could be made dynamic
-#define MAX_TYPE_LENGTH 16  //used for static allocation of type char array. Could be made dynamic
+#define MAX_TYPE_LENGTH 16   //used for static allocation of type char array. Could be made dynamic
+#define MAX_ATTR_LENGTH 256  //used for static allocation of attribute strings
+#define MAX_ATTRS       10   //maximum number of attributes per variable
+
+/*attribute structure: one name/type/value triple stored per registered variable in ioVar_t.attrs[]*/
+typedef struct _ioAttr_t {
+   char    name[MAX_NAME_LENGTH];  /* attribute name string, e.g. "units" */
+   char    type[MAX_TYPE_LENGTH];  /* "text", "float", "double", "int" */
+   char    value[MAX_ATTR_LENGTH]; /* string representation of value */
+} ioAttr_t;
 
 /*type definitions*/
 typedef struct _ioVar_t {
@@ -28,6 +37,11 @@ typedef struct _ioVar_t {
    int     dimids[MAXDIMS];
    void    *varMemAddress;
    int     ncvarid;
+
+  /* Attribute support */
+   int     nAttrs;                /* number of attributes */
+   ioAttr_t attrs[MAX_ATTRS];     /* array of attributes */
+   
    struct _ioVar_t *next;
 } ioVar_t;
 
@@ -38,4 +52,25 @@ int addVarToList(char *name, char *type, int nDims, int *dimids, void *varMemAdd
 int printList();
 void destroyList();
 
+/* Add a single NetCDF attribute to an existing variable in the list
+ * Parameters:
+ *   varName - name of the variable to add attribute to
+ *   attrName - name of the attribute
+ *   attrType - type of attribute ("text", "float", "double", "int")
+ *   attrValue - string representation of the attribute value
+ * Returns: error code (0 = success, non-zero = error)
+ */   
+int addAttrToVar(char *varName, char *attrName, char *attrType, char *attrValue);
+
+/* Add standard CF convention attributes to a variable (units, long_name, standard_name)
+ * This is a convenience function for adding the most commonly used CF attributes
+ * Parameters:
+ *   varName - name of the variable to add attributes to
+ *   units - units string (e.g., "m/s", "K", "kg/m^3") - can be NULL
+ *   longName - descriptive long name for the variable - can be NULL
+ *   standardName - CF standard name if applicable - can be NULL
+ * Returns: error code (0 = success, non-zero = error)
+ */
+int addStandardAttrsToVar(char *varName, char *units, char *longName, char *standardName);
+
 #endif // _IOVARSLIST_H
diff --git a/SRC/IO/io_binary.c b/SRC/IO/io_binary.c
index 6e5dd1b..442c18c 100644
--- a/SRC/IO/io_binary.c
+++ b/SRC/IO/io_binary.c
@@ -16,7 +16,11 @@
 /*----->>>>> int ioWriteBinaryoutFileSingleTime();  ---------------------------------------------------------------
  * Used to have N-ranks write N-binary files of registered variables for a single timestep.
 */
+#ifdef GAD_EXT
+int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    FILE *output_ptr;
 
@@ -27,7 +31,11 @@ int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
    /*Open the output file*/
    output_ptr = fopen(outFileName,"wb");
    /*Write the IO-registered variables to the output file*/
+#ifdef GAD_EXT
+   errorCode = ioPutBinaryoutFileVars(output_ptr, Nx, Ny, Nz, Nh, Nturbines);
+#else
    errorCode = ioPutBinaryoutFileVars(output_ptr, Nx, Ny, Nz, Nh);
+#endif
    /*Close the output file*/
    fclose(output_ptr);
    return(errorCode);
@@ -36,7 +44,11 @@ int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
 /*----->>>>> int ioPutBinaryoutFileVars();    ---------------------------------------------------------------------
 * Used to put(write) all variables in the register list in(to) the Binary file. 
 */
+#ifdef GAD_EXT
+int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    ioVar_t *ptr;
    ioVar_t *rhoptr;
@@ -130,6 +142,15 @@ int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh){
            extent=Ny+2*Nh; 
            fwrite(&extent,sizeof(int),1,outptr);
            fwrite(field,numElems*sizeof(float),1,outptr);
+#ifdef GAD_EXT
+         }else if((ptr->nDims == 2)&&(ptr->dimids[1] == 4)){
+           numElems=(Nturbines);
+           binary_nDims=1;
+           fwrite(&binary_nDims,sizeof(int),1,outptr);
+           extent=Nturbines;
+           fwrite(&extent,sizeof(int),1,outptr);
+           fwrite(field,numElems*sizeof(float),1,outptr);
+#endif
 	 }else if((ptr->nDims == 1)&&(ptr->dimids[0] == 0)){
            numElems=1;
            binary_nDims=1;
@@ -147,6 +168,15 @@ int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh){
            extent=1;
            fwrite(&extent,sizeof(int),1,outptr);
            fwrite(intField,numElems*sizeof(int),1,outptr);
+#ifdef GAD_EXT
+	 }else if((ptr->nDims == 2)&&(ptr->dimids[1] == 4)){
+           numElems=(Nturbines);
+           binary_nDims=1;
+           fwrite(&binary_nDims,sizeof(int),1,outptr);
+           extent=Nturbines;
+           fwrite(&extent,sizeof(int),1,outptr);
+           fwrite(intField,numElems*sizeof(int),1,outptr);
+#endif
          }// end if ndims == 1  && ptr->dimids[0] == 0  
       }// if (ptr.type == "float") else if (ptr.type == "int")...
       ptr = ptr->next;
diff --git a/SRC/IO/io_binary.h b/SRC/IO/io_binary.h
index 2a92f7a..02528aa 100644
--- a/SRC/IO/io_binary.h
+++ b/SRC/IO/io_binary.h
@@ -19,9 +19,16 @@
 /*----->>>>> int ioWriteiBinaryoutFileSingleTime();  ---------------------------------------------------------------
  * Used to have N-ranks write N-binary files of registered variables for a single timestep.
  */
+#ifdef GAD_EXT
+int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioWriteBinaryoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh);
+#endif
 /*----->>>>> int ioPutBinaryoutFileVars();    ---------------------------------------------------------------------
  * Used to put(write) all variables in the register list in(to) the Binary file. 
 */
+#ifdef GAD_EXT
+int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioPutBinaryoutFileVars(FILE *outptr, int Nx, int Ny, int Nz, int Nh);
-
+#endif
diff --git a/SRC/IO/io_netcdf.c b/SRC/IO/io_netcdf.c
index 2b1ed9d..5c2ade7 100644
--- a/SRC/IO/io_netcdf.c
+++ b/SRC/IO/io_netcdf.c
@@ -13,6 +13,9 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+
+#include <stdlib.h>
+
 #define MAXLEN 256
 int dimids[MAXDIMS];
 size_t count[MAXDIMS];
@@ -26,8 +29,12 @@ int dims4d[] = {0,1,2,3};
 int dims3d[] = {1,2,3};  
 int dims2dTD[] = {0,2,3};  
 int dims2d[] = {2,3}; 
-int dims1dTD[] = {0};  
-
+int dims1dTD[] = {0};
+#ifdef GAD_EXT
+   size_t count1dTD_GAD[MAXDIMS];
+   size_t start1dTD_GAD[MAXDIMS];
+   int dims1dTD_GAD[] = {0,4};
+#endif
 //////////***********************  INPUT FUNCTIONS  *********************************////////
 /*----->>>>> int ioReadNetCDFgridFile();  ---------------------------------------------------------------
 * Used to read a NetCDF file of registered "GRID" variables.
@@ -92,7 +99,11 @@ int ioReadNetCDFgridFile(char* gridFile, int Nx, int Ny, int Nz, int Nh){
    start[dimids[3]] = 0;
 
    printf("Reading IO-registered variable fields from gridFile = %s\n",gridFile);
+#ifdef GAD_EXT
+   errorCode = ioGetNetCDFinFileVars(ncid, Nx, Ny, Nz, Nh, 0);
+#else
    errorCode = ioGetNetCDFinFileVars(ncid, Nx, Ny, Nz, Nh);
+#endif
    printf("Done Reading IO-registered variable fields from gridFile = %s\n",gridFile);
    /* close the file */
    errorCode = ioCloseNetCDFfile(ncid);
@@ -104,14 +115,25 @@ int ioReadNetCDFgridFile(char* gridFile, int Nx, int Ny, int Nz, int Nh){
 /*----->>>>> int ioReadNetCDFinFileSingleTime();  ---------------------------------------------------------------
  * Used to read a NetCDF file of registered variables for a single timestep.
 */
+#ifdef GAD_EXT
+int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    int ncid;
+   int ncdims;
    /* concatenate the fileName components */
    sprintf(inFileName, "%s%s",inPath,inFile);
    /* Open the input file.*/
    printf("Attempting to open inFileName = %s\n",inFileName);
    errorCode = ioOpenNetCDFinFile(inFileName, &ncid);
+   /* Inquire for the number of dimensions*/
+   if ((errorCode = nc_inq_ndims(ncid, &ncdims))){
+      ERR(errorCode);
+   }
+   printf("inFileName = %s as %d dimensions\n",inFileName,ncdims);
+
    /* Inquire for the dimension-ids*/
    if ((errorCode = nc_inq_dimid(ncid, "time", &dimids[0]))){
       ERR(errorCode);
@@ -127,10 +149,17 @@ int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
    }
    printf("Opened inFileName = %s with ncid = %d\n",inFileName,ncid);
    printf("Established dimension ids of xIndex,yIndex,zIndex = %d, %d, %d\n",dimids[3],dimids[2],dimids[1]);
+#ifdef GAD_EXT
+   if(ncdims > 4){
+     if ((errorCode = nc_inq_dimid(ncid, "GADNumTurbines", &dimids[4]))){
+        ERR(errorCode);
+     }
+     printf("Established GAD dimension id of GADNumTurbines as %d\n",dimids[4]);
+   }//endif ncdims > 4
+#endif
   
    /*Attempt to read all of the variables in the IO Registry list*/
    /* These are precisely the same as Nxp, Nyp, and Nzp calculated in GRID/grid.c:grid_init(). */
-#define NOMPI
    count[dimids[0]] = 1;
    count[dimids[1]] = Nz;
    count[dimids[2]] = Ny;
@@ -140,7 +169,10 @@ int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
    count2dTD[dimids[0]] = 1;
    count2dTD[dimids[1]] = Ny;
    count2dTD[dimids[2]] = Nx;
-
+#ifdef GAD_EXT
+   count1dTD_GAD[dimids[0]] = 1;
+   count1dTD_GAD[dimids[1]] = Nturbines;
+#endif
    if ((errorCode = nc_inq_dimlen(ncid, dimids[0], &count[0]))){
       ERR(errorCode);
    }
@@ -198,15 +230,41 @@ int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
        errorCode = IO_ERROR_DIMLEN;
        return(errorCode);  
    } 
-
+#ifdef GAD_EXT
+   if(ncdims > 4){
+     //count1dTD_GAD
+     if ((errorCode = nc_inq_dimlen(ncid, dimids[0], &count1dTD_GAD[0]))){
+        ERR(errorCode);
+     }
+     if ((errorCode = nc_inq_dimlen(ncid, dimids[4], &count1dTD_GAD[1]))){
+        ERR(errorCode);
+     }
+     if(count1dTD_GAD[1]!=Nturbines){
+        printf("ERROR: inFileName = %s, count1dTD_GAD dimension lengths for t,GADNumTurbines = %lu\n",
+               inFileName, count1dTD_GAD[1]);
+        printf("       does not match GADNumTurbines = %d turbineSpecsFile parameter!\n",
+               Nturbines);
+        printf("       No values will be read from the file!\n");
+        errorCode = IO_ERROR_DIMLEN;
+        return(errorCode);
+     }
+   }
+#endif
    /*These are the starting location in the full domain space.*/ 
    start[dimids[0]] = 0;   
    start[dimids[1]] = 0;  
    start[dimids[2]] = 0;  
    start[dimids[3]] = 0;  
+#ifdef GAD_EXT
+   start[dimids[4]] = 0;
+#endif
    
    printf("Reading IO-registered variable fields from inFileName = %s\n",inFileName);
+#ifdef GAD_EXT
+   errorCode = ioGetNetCDFinFileVars(ncid, Nx, Ny, Nz, Nh, Nturbines);
+#else
    errorCode = ioGetNetCDFinFileVars(ncid, Nx, Ny, Nz, Nh);
+#endif
    printf("Done Reading IO-registered variable fields from inFileName = %s\n",inFileName);
    /* close the file */
    errorCode = ioCloseNetCDFfile(ncid);    
@@ -234,7 +292,11 @@ int ioOpenNetCDFinFile(char *fileName, int *ncidptr){
 /*----->>>>> int ioGetNetCDFinFileVars();    ---------------------------------------------------------------------
 * Used to get(read) all variables from the register list into the appropriately registered module memory blocks. 
 */
+#ifdef GAD_EXT
+int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    int varFound;
    size_t *countPtr;
@@ -252,6 +314,11 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
    /* For each entry in the ioVarsList, "get" the var */
    ptr = getFirstVarFromList();
    while(ptr != NULL){
+      if(mpi_rank_world==0){ 
+        printf("Checking for %s...\n",ptr->name);
+        fflush(stdout);
+      }//end if mpi_rank==0
+      varFound = 0;
       if(!strcmp(ptr->type,"float")){
          field = (float *) ptr->varMemAddress;  //All ranks set pointer to local memory location 
                                                 //for this registered field
@@ -269,15 +336,22 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
                fflush(stdout);
                ERR(errorCode);
              }else{
+               printf("Next registered var in list to get is ptr->name = %s, from ptr->ncvarid = %d\n",ptr->name,ptr->ncvarid);
+               fflush(stdout);
                varFound=1;
              }
-             printf("Next registered var in list to get is ptr->name = %s, from ptr->ncvarid = %d\n",ptr->name,ptr->ncvarid);
-             fflush(stdout);
-           
+          
+	     if(varFound == 1){  //Only do this section if a var was found 
              /*Allocate a tmp buffer and get the pointer to the Register var*/
              if((ptr->nDims == 2)||(ptr->nDims==3)){
                if(ptr->nDims == 2){
-                 countPtr = count2d;
+		 if(ptr->dimids[1] == 2){
+                   countPtr = count2d;
+#ifdef GAD_EXT
+		 }else if(ptr->dimids[1] == 4){
+                   countPtr = count1dTD_GAD;
+#endif
+                 }
                }else if(ptr->nDims == 3){
                  countPtr = count2dTD;
                }//end if,else ptr->nDims == 2,3
@@ -297,7 +371,6 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
                 ERR(errorCode);
              }
              for(i = 0; i< nDims; i++){
-               //printf("Variable field = %s has dimid(%d) = %d\n",ptr->name,i,tmpDimids[i]);
 	       printf("Variable field = %s has dimid(%d) = %d: start, count => %lu, %lu\n",ptr->name,i,tmpDimids[i],start[tmpDimids[i]],countPtr[tmpDimids[i]]);
              }
 	     fflush(stdout);
@@ -310,13 +383,23 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
 	     fflush(stdout);
              /* Transpose the data */
              if((nDims == 2)||(nDims == 3)){
-               for(i=0; i < Nx; i++){
-                 for(j=0; j < Ny; j++){
-                   ijk = i*(Ny)+j;  //Note ijk is only 2-d here
-                   kji = j*(Nx)+i;  //Note kji is only 2-d here
-                   ioBuffFieldTransposed2D[ijk] = ioBuffField[kji]; //out-of-place transpose the array elements
-                 } // end for(j...
-               } // end for(i...
+#ifdef GAD_EXT
+	       if(ptr->dimids[1] == 4){
+                 for(i=0; i < Nturbines; i++){
+                    field[i] = ioBuffField[i];
+		 }
+	       }else{
+#endif
+                 for(i=0; i < Nx; i++){
+                   for(j=0; j < Ny; j++){
+                     ijk = i*(Ny)+j;  //Note ijk is only 2-d here
+                     kji = j*(Nx)+i;  //Note kji is only 2-d here
+                     ioBuffFieldTransposed2D[ijk] = ioBuffField[kji]; //out-of-place transpose the array elements
+                   } // end for(j...
+                 } // end for(i...
+#ifdef GAD_EXT
+	       }//end if (ptr->dimids[1] == 4) else...
+#endif
              }else{
                for(i=0; i < Nx; i++){
                  for(j=0; j < Ny; j++){
@@ -329,6 +412,7 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
                } // end for(i...
              }//end if(nDims==2)-else
            }// if this var is Tau* -else 
+	 }//end if varFound==1
          }//end if mpi_rank_world == 0
          MPI_Barrier(MPI_COMM_WORLD);
          //Broadcast the varFound flag for this variable
@@ -338,7 +422,15 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
            MPI_Bcast(&nDims, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
            //Now scatter the field across ranks
            if((nDims == 2)||(nDims == 3)){
-             errorCode = fempi_ScatterVariable(Nx,Ny,1,Nxp,Nyp,1,Nh,ioBuffFieldTransposed2D,field);
+#ifdef GAD_EXT
+             if(ptr->dimids[1] == 4){
+	       MPI_Bcast(field, Nturbines, MPI_FLOAT, 0, MPI_COMM_WORLD);
+             }else{
+#endif
+               errorCode = fempi_ScatterVariable(Nx,Ny,1,Nxp,Nyp,1,Nh,ioBuffFieldTransposed2D,field);
+#ifdef GAD_EXT
+	     }//end if (ptr->dimids[1] == 4) else...
+#endif
            }else if(nDims == 4){
              errorCode = fempi_ScatterVariable(Nx,Ny,Nz,Nxp,Nyp,Nzp,Nh,ioBuffFieldTransposed,field);
            }else if(nDims == 1){  // A scalar float variable was read, it shoud be simply broadcast to all ranks rather than "scattered"
@@ -387,19 +479,25 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
            }//end if(nDims==2)-else
            MPI_Barrier(MPI_COMM_WORLD);
          } //end if varFound == 1
-      } else if(!strcmp(ptr->type,"int")){
+      }else if(!strcmp(ptr->type,"int")){
         intField = (int *) ptr->varMemAddress;  //All ranks set pointer to local memory location 
-        countPtr = count;
+	if(ptr->nDims == 1){					
+          countPtr = count;
+#ifdef GAD_EXT
+	}else if(ptr->nDims == 2){
+          countPtr = count1dTD_GAD;
+#endif
+        }//end if ptr->nDims ==1
         if(mpi_rank_world==0){
          varFound = 0;
          /*inquire for the varid for this variable name*/
-         printf("Next registered var in list to get is ptr->name = %s, from ptr->ncvarid = %d\n",ptr->name,ptr->ncvarid);
-         fflush(stdout);
          if ( (errorCode = nc_inq_varid(ncid, ptr->name, &ptr->ncvarid)) ){
            printf("Error ioGetNetCDFinFileVars(): Variable field = %s was not found in this file,!\n",ptr->name);
            fflush(stdout);
            ERR(errorCode);
          }else{
+           printf("Next registered var in list to get is ptr->name = %s, from ptr->ncvarid = %d\n",ptr->name,ptr->ncvarid);
+           fflush(stdout);
            varFound=1;
          }
 
@@ -426,11 +524,16 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
            }
          } //end if varFound == 1
         }//end if mpi_rank_world==0
+        MPI_Bcast(&varFound, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
         if(varFound==1){
           //Broadcast the nDims read by the rrot rank for this variable
           MPI_Bcast(&nDims, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
           if(nDims == 1){
             MPI_Bcast(intField, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
+#ifdef GAD_EXT
+	  }else if(nDims == 2){
+            MPI_Bcast(intField, Nturbines, MPI_INTEGER, 0, MPI_COMM_WORLD);
+#endif
           }//end if nDims == 1
         }//end if varFound == 1
       } else {
@@ -446,7 +549,11 @@ int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
 /*----->>>>> int ioWriteNetCDFoutFileSingleTime();  ---------------------------------------------------------------
  * Used to write a NetCDF file of registered variables for a single timestep.
 */
+#ifdef GAD_EXT
+int ioWriteNetCDFoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioWriteNetCDFoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    int ncid;
 
@@ -461,14 +568,30 @@ int ioWriteNetCDFoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh){
    if(mpi_rank_world==0){
      /* Open and set  the file into "define mode" */
      errorCode = ioCreateNetCDFoutFile(outFileName, &ncid);
+#ifdef GAD_EXT
+     errorCode = ioDefineNetCDFoutFileDims(ncid, Nx, Ny, Nz, Nh, Nturbines);
+#else
      errorCode = ioDefineNetCDFoutFileDims(ncid, Nx, Ny, Nz, Nh);
+#endif
      errorCode = ioDefineNetCDFoutFileVars(ncid);
+     /* Define dimension coordinate variable attributes */
+     errorCode = ioDefineNetCDFcoordVarAttrs(ncid);
+     /* Define variable attributes */
+     errorCode = ioDefineNetCDFoutFileAttrs(ncid);
+#ifdef GAD_EXT
+     errorCode = ioEndNetCDFdefineMode(ncid,Nx, Ny, Nz, Nh, Nturbines);
+#else
      errorCode = ioEndNetCDFdefineMode(ncid,Nx, Ny, Nz, Nh);
+#endif
      /*Write all of the variables in the IO Registry list*/
    } //endif mpi_rank_world==0
    //Broadcast the ncid...
    MPI_Bcast(&ncid, 1, MPI_INT, 0, MPI_COMM_WORLD);
+#ifdef GAD_EXT
+   errorCode = ioPutNetCDFoutFileVars(ncid, Nx, Ny, Nz, Nh, Nturbines);
+#else
    errorCode = ioPutNetCDFoutFileVars(ncid, Nx, Ny, Nz, Nh);
+#endif
    /* close the file */
    if(mpi_rank_world==0){
      errorCode = ioCloseNetCDFfile(ncid);    
@@ -495,8 +618,11 @@ int ioCreateNetCDFoutFile(char *outFileName, int *ncidptr){
 /*----->>>>> int ioDefineNetCDFoutFileDims();    ---------------------------------------------------------------------
 * Used to complete the sequence of steps involved in "define mode" for a NetCDF file to be written.
 */
-
+#ifdef GAD_EXT
+int ioDefineNetCDFoutFileDims(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioDefineNetCDFoutFileDims(int ncid, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
 
 
@@ -536,6 +662,13 @@ int ioDefineNetCDFoutFileDims(int ncid, int Nx, int Ny, int Nz, int Nh){
    start[dimids[2]] = 0; 
    start[dimids[3]] = 0; 
    
+#ifdef GAD_EXT
+   if ((errorCode = nc_def_dim(ncid, "GADNumTurbines", Nturbines, &dimids[4]))){
+      ERR(errorCode);
+   }
+   count1dTD_GAD[dimids[0]] = 1;
+   count1dTD_GAD[dimids[4]] = Nturbines;
+#endif
    return(errorCode);
 } //end ioDefineNetCDFoutFileDims()
 
@@ -572,11 +705,6 @@ int ioDefineNetCDFoutFileVars(int ncid){
         printf("Cannot define a NetCDF variable with var->type = %s\n",ptr->type);
       }// if (ptr.type == "float") else ...
       /* define any variable attributes. */
-/*TODO      
-      if ((errorCode = nc_put_att_text(ncid, ptr->ncvarid, ptr->attname, strlen(ptr->attval), ptr->attval))){
-           ERR(errorCode);
-      } 
-*/
       ptr = ptr->next;
    }
    
@@ -586,7 +714,11 @@ int ioDefineNetCDFoutFileVars(int ncid){
 /*----->>>>> int ioEndNetCDFdefineMode();    ---------------------------------------------------------------------
  * Used to close the sequence steps involved in "define mode" for a NetCDF file to be written.
  */
+#ifdef GAD_EXT
+int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh){
+#endif
    int *dimIndexVec;
    int  dimIndexCnt;
    int idx;
@@ -604,9 +736,15 @@ int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh){
    } 
    if(dimIndexCnt < Nx){
      dimIndexCnt = Nx;
-   } 
+   }
+#ifdef GAD_EXT
+   if(dimIndexCnt < Nturbines){
+     dimIndexCnt = Nturbines;
+   }
+#endif
+
    //Malloc an index vector with dimIndexCnt elements;
-   dimIndexVec= (int *) malloc(dimIndexCnt*sizeof(int)); /* 3 for each part of path/base.subString */ 
+   dimIndexVec= (int *) malloc(dimIndexCnt*sizeof(int)); 
    //Initialize the index vector
    for(idx=0; idx < dimIndexCnt; idx++){
      dimIndexVec[idx] = idx;
@@ -619,7 +757,12 @@ int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh){
    } 
    if ((errorCode = nc_put_vara_int(ncid, nx_varid, &start[dimids[3]], &count[dimids[3]], dimIndexVec))){
        ERR(errorCode);
-   } 
+   }
+#ifdef GAD_EXT
+   if ((errorCode = nc_put_vara_int(ncid, nx_varid, &start[dimids[4]], &count[dimids[4]], dimIndexVec))){
+       ERR(errorCode);
+   }
+#endif 
    free(dimIndexVec);
    
    return(errorCode);
@@ -628,7 +771,11 @@ int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh){
 /*----->>>>> int ioPutNetCDFoutFileVars();    ---------------------------------------------------------------------
 * Used to put(write) all variables in the regiter list in(to) the NetCDF file. 
 */
+#ifdef GAD_EXT
+int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines){
+#else
 int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
+#endif
    int errorCode = IO_SUCCESS;
    size_t *countPtr;
    ioVar_t *ptr;
@@ -641,6 +788,10 @@ int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
    int verbose_log = 0;
    int *intField;
 
+#ifdef GAD_EXT
+   void* memsetReturnVal;
+#endif
+
    /* For each entry in the ioVarsList, "put" the var */
    ptr = getFirstVarFromList();
    while(ptr != NULL){
@@ -751,6 +902,15 @@ int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
                  ioBuffField[kji] = field[ijk]; //out-of-place trim and transpose if the array elements
              } // end for(j...
            } // end for(i...
+#ifdef GAD_EXT
+         }else if((ptr->nDims == 2)&&(ptr->dimids[1] == 4)){
+           countPtr=count1dTD_GAD;
+           /* Reset to zero and then Reduce(MPI_SUM op) into the write buffer */
+           if(mpi_rank_world==0){
+	     memsetReturnVal = memset(ioBuffField,0,(Nturbines)*sizeof(float));  //Just sets the first Nturbines elements to zero
+	   }//end if mpi_rank ==0
+           MPI_Reduce(field, ioBuffField, Nturbines, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
+#endif
          }else if((ptr->nDims == 1)&&(ptr->dimids[0] == 0)){
            countPtr=count;
            if(mpi_rank_world==0){
@@ -770,6 +930,10 @@ int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
            }
          }//endif mpi_Rank_world==0
       } else if(!strcmp(ptr->type,"int")){
+#ifdef DEBUG
+   printf("mpi_rank_world--%d/%d Putting variable field %s...\n",mpi_rank_world,mpi_size_world,ptr->name);
+   fflush(stdout);
+#endif
          intField = (int *) ptr->varMemAddress;
          if((ptr->nDims == 1)&&(ptr->dimids[0] == 0)){
            countPtr=count;
@@ -780,16 +944,140 @@ int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh){
                 fflush(stdout);
              }
            }//endif mpi_Rank_world==0
-         }// end if ndims==1  && dimids[0]=0 (time) 
+#ifdef GAD_EXT
+	 }else if((ptr->nDims == 2)&&(ptr->dimids[1] == 4)){
+           countPtr=count1dTD_GAD;
+           if (mpi_rank_world==0){
+	      memsetReturnVal = memset(ioBuffFieldInt,0,(Nturbines)*sizeof(int));  //Just sets the first Nturbines elements to zero
+	      if (memsetReturnVal==NULL){
+                printf("ioPutNetCDFoutFileVars: Error in call to memset for var = %s\n",ptr->name);
+                fflush(stdout);
+	      }//end if 
+           }//endif mpi_Rank_world==0
+           MPI_Reduce(intField, ioBuffFieldInt, Nturbines, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+           if (mpi_rank_world==0){
+             if ((errorCode = nc_put_vara_int(ncid, ptr->ncvarid, start, countPtr, ioBuffFieldInt))){
+                ERR(errorCode);
+                printf("ioPutNetCDFoutFileVars: Error writing field = %s\n",ptr->name);
+                fflush(stdout);
+             }
+           }//endif mpi_Rank_world==0
+#endif         
+	 }// end if ndims==1  && dimids[0]=0 (time), #ifdef GAD_EXT-- else if(nDims==2 && dimids[1]=4) #endif
       } else {
         printf("Cannot 'put' a NetCDF variable with var.type = %s\n",ptr->type);
-      }// if (ptr.type == "float") else ...
+      }// if (ptr->type == "float") else if(ptr->type == "int")...
+      MPI_Barrier(MPI_COMM_WORLD);
       ptr = ptr->next;
    } //end while ptr != NULL
 
    return(errorCode);   
 } //ioPutNetCDFoutFileVars()
 
+/*----->>>>> int ioDefineNetCDFoutFileAttrs();    ---------------------------------------------------------------------
+* Used to define NetCDF variable attributes: every attribute of every variable in the IO registry list
+* that has a non-empty name and value is written to ncid. Values are converted from their string form
+* for the "float", "double" and "int" types; "text" and any unrecognized type are written as text.
+* Returns IO_SUCCESS if nothing was written, else the last write status, or the first failing status.
+*/
+int ioDefineNetCDFoutFileAttrs(int ncid){
+   int errorCode = IO_SUCCESS;
+   int status;
+   int failed = 0;   /* set once a write has failed; errorCode is then frozen */
+   ioVar_t *ptr;
+   int i;
+
+   /* For each entry in the ioVarsList, define attributes if they exist */
+   for(ptr = getFirstVarFromList(); ptr != NULL; ptr = ptr->next){
+      for(i = 0; i < ptr->nAttrs; i++){
+         const char *attName  = ptr->attrs[i].name;
+         const char *attType  = ptr->attrs[i].type;
+         const char *attValue = ptr->attrs[i].value;
+
+         /* An attribute needs both a name and a value to be written */
+         if(attName[0] == '\0' || attValue[0] == '\0'){
+            continue;
+         }
+
+         /* Select the NetCDF put function from the attribute type */
+         if(strcmp(attType, "float") == 0){
+            float val = (float) atof(attValue);
+            status = nc_put_att_float(ncid, ptr->ncvarid, attName, NC_FLOAT, 1, &val);
+         }else if(strcmp(attType, "double") == 0){
+            double val = atof(attValue);
+            status = nc_put_att_double(ncid, ptr->ncvarid, attName, NC_DOUBLE, 1, &val);
+         }else if(strcmp(attType, "int") == 0){
+            int val = atoi(attValue);
+            status = nc_put_att_int(ncid, ptr->ncvarid, attName, NC_INT, 1, &val);
+         }else{
+            /* "text", and the default for any unrecognized type */
+            status = nc_put_att_text(ncid, ptr->ncvarid, attName, strlen(attValue), attValue);
+         }
+
+         /* Latch the first failure so that a later successful write cannot
+          * overwrite it in the value returned to the caller. */
+         if(!failed){
+            errorCode = status;
+         }
+         if(status){
+            ERR(status);
+            failed = 1;
+         }
+      }// end for i < nAttrs
+   }// end for each var in the list
+
+   return(errorCode);
+} //end ioDefineNetCDFoutFileAttrs
+
+/*----->>>>> int ioDefineNetCDFcoordVarAttrs();    ---------------------------------------------------------------------
+* Used to define attributes for dimension coordinate variables: long_name, units and axis text
+* attributes are written to the variables nx_varid, ny_varid and nz_varid, plus positive="up" on nz_varid.
+* All ten writes are attempted. Returns the last write status, or the first failing status if any.
+*/
+int ioDefineNetCDFcoordVarAttrs(int ncid){
+   /* One row per attribute in write order; axis selects the coordinate variable (0=x, 1=y, 2=z) */
+   static const struct { int axis; const char *name; const char *text; } coordAttrs[] = {
+      { 0, "long_name", "x-coordinate index" },
+      { 0, "units",     "1" },
+      { 0, "axis",      "X" },
+      { 1, "long_name", "y-coordinate index" },
+      { 1, "units",     "1" },
+      { 1, "axis",      "Y" },
+      { 2, "long_name", "z-coordinate index" },
+      { 2, "units",     "1" },
+      { 2, "axis",      "Z" },
+      { 2, "positive",  "up" }
+   };
+   const int nCoordAttrs = (int)(sizeof(coordAttrs)/sizeof(coordAttrs[0]));
+   int errorCode = IO_SUCCESS;
+   int varids[3];
+   int status;
+   int failed = 0;   /* set once a write has failed; errorCode is then frozen */
+   int k;
+
+   /* Read the coordinate variable ids at call time, indexed by the axis column above */
+   varids[0] = nx_varid;
+   varids[1] = ny_varid;
+   varids[2] = nz_varid;
+
+   for(k = 0; k < nCoordAttrs; k++){
+      status = nc_put_att_text(ncid, varids[coordAttrs[k].axis], coordAttrs[k].name,
+                               strlen(coordAttrs[k].text), coordAttrs[k].text);
+      /* Latch the first failure so that a later successful write cannot
+       * overwrite it in the value returned to the caller. */
+      if(!failed){
+         errorCode = status;
+      }
+      if(status){
+         ERR(status);
+         failed = 1;
+      }
+   }// end for each coordinate attribute
+
+   return(errorCode);
+} //end ioDefineNetCDFcoordVarAttrs
+
+
 /*----->>>>> int ioCloseNetCDFfile();    ---------------------------------------------------------------------
  * Used to close a netCDF file
  * */
diff --git a/SRC/IO/io_netcdf.h b/SRC/IO/io_netcdf.h
index 400d902..7064053 100644
--- a/SRC/IO/io_netcdf.h
+++ b/SRC/IO/io_netcdf.h
@@ -26,6 +26,9 @@ extern int dims3d[];
 extern int dims2dTD[];  
 extern int dims2d[];  
 extern int dims1dTD[];
+#ifdef GAD_EXT
+extern int dims1dTD_GAD[];
+#endif
 
 /*######################------------------- IO module function declarations ---------------------#################*/
 
@@ -38,7 +41,11 @@ int ioReadNetCDFgridFile(char* gridFile, int Nx, int Ny, int Nz, int Nh);
 /*----->>>>> int ioReadNetCDFinFileSingleTime();  ---------------------------------------------------------------
 * Used to read a NetCDF file for a single timestep.
 */
+#ifdef GAD_EXT
+int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioReadNetCDFinFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh);
+#endif
 
 /*----->>>>> int ioOpenNetCDFoutFile();    ---------------------------------------------------------------------
 * Used to open a NetCDF file for reading.
@@ -48,13 +55,21 @@ int ioOpenNetCDFinFile(char *fileName, int *ncidptr);
 /*----->>>>> int ioGetNetCDFinFileVars();    ---------------------------------------------------------------------
 * Used to get(read) all variables in the regiter list in(to) the appropriately registered module memory. 
 */
+#ifdef GAD_EXT
+int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioGetNetCDFinFileVars(int ncid, int Nx, int Ny, int Nz, int Nh);
+#endif
 
 //////////***********************  OUTPUT FUNCTIONS  *********************************////////
 /*----->>>>> int ioWriteNetCDFoutFileSingleTime();  ---------------------------------------------------------------
 * Used to write a NetCDF file for a single timestep.
 */
+#ifdef GAD_EXT
+int ioWriteNetCDFoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioWriteNetCDFoutFileSingleTime(int tstep, int Nx, int Ny, int Nz, int Nh);
+#endif
 
 /*----->>>>> int ioCreateNetCDFoutFile();    ---------------------------------------------------------------------
 * Used to create NetCDF file for writing.
@@ -64,7 +79,11 @@ int ioCreateNetCDFoutFile(char *outFileName, int *ncidptr);
 /*----->>>>> int ioDefineNetCDFoutFileDims();    ---------------------------------------------------------------------
 * Used to complete the sequence of dimension definitions involved in "define mode" for a NetCDF file to be written.
 */
+#ifdef GAD_EXT
+int ioDefineNetCDFoutFileDims(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioDefineNetCDFoutFileDims(int ncid, int Nx, int Ny, int Nz, int Nh);
+#endif
 
 /*----->>>>> int ioDefineNetCDFoutFileVars();    ---------------------------------------------------------------------
 * Used to complete the sequence of variable definitions involved in "define mode" for a NetCDF file to be written.
@@ -74,15 +93,32 @@ int ioDefineNetCDFoutFileVars(int ncid);
 /*----->>>>> int ioEndNetCDFdefineMode();    ---------------------------------------------------------------------
 * Used to close the sequence steps involved in "define mode" for a NetCDF file to be written.
 */
+#ifdef GAD_EXT
+int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioEndNetCDFdefineMode(int ncid, int Nx, int Ny, int Nz, int Nh);
+#endif
 
 /*----->>>>> int ioiPutNetCDFoutFileVars();    ---------------------------------------------------------------------
 * Used to put(write) all variables in the regiter list in(to) the NetCDF file.
 */
+#ifdef GAD_EXT
+int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh, int Nturbines);
+#else
 int ioPutNetCDFoutFileVars(int ncid, int Nx, int Ny, int Nz, int Nh);
+#endif
+
+/*----->>>>> int ioDefineNetCDFoutFileAttrs();    ---------------------------------------------------------------------
+* Used to define NetCDF variable attributes.
+*/
+int ioDefineNetCDFoutFileAttrs(int ncid);
+
+/*----->>>>> int ioDefineNetCDFcoordVarAttrs();    ---------------------------------------------------------------------
+* Used to define attributes for dimension coordinate variables.
+*/
+int ioDefineNetCDFcoordVarAttrs(int ncid);
 
 /*----->>>>> int ioCloseNetCDFfile();    ---------------------------------------------------------------------
 * Used to close a netCDF file
 */
 int ioCloseNetCDFfile(int ncid);
-
diff --git a/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice.cu b/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice.cu
index 0ae6f08..649956b 100644
--- a/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice.cu
+++ b/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice.cu
@@ -79,7 +79,7 @@ extern "C" int cuda_timeIntDeviceSetup(){
    //Ensure secondary time-integration dependent hydro_core parameters get initialized
    errorCode = cuda_hydroCoreDeviceSecondaryStageSetup(dt);
    //Inital Host-to-Device field copies 
-   errorCode = cuda_timeIntHydroInitDevice();  //Transfer initial/restart conditions to the device
+   errorCode = cuda_hydroCoreInitFieldsDevice();  //Transfer initial/restart conditions to the device
    //printf("cuda_timeIntDeviceSetup() complete.\n");
 
    /* Done */
@@ -160,116 +160,8 @@ extern "C" int cuda_timeIntDeviceCommence(int it){
    }//end for itBatch...
 
    //Retrieve desired HYDRO_CORE fields from device
-   errorCode = cuda_timeIntHydroSynchFromDevice();
+   errorCode = cuda_hydroCoreSynchFieldsFromDevice();
    
    return(errorCode);
 }//end cuda_timeIntDeviceCommence()
 
-/*----->>>>> extern "C" int cuda_timeIntHydroInitDevice();  -----------------------------------------------------------
-* This function handles the one-time initializations of on-device (GPU) memory by executing the appropriate sequence 
-* of cudaMemcpyHostToDevice data transfers.
-*/
-extern "C" int cuda_timeIntHydroInitDevice(){
-   int errorCode = TIME_INTEGRATION_SUCCESS;
-   int Nelems;
-   int Nelems2d;
-   /*Set the full memory block number of elements for transfers of 2-d and 3-d fields*/
-   Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
-   Nelems2d = (Nxp+2*Nh)*(Nyp+2*Nh);
-   /*Copy the host hydroFlds to the device */
-   cudaMemcpy(hydroFlds_d, hydroFlds, Nelems*Nhydro*sizeof(float), cudaMemcpyHostToDevice);
-   if(TKESelector > 0){ /*Copy any required SGS TKE equation fields to device */ 
-     cudaMemcpy(sgstkeScalars_d, sgstkeScalars, Nelems*TKESelector*sizeof(float), cudaMemcpyHostToDevice);
-   }
-   if(moistureSelector > 0){ /*Copy any required moisture fields to device */ 
-     cudaMemcpy(moistScalars_d, moistScalars, Nelems*moistureNvars*sizeof(float), cudaMemcpyHostToDevice);
-   }
-   if(surflayerSelector > 0){ /*Copy any required host auxiliary sclar fields to the device */
-     cudaMemcpy(tskin_d, tskin, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     cudaMemcpy(fricVel_d, fricVel, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     cudaMemcpy(htFlux_d, htFlux, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     cudaMemcpy(z0m_d, z0m, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     cudaMemcpy(z0t_d, z0t, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     if (moistureSelector > 0){
-       cudaMemcpy(qskin_d, qskin, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-       cudaMemcpy(qFlux_d, qFlux, Nelems2d*sizeof(float), cudaMemcpyHostToDevice);
-     }
-   }// end if surflayerSelector > 0
-   if(NhydroAuxScalars > 0){ /*Copy any required host auxiliary sclar fields to the device */
-     cudaMemcpy(hydroAuxScalars_d, hydroAuxScalars, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyHostToDevice);
-   }// end if hydroAuxScalars > 0
-   gpuErrchk( cudaPeekAtLastError() ); /*Check for errors in the cudaMemCpy calls*/
-   gpuErrchk( cudaDeviceSynchronize() );
-   return(errorCode);
-}//end cuda_timeIntHydroInitDevice()
-
-/*----->>>>> extern "C" int cuda_timeIntHydroSynchFromDevice();  --------------------------------------------------
-* This function handles the synchronization to host of on-device (GPU) fields  by executing the appropriate sequence 
-* of cudaMemcpyDeviceiToHost data transfers.
-*/
-extern "C" int cuda_timeIntHydroSynchFromDevice(){
-   int errorCode = TIME_INTEGRATION_SUCCESS;
-   int Nelems;
-   int Nelems2d;
-
-   /*Set the full memory block number of elements for transfers of 2-d and 3-d fields*/
-   Nelems = (Nxp+2*Nh)*(Nyp+2*Nh)*(Nzp+2*Nh);
-   Nelems2d = (Nxp+2*Nh)*(Nyp+2*Nh);
-
-   /* Send any desired GPU-computed HYDRO_CORE arrays from Device up to Host*/
-   gpuErrchk( cudaMemcpy(hydroPres, hydroPres_d, Nelems*sizeof(float), cudaMemcpyDeviceToHost) );
-   gpuErrchk( cudaMemcpy(hydroFlds, hydroFlds_d, Nelems*Nhydro*sizeof(float), cudaMemcpyDeviceToHost) );
-   if((hydroForcingWrite==1)||(hydroForcingLog==1)){
-     gpuErrchk( cudaMemcpy(hydroFldsFrhs, hydroFldsFrhs_d, Nelems*Nhydro*sizeof(float), cudaMemcpyDeviceToHost) );
-   } //endif we need to send up the Frhs
-   if (TKESelector > 0){ 
-     gpuErrchk( cudaMemcpy(sgstkeScalars, sgstkeScalars_d, Nelems*TKESelector*sizeof(float), cudaMemcpyDeviceToHost) );
-     if ((hydroForcingWrite==1)||(hydroForcingLog==1)){
-       gpuErrchk( cudaMemcpy(sgstkeScalarsFrhs, sgstkeScalarsFrhs_d, Nelems*TKESelector*sizeof(float), cudaMemcpyDeviceToHost) );
-     }
-   }
-   if (moistureSelector > 0){ 
-     gpuErrchk( cudaMemcpy(moistScalars, moistScalars_d, Nelems*moistureNvars*sizeof(float), cudaMemcpyDeviceToHost) );
-     if ((hydroForcingWrite==1)||(hydroForcingLog==1)){
-       gpuErrchk( cudaMemcpy(moistScalarsFrhs, moistScalarsFrhs_d, Nelems*moistureNvars*sizeof(float), cudaMemcpyDeviceToHost) );
-     }
-   }
-   if(surflayerSelector > 0){
-     gpuErrchk( cudaMemcpy(fricVel, fricVel_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     gpuErrchk( cudaMemcpy(htFlux, htFlux_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     gpuErrchk( cudaMemcpy(tskin, tskin_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     gpuErrchk( cudaMemcpy(invOblen, invOblen_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     gpuErrchk( cudaMemcpy(z0m, z0m_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     gpuErrchk( cudaMemcpy(z0t, z0t_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     if (moistureSelector > 0){
-       gpuErrchk( cudaMemcpy(qFlux, qFlux_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-       gpuErrchk( cudaMemcpy(qskin, qskin_d, Nelems2d*sizeof(float), cudaMemcpyDeviceToHost) );
-     }
-   }//endif surflayerSelector > 0
-   if(NhydroAuxScalars > 0){
-     gpuErrchk( cudaMemcpy(hydroAuxScalars, hydroAuxScalars_d, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyDeviceToHost) );
-     if((hydroForcingWrite==1)||(hydroForcingLog==1)){
-       gpuErrchk( cudaMemcpy(hydroAuxScalarsFrhs, hydroAuxScalarsFrhs_d, Nelems*NhydroAuxScalars*sizeof(float), cudaMemcpyDeviceToHost) );
-     } //endif we need to send up the Frhs
-   } //end if NhydroAuxScalars > 0
-   if(hydroSubGridWrite==1){
-     if(turbulenceSelector > 0){
-       // The 6 Tau_i-j and 3 Tau_TH,j fields
-       gpuErrchk( cudaMemcpy(hydroTauFlds, hydroTauFlds_d, Nelems*9*sizeof(float), cudaMemcpyDeviceToHost) );
-     }//endif 
-     if(moistureSGSturb==1){
-       // The moistureNvars*3 tau moisture fields (3 spatial components per moist species)
-       gpuErrchk( cudaMemcpy(moistTauFlds, moistTauFlds_d, Nelems*moistureNvars*3*sizeof(float), cudaMemcpyDeviceToHost) );
-     }
-   } //endif hydroSubGridWrite==1 
-   gpuErrchk( cudaPeekAtLastError() ); /*Check for errors in the cudaMemCpy calls*/
-//#ifdef DEBUG
-#if 1
-   MPI_Barrier(MPI_COMM_WORLD);
-   printf("Rank %d/%d: Batch complete results sent via cudaMemcpyDeviceToHost.\n",mpi_rank_world, mpi_size_world);
-   fflush(stdout);
-   MPI_Barrier(MPI_COMM_WORLD);
-#endif
-
-   return(errorCode);
-}//end cuda_timeIntHydrosynchFromDevice()
diff --git a/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice_cu.h b/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice_cu.h
index db07c95..f369f87 100644
--- a/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice_cu.h
+++ b/SRC/TIME_INTEGRATION/CUDA/cuda_timeIntDevice_cu.h
@@ -54,16 +54,4 @@ extern "C" int cuda_timeIntDeviceCleanup();
 */
 extern "C" int cuda_timeIntDeviceCommence(int it);
 
-/*----->>>>> extern "C" int cuda_timeIntHydroInitDevice();  -----------------------------------------------------------
-* This function handles the one-time initializations of on-device (GPU) memory by executing the appropriate sequence 
-* of cudaMemcpyHostToDevice data transfers.
-*/
-extern "C" int cuda_timeIntHydroInitDevice();
-
-/*----->>>>> extern "C" int cuda_timeIntHydroSynchFromDevice();  -----------------------------------------------------------
-* This function handles the synchronization to host of on-device (GPU) fields  by executing the appropriate sequence 
-* of cudaMemcpyDeviceiToHost data transfers.
-*/
-extern "C" int cuda_timeIntHydroSynchFromDevice();
-
 #endif // _TIME_INTEGRATION_CUDADEV_CU_H
diff --git a/SRC/TIME_INTEGRATION/time_integration.c b/SRC/TIME_INTEGRATION/time_integration.c
index b2d610f..0544b3d 100644
--- a/SRC/TIME_INTEGRATION/time_integration.c
+++ b/SRC/TIME_INTEGRATION/time_integration.c
@@ -53,7 +53,7 @@ int timeGetParams(){
 
    /*query for each TIME_INTEGRATION parameter */
    timeMethod = 0;
-   errorCode = queryIntegerParameter("timeMethod", &timeMethod, 0, 0, PARAM_MANDATORY);
+   errorCode = queryIntegerParameter("timeMethod", &timeMethod, 0, 0, PARAM_OPTIONAL);
    Nt = 1000;
    errorCode = queryIntegerParameter("Nt", &Nt, 1, INT_MAX, PARAM_MANDATORY);
    dt = 1.0;
@@ -112,6 +112,10 @@ int timeInit(){
    /*Register a time variable holding "simTime" or the master simulation time*/
    errorCode = sprintf(&varName[0],"time");
    errorCode = ioRegisterVar(&varName[0], "float", 1, dims1dTD, &simTime);
+   
+   /* Add NetCDF attributes for the time variable */
+   errorCode = timeAddTimeAttributes();
+   
    printf(":Variable = %s stored at %p, has been registered with IO.\n",
           &varName[0],&simTime);
    fflush(stdout);
@@ -125,6 +129,28 @@ int timeInit(){
    return(errorCode);
 } //end timeInit()
 
+/*----->>>>> int timeAddTimeAttributes();       ----------------------------------------------------------------------
+* Add NetCDF attributes to time-related variables registered by the TIME_INTEGRATION module.
+* Calls ioAddStandardAttrs() and then ioAddVarAttr() for the variable named "time"; the second call
+* is skipped when the first does not return TIME_INTEGRATION_SUCCESS. A message is printed for the
+* call that failed, and that call's return code is returned.
+*/
+int timeAddTimeAttributes(){
+   /* Standard attributes for the time variable (argument roles are defined by ioAddStandardAttrs) */
+   int errorCode = ioAddStandardAttrs("time", "s", "Simulation time", "time");
+
+   if(errorCode == TIME_INTEGRATION_SUCCESS){
+      /* Only after that succeeds, add the "axis" attribute to the time variable */
+      errorCode = ioAddVarAttr("time", "axis", "text", "T");
+      if(errorCode != TIME_INTEGRATION_SUCCESS){
+         printf("Error adding axis attribute to time variable: %d\n", errorCode);
+      }
+   }else{
+      printf("Error adding standard attributes to time variable: %d\n", errorCode);
+   }
+   return errorCode;
+} //end timeAddTimeAttributes()
+
 /*----->>>>> int timeIntBdyPlaneUpdates();       ----------------------------------------------------------------------
  * Used to broadcast and print parameters, allocate memory, and initialize configuration settings 
  * for the TIME_INTEGRATION module.
diff --git a/SRC/TIME_INTEGRATION/time_integration.h b/SRC/TIME_INTEGRATION/time_integration.h
index 5b9dc68..fa79f74 100644
--- a/SRC/TIME_INTEGRATION/time_integration.h
+++ b/SRC/TIME_INTEGRATION/time_integration.h
@@ -47,6 +47,11 @@ int timeGetParams();
 */
 int timeInit();
 
+/*----->>>>> int timeAddTimeAttributes();       ----------------------------------------------------------------------
+* Add NetCDF attributes to time-related variables registered by the TIME_INTEGRATION module.
+*/
+int timeAddTimeAttributes();
+
 /*----->>>>> int timeIntBdyPlaneUpdates();       ----------------------------------------------------------------------
  * Used to broadcast and print parameters, allocate memory, and initialize configuration settings
  * for the TIME_INTEGRATION module.
@@ -58,4 +63,4 @@ Used to free all malloced memory by the TIME_INTEGRATION module.
 */
 int timeCleanup();
 
-#endif // _TIME_INTEGRATION_H
+#endif // _TIME_INTEGRATION_H
\ No newline at end of file
diff --git a/docs/Tutorials/cases/DISPERSION.rst b/docs/Tutorials/cases/DISPERSION.rst
index 593d659..3e66b90 100644
--- a/docs/Tutorials/cases/DISPERSION.rst
+++ b/docs/Tutorials/cases/DISPERSION.rst
@@ -45,7 +45,7 @@ Note that this example requires creation of a terrain and source specification f
 1. Execute the Jupyter notebook provided in **tutorials/notebooks/Dispersion_PrepTerrain.ipynb** to create the topography file *Topography_504x498.dat* that corresponds to a Witch of Agnesi hill of 15 m height.
 2. Execute the Jupyter notebook provided in **/tutorial/notebooks/Dispersion_PrepAuxSrc.ipynb** to create the source specification input file. This example will add two sources at the first vertical grid levels upstream (*x* = 930 m) and downstream (*x* = 1082 m) of the hill. The emissions begin :math:`45` min into the simulation.
 
-Two FastEddy simulation setups are provided for this tutorial, corresponding to weakly stable (*Example07_DISPERSION_SBL.in*) and convective conditions (*Example07_DISPERSION_CBL.in*). The terrain preparation and source input file steps only need to be carried out once. Additionally, the CBL case is set up to demonstrate the use of a rank-wise binary output mode in FastEddy for efficient dumping of the model state to file. Personalize and use the batch submission script **/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh** which will invoke a python script (**/scripts//python_utilities/post-processing/FEbinaryToNetCDF.py**) to convert the rank-wise binary files from each output timestep into a single aggregate NetCDF output file per timestep analogous to those resulting from the SBL case.
+Two FastEddy simulation setups are provided for this tutorial, corresponding to weakly stable (*Example07_DISPERSION_SBL.in*) and convective conditions (*Example07_DISPERSION_CBL.in*). The terrain preparation and source input file steps only need to be carried out once. Additionally, the CBL case is set up to demonstrate the use of a rank-wise binary output mode in FastEddy for efficient dumping of the model state to file. Personalize and use the batch submission script **/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh**, which will invoke a Python script (**/scripts/python_utilities/post-processing/FEbinaryToNetCDF.py**) to convert the rank-wise binary files from each output timestep into a single aggregate NetCDF output file per timestep, analogous to those resulting from the SBL case.
 
 Visualize the output
 --------------------
diff --git a/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh b/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh
index 0fa664f..5408567 100755
--- a/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh
+++ b/scripts/batch_jobs/fasteddy_convert_pbs_script_casper.sh
@@ -15,4 +15,4 @@ module load conda
 conda activate npl-2025a
 which python
 
-mpiexec -n 4 --ppn 2 python -u ${SRCDIR}/FEbinaryToNetCDF.py -f ${SRCDIR}/convert.json
+mpiexec -n 4 --ppn 2 python -u ${SRCDIR}/FEbinaryToNetCDF.py -f ${SRCDIR}/convert.json -a ${SRCDIR}/field_attributes.json
diff --git a/scripts/python_utilities/coupler/GenICBCs.py b/scripts/python_utilities/coupler/GenICBCs.py
index 6e5a33a..b227650 100644
--- a/scripts/python_utilities/coupler/GenICBCs.py
+++ b/scripts/python_utilities/coupler/GenICBCs.py
@@ -13,7 +13,6 @@
 from matplotlib.gridspec import GridSpec
 from scipy import ndimage,interpolate
 from netCDF4 import Dataset
-from numba import jit
 import datetime as dt
 #from datetime import date
 
@@ -26,7 +25,7 @@
 mpi_name = MPI.Get_processor_name()
 
 print("{:d}/{:d}: Hello World! on {:s}.".format(mpi_rank, mpi_size, mpi_name))
-DEBUG_COUPLER = False
+DEBUG_COUPLER = False #True
 
 ######################################################
 ### Parse the command line arguments                ###
@@ -116,7 +115,6 @@
      myend = (iRank+1)*elems_perRank + list_StartOffset
      if iRank == (mpi_size-1):
         myend = myend+((list_len-list_StartOffset)-mpi_size*elems_perRank) ###Catch straggler files with the last rank
-        #myend = myend-1 ###Drop the last instance since (avoiding redundancy of the time instance)  it belongs in the next it_set
      print("{:d}/{:d}: mylist = files_list({:d}:{:d})".format(mpi_rank, mpi_size,mystart,myend))
      mylist = files_list[mystart:myend]
      print("{:d}/{:d}: len(mylist) = {:d}".format(mpi_rank, mpi_size,len(mylist)))
@@ -124,24 +122,20 @@
 
 
 MPI.COMM_WORLD.Barrier()
-#### JAS quit for now
-#print("{:d}/{:d}: Bailing Out!".format(mpi_rank, mpi_size))
-#exit()
 
 ##############################################################################
 ### Ingest the target FE grid created from "BuildingMask_FEgen_SimGrid.py" ###
 ##############################################################################
-ds_FEGrid=xr.open_dataset(FE_simGrid)
+ds_FEGrid=xr.open_dataset(FE_simGrid, engine="netcdf4")
 
 ########################################
 ### Load the reference WRF data file ###
 ########################################
-ds_WRFRef=xr.open_dataset(files_list[0])
+ds_WRFRef=xr.open_dataset(files_list[0], engine="netcdf4")
 
 ############################################################################################
 ### Locate the FE-grid-bounding corners as index pairs (j,i) in the WRF-reference domain ###
 ############################################################################################
-####THIS is the real cell that find the minimum vector-distance location i,j for each lat/lon tower observation pairing
 
 ## Find the the WRF d02 profiler locations
 itargs=[]
@@ -152,9 +146,10 @@
 latFE = ds_FEGrid.lat.values
 lonFE = ds_FEGrid.lon.values
 
-corners_lat = [latFE[0,0], latFE[0,-1],latFE[-1,-1], latFE[-1,0]]
-corners_lon = [lonFE[0,0], lonFE[0,-1],lonFE[-1,-1], lonFE[-1,0]]
-
+corners_lat = np.asarray([latFE[0,0], latFE[0,-1],latFE[-1,-1], latFE[-1,0]])
+corners_lon = np.asarray([lonFE[0,0], lonFE[0,-1],lonFE[-1,-1], lonFE[-1,0]])
+print('corners_lat.shape=',corners_lat.shape)
+print('corners_lon.shape=',corners_lon.shape)
 print('corners_lat=',corners_lat)
 print('corners_lon=',corners_lon)
 
@@ -178,6 +173,7 @@
 ds_WRFRef['dFE_iindxs']=xr.DataArray(np.asarray(itargs,dtype=np.int32),dims=["corners"])
 for indx in range(len(corners_lat)):
   if(mpi_rank == 0):
+    print(f"corner({indx}) @ WRF({ds_WRFRef['dFE_jindxs'][indx].values},{ds_WRFRef['dFE_iindxs'][indx].values})")
     print('[WRF,corner({:d})]: lats = [{:f},{:f}], lons = [{:f},{:f}]'.format(indx,ds_WRFRef['XLAT'][0,ds_WRFRef['dFE_jindxs'][indx],ds_WRFRef['dFE_iindxs'][indx]].values,
                                                                                  corners_lat[indx],
                                                                                  ds_WRFRef['XLONG'][0,ds_WRFRef['dFE_jindxs'][indx],ds_WRFRef['dFE_iindxs'][indx]].values,
@@ -208,14 +204,14 @@
         else: 
             if xoffset > 0.0: #FE domain SE corner is east of the closest wrf cell, increment the bounding iindx
                 ds_WRFRef['dFE_iindxs'][indx]+=1
-    else:   ## 2=northwest, or 3=northeast corner
+    else:   ## 3=northwest, or 2=northeast corner
         if yoffset > 0.0:  #FE domain NE/NW corner is north of the closest wrf cell, increment the bounding jindx
             ds_WRFRef['dFE_jindxs'][indx]+=1
         if indx < 3: 
             if xoffset > 0.0: #FE domain NE corner is east of the closest wrf cell, increment the bounding iindx
                 ds_WRFRef['dFE_iindxs'][indx]+=1
         else: 
-            if xoffset < 0.0: #FE domain NW corner is west of the closest wrf cell, deccrement the bounding iindx
+            if xoffset < 0.0: #FE domain NW corner is west of the closest wrf cell, decrement the bounding iindx
                 ds_WRFRef['dFE_iindxs'][indx]-=1 
 if(mpi_rank == 0):
   print('Bounding-box corrected offsets:')        
@@ -244,10 +240,38 @@
 ll_iindx=ds_WRFRef['dFE_iindxs'].min(dim='corners').values
 j_extent=ds_WRFRef['dFE_jindxs'].max(dim='corners').values-ll_jindx
 i_extent=ds_WRFRef['dFE_iindxs'].max(dim='corners').values-ll_iindx
+
+##Ensure WRF interpolation area extents will entirely encompass FE target x & y domain 
+y_distWRF = (j_extent-1)*ds_WRFRef.attrs['DY']-ll_yoffset
+x_distWRF = (i_extent-1)*ds_WRFRef.attrs['DX']-ll_xoffset
+dxFE=(ds_FEGrid['xPos'][0,0,1]-ds_FEGrid['xPos'][0,0,0]).values
+dyFE=(ds_FEGrid['yPos'][0,1,0]-ds_FEGrid['yPos'][0,0,0]).values
+x_distFE = (ds_FEGrid.sizes['xIndex']-1)*dxFE
+y_distFE = (ds_FEGrid.sizes['yIndex']-1)*dyFE
+while x_distWRF <= x_distFE:
+    i_extent += 1
+    x_distWRF = (i_extent-1)*ds_WRFRef.attrs['DX']-ll_xoffset
+while y_distWRF <= y_distFE:
+    j_extent += 1
+    y_distWRF = (j_extent-1)*ds_WRFRef.attrs['DY']-ll_yoffset
+
 if(mpi_rank == 0):
-  print('ll: ({:d},{:d})'.format(ll_jindx,ll_iindx))
-  print('extents: ({:d},{:d})'.format(j_extent,i_extent))
-  print('y,x offsets: ({:f},{:f})'.format(ll_yoffset,ll_xoffset))
+    if (ll_jindx < 0):  # each failure aborts ALL ranks; a bare exit() here would stop rank 0 only
+        print(f"Southern FE domain boundary coordinate falls outside of the provided WRF domain, exiting.", flush=True)
+        MPI.COMM_WORLD.Abort(1)
+    elif (ll_iindx < 0):
+        print(f"Western FE domain boundary coordinate falls outside of the provided WRF domain, exiting.", flush=True)
+        MPI.COMM_WORLD.Abort(1)
+    elif (ll_jindx+j_extent > ds_WRFRef.sizes['south_north']):
+        print(f"Northern FE domain boundary coordinate falls outside of the provided WRF domain, exiting.", flush=True)
+        MPI.COMM_WORLD.Abort(1)
+    elif (ll_iindx+i_extent > ds_WRFRef.sizes['west_east']):
+        print(f"Eastern FE domain boundary coordinate falls outside of the provided WRF domain, exiting.", flush=True)
+        MPI.COMM_WORLD.Abort(1)
+    else:  #All set to perform strictly interpolation in the horizontal of WRF outputs to FE domain 
+        print('ll: ({:d},{:d})'.format(ll_jindx,ll_iindx))
+        print('extents: ({:d},{:d})'.format(j_extent,i_extent))
+        print('y,x offsets: ({:f},{:f})'.format(ll_yoffset,ll_xoffset))
 
 ######################################################################################################################
 ### Define the Cartesian southwest corner origin (x,y) WRF coordinate system for the horizontal FE-bounding domain ###
@@ -270,13 +294,13 @@
 dxFE=(ds_FEGrid['xPos'][0,0,1]-ds_FEGrid['xPos'][0,0,0]).values
 dyFE=(ds_FEGrid['yPos'][0,1,0]-ds_FEGrid['yPos'][0,0,0]).values
 if(mpi_rank == 0) and DEBUG_COUPLER:
-  print("Nx,NY = ({:d},{:d}), dxFE={:f},dyFE={:f}".format(ds_FEGrid.dims['xIndex'],ds_FEGrid.dims['yIndex'],dxFE,dyFE))
-x0FEinWRF=XvWRF[0,0]+ll_xoffset+ds_FEGrid['xPos'][0,0,0].values
-y0FEinWRF=YvWRF[0,0]+ll_yoffset+ds_FEGrid['yPos'][0,0,0].values
+  print("Nx,NY = ({:d},{:d}), dxFE={:f},dyFE={:f}".format(ds_FEGrid.sizes['xIndex'],ds_FEGrid.sizes['yIndex'],dxFE,dyFE))
+x0FEinWRF=XvWRF[0,0]+ll_xoffset 
+y0FEinWRF=YvWRF[0,0]+ll_yoffset 
 if(mpi_rank == 0) and DEBUG_COUPLER:
   print("x0FEinWRF,y0FEinWRF = {:f},{:f}".format(x0FEinWRF,y0FEinWRF))
-xNFEinWRF = x0FEinWRF-ds_FEGrid['xPos'][0,0,0].values+ds_FEGrid['xPos'][0,0,-1].values
-yNFEinWRF = y0FEinWRF-ds_FEGrid['yPos'][0,0,0].values+ds_FEGrid['yPos'][0,-1,0].values
+xNFEinWRF = x0FEinWRF+(ds_FEGrid.sizes['xIndex']-1)*dxFE
+yNFEinWRF = y0FEinWRF+(ds_FEGrid.sizes['yIndex']-1)*dyFE
 if(mpi_rank == 0) and DEBUG_COUPLER:
   print("xNFEinWRF,yNFEinWRF = {:f},{:f}".format(xNFEinWRF,yNFEinWRF))
   print("X_extentFE,Y_extentFE = {:f},{:f}".format(xNFEinWRF-x0FEinWRF,yNFEinWRF-y0FEinWRF))
@@ -303,7 +327,7 @@
 ####################################################################################################
 zBottom = 0.0
 zTop = ds_FEGrid['zPos'][-1,0,0].values+90.0
-NzWRFInterp = 275
+NzWRFInterp = 200 #275
 zRect = np.linspace(zBottom,zTop,NzWRFInterp)
 if(mpi_rank == 0) and DEBUG_COUPLER:
   print(zRect[0],zRect[-1])
@@ -345,6 +369,10 @@
     t3s = time.perf_counter()
     dsFEFinal=create_dsFEFinal(ds_FEGrid)
     verticalInterpFinal(ds_FEGrid,dsFENew,dsFEFinal,zRect)
+    if 'BuildingMask' in list(dsFEFinal.variables):
+       for var in ['u','v','w','ql','TKE_0']:
+          if var in list(dsFEFinal.variables):
+            dsFEFinal[var][:,:,:]=dsFEFinal[var][:,:,:]*np.where((dsFEFinal['BuildingMask'][:,:,:]>1e-3),0.0,1.0)
     t3e = time.perf_counter()
     print('{:d}/{:d}: t3_elapsed = {:f} (s)'.format(mpi_rank, mpi_size, t3e-t3s))
     addTimeDim_FEfinal(dsFEFinal)
diff --git a/scripts/python_utilities/coupler/GeoSpec.py b/scripts/python_utilities/coupler/GeoSpec.py
index 93a6bd9..6011763 100644
--- a/scripts/python_utilities/coupler/GeoSpec.py
+++ b/scripts/python_utilities/coupler/GeoSpec.py
@@ -23,6 +23,7 @@
 gis_file = params["gis_file"]
 nlcd_name = params["nlcd_name"]
 water_cats = params["water_cats"]
+urban_opt = params["urban_opt"]
 
 FE_dataset_path = params["FE_dataset_path"]
 gis_opt = params["gis_opt"]
@@ -121,6 +122,14 @@
     SeaMask_tmp[ind_land_wrf] = 0.0
     SeaMask_tmp[ind_sea_wrf] = 1.0
 
+# Building height information
+
+if (gis_opt==0 and urban_opt==1):
+
+    bdg_height = ds_GIS.BuildingHeights.values
+    ind_missing = np.where(bdg_height==-9999)
+    bdg_height[ind_missing] = 0.0
+
 # Save to netcdf file
 
 ds_data = xr.Dataset()
@@ -134,6 +143,8 @@
 ds_data['SeaMask']= xr.DataArray(SeaMask_tmp,dims=(['yIndex','xIndex']))
 ds_data['dx_inter']= xr.DataArray(np.array(dx,dtype=np.float32))
 ds_data['dy_inter']= xr.DataArray(np.array(dy,dtype=np.float32))
+if (gis_opt==0 and urban_opt==1):
+    ds_data['BuildingHeights']= xr.DataArray(bdg_height,dims=(['yIndex','xIndex']))
 ds_data['lat']= xr.DataArray(lat.astype(dtype=np.float64),dims=(['yIndex','xIndex']))
 ds_data['lon']= xr.DataArray(lon.astype(dtype=np.float64),dims=(['yIndex','xIndex']))
 
diff --git a/scripts/python_utilities/coupler/SimGrid.py b/scripts/python_utilities/coupler/SimGrid.py
index 765bfea..31c3d16 100644
--- a/scripts/python_utilities/coupler/SimGrid.py
+++ b/scripts/python_utilities/coupler/SimGrid.py
@@ -26,6 +26,7 @@
 FE_params_file = params["FE_params_file"]
 center_lat = params["center_lat"]
 center_lon = params["center_lon"]
+urban_opt = params["urban_opt"]
 FE_new_nc_path = params["FE_new_nc_path"]
 name_dom_add = params["name_dom_add"]
 save_plot_opt = params["save_plot_opt"]
@@ -216,6 +217,31 @@
 print('dz_lowTopo_v at the top =',dz_lowTopo_v[-1],'m')
 print('dz_highTopo_v at the top =',dz_highTopo_v[-1],'m')
 
+# building mask: flag cells from the lowest level up to the level nearest (topography + building height)
+if (urban_opt == 1):
+    print('Computing 3-d building mask...')
+    # 2-d building heights: subsampled (interp_flag==0) or nearest-neighbour interpolated otherwise
+    bdg_heights = ds_GIS.BuildingHeights.values
+    if (interp_flag==0):
+        data_bmask = bdg_heights[y_s:y_e:npy_inc,x_s:x_e:npx_inc]
+    else:  # interpolate from bdg_heights; data_bmask is not assigned yet on this path
+        f_bdg = NearestNDInterpolator(list(zip(xPos_2d_dom_ori.flatten(), yPos_2d_dom_ori.flatten())), bdg_heights[y_s:y_e,x_s:x_e].flatten())
+        data_bmask = f_bdg(xPos_2d_new, yPos_2d_new)
+
+    bdg3d_tmp = np.zeros((Nz,Ny,Nx),dtype=np.float32)
+    progress_stride = max(1,Ny//10)  # rows per 10% progress message; at least 1 so Ny<10 cannot divide by zero
+    for j in range(Ny):
+        if(j%progress_stride==0):
+          print('{:d}% complete...'.format(10*(j//progress_stride)))
+        for i in range(Nx):
+            z_1d = zarr[:,j,i]
+            if (data_bmask[j,i]!=0.0):
+                z_diff = np.abs(z_1d-(data_bmask[j,i]+data_topo[j,i]))
+                ind_min = np.where(z_diff==np.amin(z_diff))
+                ind_min = ind_min[0]
+                ind_min = ind_min[0]
+                bdg3d_tmp[0:ind_min+1,j,i] = 1.0
+
 # xPos, yPos
 xarr = np.zeros((Nz,Ny,Nx),dtype=np.float32)
 if (interp_flag==0):
@@ -275,6 +301,9 @@
 ds_data['z0t']= xr.DataArray(data_z0t.astype(dtype=np.float32),dims=(['yIndex','xIndex']))
 ds_data['SeaMask']= xr.DataArray(data_SeaMask.astype(dtype=np.float32),dims=(['yIndex','xIndex']))
 ds_data['LandCover']= xr.DataArray(data_landc.astype(dtype=np.int32),dims=(['yIndex','xIndex']))
+if (urban_opt == 1):
+    ds_data['BuildingMask']= xr.DataArray(bdg3d_tmp.astype(dtype=np.float32),dims=(['zIndex','yIndex','xIndex']))
+    ds_data['BuildingHeights']= xr.DataArray(data_bmask.astype(dtype=np.float32),dims=(['yIndex','xIndex']))
 ds_data['lat']= xr.DataArray(lat_dom.astype(dtype=np.float64),dims=(['yIndex','xIndex']))
 ds_data['lon']= xr.DataArray(lon_dom.astype(dtype=np.float64),dims=(['yIndex','xIndex']))
 ds_data['xIndex']= xr.DataArray(np.arange(0,xarr.shape[2],dtype=np.int32),dims='xIndex')
diff --git a/scripts/python_utilities/coupler/geospec.json b/scripts/python_utilities/coupler/geospec.json
index 3615b88..4969eb1 100644
--- a/scripts/python_utilities/coupler/geospec.json
+++ b/scripts/python_utilities/coupler/geospec.json
@@ -4,6 +4,7 @@
   "gis_file": "inputs_gis.nc",
   "nlcd_name": "LandCoverMetadata_NLCD16.csv",
   "water_cats": [11],
+  "urban_opt": 0,
   "FE_dataset_path": "/path_geospec/",
   "name_dom_add": "",
   "gis_opt": 0,
diff --git a/scripts/python_utilities/coupler/simgrid.json b/scripts/python_utilities/coupler/simgrid.json
index 78fd593..39248d4 100644
--- a/scripts/python_utilities/coupler/simgrid.json
+++ b/scripts/python_utilities/coupler/simgrid.json
@@ -4,6 +4,7 @@
   "FE_params_file": "FE_parameters_file.in",
   "center_lat": 40.5948,
   "center_lon": -105.1380,
+  "urban_opt": 0,
   "FE_new_nc_path": "/path_simgrid/",
   "name_dom_add": "",
   "save_plot_opt": 1
diff --git a/scripts/python_utilities/post-processing/FEbinaryToNetCDF.py b/scripts/python_utilities/post-processing/FEbinaryToNetCDF.py
index be77860..b6910ce 100755
--- a/scripts/python_utilities/post-processing/FEbinaryToNetCDF.py
+++ b/scripts/python_utilities/post-processing/FEbinaryToNetCDF.py
@@ -10,73 +10,380 @@
 import json
 import argparse
 from mpi4py import MPI
+from datetime import datetime
+import re
+
+### Load lookup tables from JSON file ###
+
+def load_field_attributes(json_file_path):
+    """
+    Load field attribute lookup tables from a JSON file.
+    
+    Args:
+        json_file_path (str): Path to the JSON file containing field attributes
+    
+    Returns:
+        tuple: (base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices)
+    """
+
+    try:
+        with open(json_file_path, 'r') as f:
+            attrs_data = json.load(f)
+        
+        # Convert lists back to tuples for consistency with original code
+        base_attrs = {k: tuple(v) for k, v in attrs_data['base_attrs'].items()}
+        jacobian_attrs = {k: tuple(v) for k, v in attrs_data['jacobian_attrs'].items()}
+        coordinate_attrs = {k: tuple(v) for k, v in attrs_data['coordinate_attrs'].items()}
+        
+        # Convert string keys back to integers for directions
+        directions = {int(k): v for k, v in attrs_data['directions'].items()}
+        
+        # Load special field mappings
+        base_state_indices = {int(k): tuple(v) for k, v in attrs_data['special_field_mappings']['base_state_indices'].items()}
+
+        return base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices
+        
+    except FileNotFoundError:
+        print(f"Warning: Field attributes file '{json_file_path}' not found. Using empty lookup tables.")
+        return {}, {}, {}, {}, {}
+    except Exception as e:
+        print(f"Error loading field attributes from '{json_file_path}': {e}")
+        return {}, {}, {}, {}, {}
 
 def field3dTranspose(fld,extents):
     fld=fld.reshape(extents)
     fldFinal=np.transpose(fld,axes=[2,1,0])
+    del fld
     return fldFinal[np.newaxis,Nh:-Nh,Nh:-Nh,Nh:-Nh]
 
 def field2dTranspose(fld,extents):
     fld=fld.reshape(extents)
     fldFinal=np.transpose(fld,axes=[1,0])
+    del fld
     return fldFinal[np.newaxis,Nh:-Nh,Nh:-Nh]
 
-def readBinary(outpath,theseFiles):
-    verboseLogging=False
+def get_variable_attrs(var_name, base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices):
+    """
+    Get CF-compliant attributes for a variable name, handling special cases.   
+    Args: 
+        var_name (str): Variable name to get attributes for
+        base_attrs (dict): Base field attributes lookup table
+        jacobian_attrs (dict): Jacobian field attributes lookup table
+        coordinate_attrs (dict): Coordinate field attributes lookup table
+        directions (dict): Direction index to name mapping
+        base_state_indices (dict): Base state field index mappings
+    Returns: 
+        tuple or None: (units, long_name, standard_name) or None if no match
+    """
+
+    # Handle BS_ fields with numeric identifiers
+    if var_name.startswith('BS_'):
+        try:
+            field_index = int(var_name[3:])
+            if field_index in base_state_indices:
+                return base_state_indices[field_index]
+            else:
+                return ('1', 'Base state field', None)
+        except ValueError:
+            pass
+
+    # Handle TauQv/TauQl moisture flux fields
+    tau_moisture_match = re.match(r'^TauQ([vl])(\d+)$', var_name)
+    if tau_moisture_match:
+        species, direction_idx = tau_moisture_match.groups()
+        direction_idx = int(direction_idx)
+        direction_name = directions.get(direction_idx, str(direction_idx))
+        
+        if species == 'v':  # TauQv (water vapor)
+            long_name = f'Subgrid-{direction_name} water vapor flux in {direction_name} direction'
+        elif species == 'l':  # TauQl (liquid water)
+            long_name = f'Subgrid-{direction_name} liquid water flux in {direction_name} direction'
+        else:
+            long_name = f'Subgrid-{direction_name} moisture flux in {direction_name} direction'
+        
+        return ('kg kg-1 m s-1', long_name, None)
+
+    # Handle numbered versions of base fields (e.g., AuxScalar_0, etc.)
+    base_name_match = re.match(r'^([A-Za-z_]+?)_?(\d+)$', var_name)
+    if base_name_match:
+        base_name = base_name_match.group(1)
+        if base_name in base_attrs:
+            return base_attrs[base_name]
+
+    # Check specific attribute dictionaries
+    for attr_dict in [jacobian_attrs, coordinate_attrs, base_attrs]:
+        if var_name in attr_dict:
+            return attr_dict[var_name]
+
+    return None
+
+def add_variable_attributes(ds, base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices):
+    """Add attributes to variables"""
+
+    for var_name, var in ds.data_vars.items():
+        attrs_tuple = get_variable_attrs(var_name, base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices)
+        
+        if attrs_tuple:
+            units, long_name, standard_name = attrs_tuple
+            var.attrs['units'] = units
+            var.attrs['long_name'] = long_name
+            if standard_name is not None:
+                var.attrs['standard_name'] = standard_name
+                
+    return ds
+    
+def add_coordinate_attributes(ds):
+    """Add coordinate variables and their attributes"""
+
+    # Create explicit coordinate variables based on dimension sizes
+    coords_to_add = {}
+
+    if 'xIndex' in ds.dims:
+        coords_to_add['xIndex'] = np.arange(ds.sizes['xIndex'], dtype=np.int32)
+
+    if 'yIndex' in ds.dims:
+        coords_to_add['yIndex'] = np.arange(ds.sizes['yIndex'], dtype=np.int32)
+
+    if 'zIndex' in ds.dims:
+        coords_to_add['zIndex'] = np.arange(ds.sizes['zIndex'], dtype=np.int32)
+
+    # Add the coordinate variables to the dataset
+    if coords_to_add:
+        ds = ds.assign_coords(coords_to_add)
+
+    if 'time' in ds.coords:
+        ds['time'].attrs = {
+            'units': 's',
+            'long_name': 'Simulation time',
+            'standard_name': 'time',
+            'axis': 'T'
+        }
+
+    coord_attrs = {
+        'xIndex': {
+            'long_name': 'x-coordinate index',
+            'units': '1',
+            'axis': 'X'
+        },
+        'yIndex': {
+            'long_name': 'y-coordinate index',
+            'units': '1',
+            'axis': 'Y'
+        },
+        'zIndex': {
+            'long_name': 'z-coordinate index',
+            'units': '1',
+            'axis': 'Z',
+            'positive': 'up'
+        }
+    }
+
+    for coord_name, attrs in coord_attrs.items():
+        if coord_name in ds.coords:
+            ds[coord_name].attrs.update(attrs)
+
+    return ds
+
+def reorder_dataset_variables(ds):
+    """
+    Reorder dataset variables to desired order:
+    zIndex, yIndex, xIndex, xPos, yPos, zPos, then other variables, with time last
+    """
+
+    # Define the desired order for the first variables
+    priority_order = ['zIndex', 'yIndex', 'xIndex', 'xPos', 'yPos', 'zPos']
+
+    # Get all variable names from both data_vars and coords, preserving original order
+    original_data_vars = list(ds.data_vars.keys())
+    original_coords = list(ds.coords.keys())
+
+    # Build the new order
+    new_order = []
+    used_vars = set()
+
+    # Add priority variables first (if they exist)
+    for var_name in priority_order:
+        if var_name in ds.data_vars or var_name in ds.coords:
+            new_order.append(var_name)
+            used_vars.add(var_name)
+
+    # Add remaining data variables in their original order (except time)
+    for var_name in original_data_vars:
+        if var_name not in used_vars and var_name != 'time':
+            new_order.append(var_name)
+            used_vars.add(var_name)
+
+    # Add remaining coordinate variables in their original order (except time)
+    for var_name in original_coords:
+        if var_name not in used_vars and var_name != 'time':
+            new_order.append(var_name)
+            used_vars.add(var_name)
+
+    # Add time last if it exists
+    if 'time' in ds.data_vars or 'time' in ds.coords:
+        new_order.append('time')
+
+    # Index the dataset with the ordered list of names to reorder variables
+    # This preserves the distinction between coords and data_vars
+    return ds[new_order]
+
+def readBinary(numOutRanks,outpath,theseFiles):
+    
+    if 'fulldata_dict' in locals():
+        del fulldata_dict
+    if 'subdata_dict' in locals():
+        del subdata_dict
+    gc.collect()
+
+    verboseLogging=False #True 
     print(theseFiles)
-    dsSet=[]
+    fulldata_dict = {}
+
     for thatFile in theseFiles:
+       subdata_dict = {}
        print(thatFile)
-       ds_fe=xr.Dataset()
        try:
           thisFile='{:s}{:s}'.format(outpath,thatFile)
           flength = os.stat(thisFile).st_size
           with open(thisFile, mode='rb') as f:
-               while(f.tell() < flength):
+               while(f.tell() < flength): #while the filepointer is not at the end of the binary file
+                 ## Read and parse the binary representation of 
+                 ## the current variable "name" (a string of integer length)
                  nameLen=struct.unpack("i", f.read(4))
                  if verboseLogging:
-                     print(len(nameLen),nameLen[0])
+                     print(f"len(nameLen) = {len(nameLen)}, nameLen[0] = {nameLen[0]}")
                  fieldName=f.read(nameLen[0]).rstrip(b'\x00').decode()
                  if verboseLogging:
-                     print(fieldName)
+                     print(f"fieldName = {fieldName}")
+                 ## Read and parse the binary representation of 
+                 ## the current variable "type" (a string of integer length)
                  typeLen=struct.unpack("i", f.read(4))
                  if verboseLogging:
-                     print(len(typeLen),typeLen[0])
+                     print(f"len(typeLen) = {len(typeLen)}, typeLen[0] = {typeLen[0]}")
                  fieldType=f.read(typeLen[0]).rstrip(b'\x00').decode()
+                 if verboseLogging:
+                     print(f"fieldType = {fieldType}")
+                 ## Read and parse the binary representation of 
+                 ## the current variable "number-of-dimensions" (integer) 
                  nDims=struct.unpack("i", f.read(4))
                  if verboseLogging:
-                     print(nDims)
+                     print(f"nDims = {nDims}")
+                 ## Read and parse the binary representation of 
+                 ## the current variable "dimension extents" (1-d integer array) 
                  extents=np.array([],dtype=np.int32)
                  fmtStr='{:d}i'.format(nDims[0])
                  extents=np.asarray(struct.unpack(fmtStr,f.read(nDims[0]*4)),dtype=np.int32)
                  if verboseLogging:
-                     print(extents)
+                     print(f"extents = {extents}")
+                 ## Read, parse, reshape, transpose the binary field based on the type and extents  
                  if fieldType == 'float':
-                    fmtStr='{:d}f'.format(np.prod(extents))
-                    fld=np.asarray(struct.unpack(fmtStr,f.read(np.prod(extents)*4)),dtype=np.float32)
+                     fmtStr='{:d}f'.format(np.prod(extents))
+                     if(len(extents)==3):
+                         fld3dfloat=np.frombuffer(f.read(np.prod(extents)*4),dtype=np.float32).reshape(extents)
+                         fldFinal=field3dTranspose(fld3dfloat,extents)
+                         if verboseLogging:
+                             print(fldFinal.shape)
+                     elif(len(extents)==2):
+                         fld2dfloat=np.frombuffer(f.read(np.prod(extents)*4),dtype=np.float32).reshape(extents)
+                         fldFinal=field2dTranspose(fld2dfloat,extents)
+                         if verboseLogging:
+                             print(fldFinal.shape)
+                     elif(len(extents)==1):
+                         fld1dfloat=np.frombuffer(f.read(np.prod(extents)*4),dtype=np.float32)
+                         if("GAD" in fieldName):
+                            fldFinal=fld1dfloat[np.newaxis,:]
+                         else:
+                            fldFinal=fld1dfloat
+                         if verboseLogging:
+                             print(fldFinal.shape)
                  elif fieldType == 'int':
-                    fmtStr='{:d}i'.format(np.prod(extents))
-                    fld=np.asarray(struct.unpack(fmtStr,f.read(np.prod(extents)*4)),dtype=np.int32)
-                 if(len(extents)==3):
-                     fld=fld.reshape(extents)
-                     fldFinal=field3dTranspose(fld,extents)
-                     if verboseLogging:
-                         print(fldFinal.shape)
-                     ds_fe[fieldName]=xr.DataArray(fldFinal,dims=['time','zIndex','yIndex','xIndex'])
-                 elif(len(extents)==2):
-                     fldFinal=field2dTranspose(fld,extents)
-                     if verboseLogging:
-                         print(fldFinal.shape)
-                     ds_fe[fieldName]=xr.DataArray(fldFinal,dims=['time','yIndex','xIndex'])
-                 elif(len(extents)==1):
-                     fldFinal=fld
-                     ds_fe[fieldName]=xr.DataArray(fldFinal,dims=['time'])
+                     fmtStr='{:d}i'.format(np.prod(extents))
+                     if(len(extents)==1):
+                         fld1dint=np.frombuffer(f.read(np.prod(extents)*4),dtype=np.int32)
+                         if("GAD" in fieldName):
+                            fldFinal=fld1dint[np.newaxis,:]
+                         else:
+                            fldFinal=fld1dint
+                         if verboseLogging:
+                             print(fldFinal.shape)
+                 ### Add the named field to a dictionary as a key-value pair 
+                 subdata_dict[fieldName]=fldFinal
        except IOError:
          print('Error While Opening the file: {:s}'.format(thisFile))
-       dsSet.append(ds_fe)
-      
-    #Concatenate all the perRank dataSets into a single dataset 
-    dsFull=xr.concat(dsSet,'xIndex',data_vars='minimal')
+       # NOTE: no `finally` clause is needed here: the `with` statement above
+       # already closes the file, and testing `f` after os.stat()/open() has
+       # raised on the first file would itself fail with UnboundLocalError,
+       # masking the IOError message printed by the handler above.
+       ### If this is the first time through allocate all the full data arrays 
+       ### necessary for a full data dictionary
+       if len(fulldata_dict) == 0:
+           rank_cnt=0
+           for key in subdata_dict.keys():
+               subextents = subdata_dict[key].shape
+               fullextents = subextents
+               if len(subextents) > 1:  ## Note tuples are immutable so using tuple(list(blah_tuple)) as workaround 
+                                        ## to compute xIndex value in fullextents 
+                   list_extents = list(subextents)
+                   list_extents[-1] = list_extents[-1]*numOutRanks
+                   fullextents = tuple(list_extents)
+               if verboseLogging:
+                   print(f"rank_cnt = {rank_cnt}, {key}: subextents = {subextents}, fullextents={fullextents}")
+               subtype = subdata_dict[key].dtype
+               fulldata_dict[key] = np.zeros(fullextents,dtype=subtype)
+               if verboseLogging:
+                    print(f"rank_cnt = {rank_cnt}, {key}:  fulldata_dict[key].shape= {fulldata_dict[key].shape}, fullextents={fullextents}")
+               ## NOTE for now this assumes concatenation is always on the xIndex 
+               ## (just like the original converter script)!!!
+               if len(subextents) == 1:    #[time]  #[GADNumTurbines]
+                      fulldata_dict[key] = np.copy(subdata_dict[key])
+               elif len(subextents) == 2:    #[time, GADNumTurbines]
+                      fulldata_dict[key] = np.copy(subdata_dict[key])
+               elif len(subextents) == 3:  #[time, yIndex, xIndex]
+                   fulldata_dict[key][:,:,rank_cnt*subextents[-1]:(rank_cnt+1)*subextents[-1]] = np.copy(subdata_dict[key])
+               elif len(subextents) == 4:  #[time, zIndex, yIndex, xIndex]
+                   if verboseLogging:
+                       print(f"rank_cnt = {rank_cnt}, {key}:  slice.shape= {fulldata_dict[key][:,:,:,rank_cnt*subextents[-1]:(rank_cnt+1)*subextents[-1]].shape}, fullextents={fullextents}")
+                   fulldata_dict[key][:,:,:,rank_cnt*subextents[-1]:(rank_cnt+1)*subextents[-1]] = np.copy(subdata_dict[key])
+           rank_cnt += 1
+       else:  ## This is the second or later file in the per-rank sequence so just fill fullData array segments
+           for key in subdata_dict.keys():
+               subextents = subdata_dict[key].shape
+               fullextents = fulldata_dict[key].shape
+               if verboseLogging:
+                   print(f"rank_cnt = {rank_cnt}, {key}: subextents = {subextents}, fullextents={fullextents}")
+               subtype = subdata_dict[key].dtype
+               if len(subextents) == 1: 
+                   if verboseLogging:
+                      print(f"rank_cnt = {rank_cnt}, skipping {key} since no spatial dimensions involved...")
+               elif len(subextents) == 2:
+                   ### perform a reduction (by sum) across rank files 
+                   fulldata_dict[key] = fulldata_dict[key]+np.copy(subdata_dict[key])
+               elif len(subextents) == 3:
+                   fulldata_dict[key][:,:,rank_cnt*subextents[-1]:(rank_cnt+1)*subextents[-1]] = np.copy(subdata_dict[key])
+               elif len(subextents) == 4:
+                   fulldata_dict[key][:,:,:,rank_cnt*subextents[-1]:(rank_cnt+1)*subextents[-1]] = np.copy(subdata_dict[key])
+           rank_cnt += 1
+              
+    #Create the full-domain single xarray dataset
+    dsFull=xr.Dataset()
+    for key in fulldata_dict.keys():
+        if verboseLogging:
+             print(f"rank_cnt = {rank_cnt}, creating dataset field {key} with shape {fulldata_dict[key].shape}...")
+        if len(fulldata_dict[key].shape) == 4:
+            dsFull[key]=xr.DataArray(fulldata_dict[key],dims=['time','zIndex','yIndex','xIndex'])
+        if len(fulldata_dict[key].shape) == 3:
+            dsFull[key]=xr.DataArray(fulldata_dict[key],dims=['time','yIndex','xIndex'])
+        if len(fulldata_dict[key].shape) == 2:
+            dsFull[key]=xr.DataArray(fulldata_dict[key],dims=['time','GADNumTurbines'])
+        if len(fulldata_dict[key].shape) == 1:
+            dsFull[key]=xr.DataArray(fulldata_dict[key],dims=['time'])
+   
+    ## Clean up memory 
+    del subdata_dict
+    del fulldata_dict
+    del fldFinal 
+    gc.collect()
+
     return dsFull
 
 ###
@@ -86,10 +393,11 @@ def parse_args():
 
     parser = argparse.ArgumentParser()
     parser.add_argument("-f", "--file", required=True, help="JSON file with converter parameter settings")
+    parser.add_argument("-a", "--attrs", required=True, help="JSON file with field attribute definitions")
     args = parser.parse_args()
     return args
 
-################## main()
+################## main() ################################################################################
 print("Hello performing first MPI calls.")
 
 mpi_size = MPI.COMM_WORLD.Get_size()
@@ -100,6 +408,12 @@ def parse_args():
 ### Parse the command line arguments ###
 ########################################
 args = parse_args()
+
+#########################################################
+### Load field attributes from JSON file ###
+#########################################################
+base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices = load_field_attributes(args.attrs)
+
 #########################################################
 ### Read the json file of converter script parameters ###
 #########################################################
@@ -115,12 +429,12 @@ def parse_args():
 netCDFpath = params["netCDFpath"]
 removeBinaries = params["removeBinaries"]
 
-if mpi_size <= fileSetSize:
+if mpi_size <= fileSetSize+1:
   fileBatchsize = np.int32(fileSetSize/mpi_size)
 else:
   print('mpi_size of {:d} is > fileSetSize = {:d}. Please ensure mpi_size <= fileSetSize.*'.format(mpi_size,fileSetSize))
   exit()
-tstop=tstart+tstep*fileSetSize   #tstart+(mpi_size*fileBatchsize+1)*tstep
+tstop=tstart+tstep*fileSetSize   
 print("{:d}/{:d}: Hello World! on {:s}.".format(mpi_rank, mpi_size, mpi_name))
 print('Converting binary FE outputs {:s}{:s}_rank_{:d}-{:d}.*'.format(outpath,FEoutBase,0,numOutRanks))
 print('In batches of {:d} files per rank beginning from timestep {:d} to timestep {:d} every {:d} timesteps.'.format(fileBatchsize,tstart,tstop,tstep))
@@ -172,19 +486,28 @@ def parse_args():
    else:
        print('{:d} specified binary files are missing. Skipping timestep: {:d}...'.format(numOutRanks-goodCnt,timeStep))
    if parseProceed:
-     dsFull=readBinary(outpath,theseFiles)
-     #write the full  domain datatset to netcdf file
-     if False:
-        dsFull.to_netcdf('{:s}NETCDF/{:s}.{:d}'.format(outpath,FEoutBase,timeStep),format='NETCDF4')
-     else:
-        dsFull.to_netcdf('{:s}/{:s}.{:d}'.format(netCDFpath,FEoutBase,timeStep),format='NETCDF4')
+     dsFull=readBinary(numOutRanks,outpath,theseFiles)
+
+     # Add variable and coordinate attributes
+     dsFull = add_coordinate_attributes(dsFull)
+     dsFull = add_variable_attributes(dsFull, base_attrs, jacobian_attrs, coordinate_attrs, directions, base_state_indices)
+
+     # Reorder variables to desired order
+     dsFull = reorder_dataset_variables(dsFull)
+
+     # Create encoding to prevent _FillValue for all variables AND coordinates
+     encoding = {var: {'_FillValue': None} for var in list(dsFull.data_vars) + list(dsFull.coords)}
+
+     #write the full domain dataset to netcdf file
+     dsFull.to_netcdf('{:s}/{:s}.{:d}'.format(netCDFpath,FEoutBase,timeStep),format='NETCDF4',encoding=encoding)
+     
      del dsFull
+     gc.collect()
      if os.path.exists('{:s}/{:s}.{:d}'.format(netCDFpath,FEoutBase,timeStep)):
        if removeBinaries:
          for thatFile in theseFiles:
            thisFile='{:s}{:s}'.format(outpath,thatFile)
            os.remove(thisFile)
-   gc.collect()
 
 MPI.COMM_WORLD.Barrier()
 print("{:d}/{:d}: Conversions complete on {:s}.".format(mpi_rank, mpi_size, mpi_name))
diff --git a/scripts/python_utilities/post-processing/field_attributes.json b/scripts/python_utilities/post-processing/field_attributes.json
new file mode 100644
index 0000000..8fffaab
--- /dev/null
+++ b/scripts/python_utilities/post-processing/field_attributes.json
@@ -0,0 +1,78 @@
+{
+  "base_attrs": {
+    "BS_pressure": ["Pa", "Base state pressure", "air_pressure"],
+    "TauTH1": ["K m s-1", "Subgrid-x turbulent flux of potential temperature", null],
+    "TauTH2": ["K m s-1", "Subgrid-y turbulent flux of potential temperature", null],
+    "TauTH3": ["K m s-1", "Subgrid-z turbulent flux of potential temperature", null],
+    "Tau11": ["m2 s-2", "Subgrid-xx stress tensor component", null],
+    "Tau21": ["m2 s-2", "Subgrid-yx stress tensor component", null],
+    "Tau31": ["m2 s-2", "Subgrid-zx stress tensor component", null],
+    "Tau32": ["m2 s-2", "Subgrid-zy stress tensor component", null],
+    "Tau22": ["m2 s-2", "Subgrid-yy stress tensor component", null],
+    "Tau33": ["m2 s-2", "Subgrid-zz stress tensor component", null],
+    "rho": ["kg m-3", "Air density", "air_density"],
+    "u": ["m s-1", "Zonal wind velocity", "eastward_wind"],
+    "v": ["m s-1", "Meridional wind velocity", "northward_wind"],
+    "w": ["m s-1", "Vertical wind velocity", "upward_air_velocity"],
+    "theta": ["K", "Potential temperature", "air_potential_temperature"],
+    "pressure": ["Pa", "Perturbation pressure", null],
+    "TKE_0": ["m2 s-2", "Subgrid turbulent kinetic energy of air at grid-filter scale", null],
+    "TKE_1": ["m2 s-2", "Subgrid turbulent kinetic energy of air at canopy leaf scale", null],
+    "AuxScalar": ["-", "Auxiliary scalar", null],
+    "moisture": ["kg kg-1", "Water vapor mixing ratio", "humidity_mixing_ratio"],
+    "qv": ["g kg-1", "Water vapor mixing ratio", "humidity_mixing_ratio"],
+    "ql": ["g kg-1", "Cloud liquid water mixing ratio", "cloud_liquid_water_mixing_ratio"],
+    "fricVel": ["m s-1", "Surface friction velocity", "surface_friction_velocity"],
+    "htFlux": ["K m s-1", "Surface sensible heat flux", "surface_upward_sensible_heat_flux"],
+    "qFlux": ["kg kg-1 m s-1", "Surface latent heat flux", "surface_upward_latent_heat_flux"],
+    "tskin": ["K", "Surface skin temperature", "surface_temperature"],
+    "qskin": ["kg kg-1", "Surface skin water vapor mixing ratio", null],
+    "z0m": ["m", "Roughness length for momentum", "surface_roughness_length_for_momentum_in_air"],
+    "z0t": ["m", "Roughness length for heat", "surface_roughness_length_for_heat_in_air"],
+    "invOblen": ["m-1", "Inverse Obukhov length", null],
+    "cellpert_amp": ["K", "Cell perturbation amplitude", null],
+    "cellpert_nts": ["-", "Cell perturbation number of time steps", null],
+    "cellpert_ktop": ["-", "Cell perturbation top grid level", null],
+    "CanopyLAD": ["m-1", "Leaf area density", "leaf_area_density"],
+    "SeaMask": ["-", "Sea mask", "sea_area_fraction"],
+    "GAD_rotorTheta": ["degrees", "rotor angle from west increasing counter-clockwise", null],
+    "GAD_turbineYawing": ["-", "flag indicating turbine is in the process of yawing", null],
+    "GAD_turbineRefMag": ["m s-1", "turbine reference wind speed", null],
+    "GAD_turbineRefDir": ["degrees", "turbine reference wind direction", null],
+    "GAD_yawError": ["degrees 2 s", "turbine-wind misalignment error for yaw-controller", null],
+    "GAD_anFactor": ["-", "rotor-normal upstream wind speed induction factor", null],
+    "GAD_turbineVolMask": ["-", "Turbine Volume Mask", null],
+    "GAD_forceX": ["N m-3", "Turbine Forces in X-Direction", null],
+    "GAD_forceY": ["N m-3", "Turbine Forces in Y-Direction", null],
+    "GAD_forceZ": ["N m-3", "Turbine Forces in Z-Direction", null],
+    "GAD_turbineRotorMask": ["-", "Turbine Rotor-Disk Mask", null],
+    "BuildingMask": ["-", "Building Mask", null],
+    "UrbanHeatRedis": ["-", "Urban Heat Redistribution Coefficient", null]
+  },
+  "jacobian_attrs": {
+    "D_Jac": ["-", "Jacobian determinant", null],
+    "invD_Jac": ["-", "inverse Jacobian determinant", null],
+    "J13": ["-", "metric tensor component dx/d_zeta", null],
+    "J23": ["-", "metric tensor component dy/d_zeta", null],
+    "J31": ["-", "metric tensor component dz/d_xi", null],
+    "J32": ["-", "metric tensor component dz/d_eta", null],
+    "J33": ["-", "metric tensor component dz/d_zeta", null]
+  },
+  "coordinate_attrs": {
+    "xPos": ["m", "x-coordinate of cell center", "projection_x_coordinate"],
+    "yPos": ["m", "y-coordinate of cell center", "projection_y_coordinate"],
+    "zPos": ["m", "z-coordinate of cell center", "height"],
+    "topoPos": ["m", "Terrain elevation", "surface_altitude"]
+  },
+  "directions": {
+    "0": "x",
+    "1": "y",
+    "2": "z"
+  },
+  "special_field_mappings": {
+    "base_state_indices": {
+      "0": ["kg m-3", "Base state air density", "air_density"],
+      "1": ["K", "Base state potential temperature", "air_potential_temperature"]
+    }
+  }
+}
diff --git a/tutorials/notebooks/FE_Postprocessing_Example04_BOMEX.ipynb b/tutorials/notebooks/FE_Postprocessing_Example04_BOMEX.ipynb
index 667b792..b60e6b0 100644
--- a/tutorials/notebooks/FE_Postprocessing_Example04_BOMEX.ipynb
+++ b/tutorials/notebooks/FE_Postprocessing_Example04_BOMEX.ipynb
@@ -13,57 +13,11 @@
     "import xarray as xr\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
-    "from netCDF4 import Dataset\n",
     "from matplotlib.gridspec import GridSpec\n",
     "import matplotlib.ticker as mtick\n",
     "import pandas as pd"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pkg_resources\n",
-    "import types\n",
-    "def get_imports():\n",
-    "    for name, val in globals().items():\n",
-    "        if isinstance(val, types.ModuleType):\n",
-    "            # Split ensures you get root package, \n",
-    "            # not just imported function\n",
-    "            name = val.__name__.split(\".\")[0]\n",
-    "\n",
-    "        elif isinstance(val, type):\n",
-    "            name = val.__module__.split(\".\")[0]\n",
-    "\n",
-    "        # Some packages are weird and have different\n",
-    "        # imported names vs. system/pip names. Unfortunately,\n",
-    "        # there is no systematic way to get pip names from\n",
-    "        # a package's imported name. You'll have to had\n",
-    "        # exceptions to this list manually!\n",
-    "        poorly_named_packages = {\n",
-    "            \"PIL\": \"Pillow\",\n",
-    "            \"sklearn\": \"scikit-learn\"\n",
-    "        }\n",
-    "        if name in poorly_named_packages.keys():\n",
-    "            name = poorly_named_packages[name]\n",
-    "\n",
-    "        yield name\n",
-    "imports = list(set(get_imports()))\n",
-    "\n",
-    "# The only way I found to get the version of the root package\n",
-    "# from only the name of the package is to cross-check the names \n",
-    "# of installed packages vs. imported packages\n",
-    "requirements = []\n",
-    "for m in pkg_resources.working_set:\n",
-    "    if m.project_name in imports and m.project_name!=\"pip\":\n",
-    "        requirements.append((m.project_name, m.version))\n",
-    "\n",
-    "for r in requirements:\n",
-    "    print(\"{}=={}\".format(*r))"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1008,9 +962,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "NPL 2025b",
    "language": "python",
-   "name": "python3"
+   "name": "npl-2025b"
   },
   "language_info": {
    "codemirror_mode": {
@@ -1022,7 +976,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.9"
+   "version": "3.12.11"
   }
  },
  "nbformat": 4,
diff --git a/tutorials/notebooks/MAKE_FE_TUTORIAL_PLOTS.ipynb b/tutorials/notebooks/MAKE_FE_TUTORIAL_PLOTS.ipynb
index ce75776..7360642 100644
--- a/tutorials/notebooks/MAKE_FE_TUTORIAL_PLOTS.ipynb
+++ b/tutorials/notebooks/MAKE_FE_TUTORIAL_PLOTS.ipynb
@@ -25,25 +25,11 @@
     "import xarray as xr\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
-    "from netCDF4 import Dataset\n",
     "import math\n",
-    "from scipy.stats import skew\n",
-    "from scipy.stats import kurtosis\n",
     "import matplotlib.colors as mcolors\n",
-    "import scipy.fftpack as fftpack\n",
-    "from scipy import interpolate\n",
     "from fetutorialfunctions import *"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "check_imports()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -389,9 +375,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "NPL 2025b",
    "language": "python",
-   "name": "python3"
+   "name": "npl-2025b"
   },
   "language_info": {
    "codemirror_mode": {
@@ -403,7 +389,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.12.11"
   }
  },
  "nbformat": 4,
diff --git a/tutorials/notebooks/fetutorialfunctions.py b/tutorials/notebooks/fetutorialfunctions.py
index 4b2f135..cdd6231 100644
--- a/tutorials/notebooks/fetutorialfunctions.py
+++ b/tutorials/notebooks/fetutorialfunctions.py
@@ -3,56 +3,11 @@
 import xarray as xr
 import pandas as pd
 import matplotlib.pyplot as plt
-from netCDF4 import Dataset
 import math
-from scipy.stats import skew
-from scipy.stats import kurtosis
 import matplotlib.colors as mcolors
 import scipy.fftpack as fftpack
-from scipy import interpolate
 from math import log10, floor
 
-def check_imports():
-   import pkg_resources
-   
-   imports = list(set(get_imports()))
-
-   # The only way I found to get the version of the root package
-   # from only the name of the package is to cross-check the names 
-   # of installed packages vs. imported packages
-   requirements = []
-   for m in pkg_resources.working_set:
-    if m.project_name in imports and m.project_name!="pip":
-        requirements.append((m.project_name, m.version))
-
-   for r in requirements:
-    print("{}=={}".format(*r))
-
-def get_imports():
-    import types
-    for name, val in globals().items():
-        if isinstance(val, types.ModuleType):
-            # Split ensures you get root package, 
-            # not just imported function
-            name = val.__name__.split(".")[0]
-
-        elif isinstance(val, type):
-            name = val.__module__.split(".")[0]
-
-        # Some packages are weird and have different
-        # imported names vs. system/pip names. Unfortunately,
-        # there is no systematic way to get pip names from
-        # a package's imported name. You'll have to had
-        # exceptions to this list manually!
-        poorly_named_packages = {
-            "PIL": "Pillow",
-            "sklearn": "scikit-learn"
-        }
-        if name in poorly_named_packages.keys():
-            name = poorly_named_packages[name]
-
-        yield name
-
 def compute_mean_profiles(FE_xr):
 
     var_list = list(FE_xr.data_vars.keys())
@@ -167,20 +122,20 @@ def plot_XY_UVWTHETA(case, case_open, zChoose, save_plot_opt, path_figure, plot_
     zPos = case_open.zPos.isel(time=0,zIndex=zChoose).values
     if (plot_qv_cont==1):
         wsfield = np.sqrt(np.power(ufield,2.0)+np.power(vfield,2.0))
-        ws_min = np.amin(np.amin(wsfield[zChoose,:,:]))
-        ws_max = np.amax(np.amax(wsfield[zChoose,:,:]))
+        ws_min = np.percentile(wsfield[zChoose,:,:].ravel(), 0.01)
+        ws_max = np.percentile(wsfield[zChoose,:,:].ravel(), 99.99)
         qvfield = case_open.qv.isel(time=0).values
-        qv_min = np.amin(np.amin(qvfield[zChoose,:,:]))
-        qv_max = np.amax(np.amax(qvfield[zChoose,:,:]))
-        
-    u_min = np.amin(np.amin(ufield[zChoose,:,:]))
-    u_max = np.amax(np.amax(ufield[zChoose,:,:]))
-    v_min = np.amin(np.amin(vfield[zChoose,:,:]))
-    v_max = np.amax(np.amax(vfield[zChoose,:,:]))
-    w_min = np.amin(np.amin(wfield[zChoose,:,:]))
-    w_max = np.amax(np.amax(wfield[zChoose,:,:]))
-    t_min = np.amin(np.amin(thetafield[zChoose,:,:]))
-    t_max = np.amax(np.amax(thetafield[zChoose,:,:]))
+        qv_min = np.percentile(qvfield[zChoose,:,:].ravel(), 0.01)
+        qv_max = np.percentile(qvfield[zChoose,:,:].ravel(), 99.99)
+
+    u_min = np.percentile(ufield[zChoose,:,:].ravel(), 0.01)
+    u_max = np.percentile(ufield[zChoose,:,:].ravel(), 99.99)
+    v_min = np.percentile(vfield[zChoose,:,:].ravel(), 0.01)
+    v_max = np.percentile(vfield[zChoose,:,:].ravel(), 99.99)
+    w_min = np.percentile(wfield[zChoose,:,:].ravel(), 0.01)
+    w_max = np.percentile(wfield[zChoose,:,:].ravel(), 99.99)
+    t_min = np.percentile(thetafield[zChoose,:,:].ravel(), 0.01)
+    t_max = np.percentile(thetafield[zChoose,:,:].ravel(), 99.99)
     
     fig_name = "UVWTHETA-XY-"+case+".png"
     if (plot_u_map==1):
@@ -297,23 +252,23 @@ def plot_XZ_UVWTHETA(case, case_open, z_max, sizeX_XZ, sizeY_XZ, save_plot_opt,
     zPos_1d = zPos[:,0]
     diff_z = np.abs(zPos_1d - z_max)
     ind_zmax = np.where(diff_z==np.min(diff_z))[0][0]
-            
-    u_min = np.amin(np.amin(ufield[0:ind_zmax,yChoose,:]))
-    u_max = np.amax(np.amax(ufield[0:ind_zmax,yChoose,:]))
-    v_min = np.amin(np.amin(vfield[0:ind_zmax,yChoose,:]))
-    v_max = np.amax(np.amax(vfield[0:ind_zmax,yChoose,:]))
-    w_max = np.amax(np.amax(np.abs(wfield[0:ind_zmax,yChoose,:])))
+    
+    u_min = np.percentile(ufield[0:ind_zmax,yChoose,:].ravel(), 0.01)
+    u_max = np.percentile(ufield[0:ind_zmax,yChoose,:].ravel(), 99.99)
+    v_min = np.percentile(vfield[0:ind_zmax,yChoose,:].ravel(), 0.01)
+    v_max = np.percentile(vfield[0:ind_zmax,yChoose,:].ravel(), 99.99)
+    w_max = np.percentile(np.abs(wfield[0:ind_zmax,yChoose,:]).ravel(), 99.99)
     w_min = -w_max
-    t_min = np.amin(np.amin(thetafield[0:ind_zmax,yChoose,:]))
-    t_max = np.amax(np.amax(thetafield[0:ind_zmax,yChoose,:]))
+    t_min = np.percentile(thetafield[0:ind_zmax,yChoose,:].ravel(), 0.01)
+    t_max = np.percentile(thetafield[0:ind_zmax,yChoose,:].ravel(), 99.99)
 
     if (plot_qv_cont==1):
         wsfield = np.sqrt(np.power(ufield,2.0)+np.power(vfield,2.0))
-        ws_min = np.amin(np.amin(wsfield[0:ind_zmax,yChoose,:]))
-        ws_max = np.amax(np.amax(wsfield[0:ind_zmax,yChoose,:]))
+        ws_min = np.percentile(wsfield[0:ind_zmax,yChoose,:].ravel(), 0.01)
+        ws_max = np.percentile(wsfield[0:ind_zmax,yChoose,:].ravel(), 99.99)
         qvfield = case_open.qv.isel(time=0).values
-        qv_min = np.amin(np.amin(qvfield[0:ind_zmax,yChoose,:]))
-        qv_max = np.amax(np.amax(qvfield[0:ind_zmax,yChoose,:]))
+        qv_min = np.percentile(qvfield[0:ind_zmax,yChoose,:].ravel(), 0.01)
+        qv_max = np.percentile(qvfield[0:ind_zmax,yChoose,:].ravel(), 99.99)
 
     fig_name = "UVWTHETA-XZ-"+case+".png"
     colormap1 = 'viridis'
@@ -843,7 +798,8 @@ def plot_pdfs(fig, axs, FE_xr, case, save_plot_opt, path_figure):
     var0_max = np.max(var0)
     var1_min = np.min(var1)
     var1_max = np.max(var1)
-
+    print(f"var0 (min,max) = ({var0_min},{var0_max})")
+    print(f"var1 (min,max) = ({var1_min},{var1_max})")
     pdf_inc_0 = (var0_max-var0_min)/60
     pdf_inc_1 = (var1_max-var1_min)/60
     
@@ -1141,4 +1097,5 @@ def plot_YZ_DISPERSION(case, case_open, xDisper, min_c_val, max_c_val, save_plot
 
     if (save_plot_opt==1):
         print(path_figure + fig_name)
-        plt.savefig(path_figure + fig_name,dpi=300,bbox_inches = "tight")
\ No newline at end of file
+        plt.savefig(path_figure + fig_name,dpi=300,bbox_inches = "tight")
+