tensorflow · axinging · Oct 9, 2021 · Oct 9, 2021 · qjia7 · Oct 14, 2021
diff --git a/tfjs-backend-webgpu/src/kernels/argminmax_webgpu.ts b/tfjs-backend-webgpu/src/kernels/argminmax_webgpu.ts
@@ -17,7 +17,7 @@
 
 import {backend_util, util} from '@tensorflow/tfjs-core';
 
-import {getCoordsDataType, getGlobalIndexString, getMainHeaderString} from '../shader_preprocessor';
+import {getCoordsDataType, getMainHeaderString} from '../shader_preprocessor';
 import {computeDispatch} from '../webgpu_util';
 
 import {WebGPUProgram} from './webgpu_program';
@@ -139,9 +139,9 @@ export class ArgMinMaxProgram implements WebGPUProgram {
       // add back the index along the reduced dimension to |outputCoords|.
       // This function outputs the offset to the first value along
       // |axis| and the stride to get the next value of the input along |axis|.
-      fn getInputCoordInfo(globalId : vec3<u32>, globalIndex : i32) -> vec2<i32>{
+      fn getInputCoordInfo(globalId : vec3<u32>) -> vec2<i32>{
         let outputCoords : ${
-        outputCoordsType} = getOutputCoords(globalId, globalIndex);
+        outputCoordsType} = getOutputCoords(globalId, i32(globalId.x));
         var i = ${this.outputShape.length - 1};
 
         var stride = 1;
@@ -168,8 +168,7 @@ export class ArgMinMaxProgram implements WebGPUProgram {
       }
 
       ${getMainHeaderString()} {
-        ${getGlobalIndexString()}
-        let coordInfo = getInputCoordInfo(globalId, index);
+        let coordInfo = getInputCoordInfo(globalId);
 
         var bestIndex = 0;
         var bestValue = x.numbers[getInputIndex(coordInfo, bestIndex)];

diff --git a/tfjs-backend-webgpu/src/kernels/depthwise_conv2d_3x3_webgpu.ts b/tfjs-backend-webgpu/src/kernels/depthwise_conv2d_3x3_webgpu.ts
@@ -17,7 +17,7 @@
 
 import {backend_util, util} from '@tensorflow/tfjs-core';
 
-import {getGlobalIndexString, getMainHeaderString} from '../shader_preprocessor';
+import {getMainHeaderString} from '../shader_preprocessor';
 import {computeDispatch} from '../webgpu_util';
 
 import {mapActivationToShaderProgram} from './activation_util';
@@ -72,20 +72,20 @@ export class DepthwiseConv2D3x3Program implements WebGPUProgram {
           mapActivationToShaderProgram(this.activation, this.isVec4);
       if (this.hasPreluActivation) {
         activationSnippet =
-            `fn activation(a : vec4<f32>, globalId : vec3<u32>, globalIndex : i32) -> vec4<f32> {
-          let b = getPreluActivationWeightsAtOutCoordsByGlobalId(globalId, globalIndex);
+            `fn activation(a : vec4<f32>, globalId : vec3<u32>) -> vec4<f32> {
+          let b = getPreluActivationWeightsAtOutCoordsByGlobalId(globalId, i32(globalId.x));
           ${activationOp}
         }`;
       } else {
         activationSnippet = `
-        fn activation(a : vec4<f32>, globalId : vec3<u32>, globalIndex : i32) -> vec4<f32> {
+        fn activation(a : vec4<f32>, globalId : vec3<u32>) -> vec4<f32> {
             ${activationOp}
           }
         `;
       }
 
       applyActivationSnippet =
-          `dotProd[i] = activation(dotProd[i], globalId, index);`;
+          `dotProd[i] = activation(dotProd[i], globalId);`;
     }
 
     const addBiasSnippet = this.addBias ?
@@ -96,7 +96,6 @@ export class DepthwiseConv2D3x3Program implements WebGPUProgram {
       ${activationSnippet}
 
       ${getMainHeaderString()} {
-        ${getGlobalIndexString()}
         let batch = 0;
         let r = i32(globalId.x);
         let c = i32(globalId.y) * 4;

diff --git a/tfjs-backend-webgpu/src/kernels/reduce_webgpu.ts b/tfjs-backend-webgpu/src/kernels/reduce_webgpu.ts
@@ -16,7 +16,7 @@
  */
 
 import {backend_util, DataType} from '@tensorflow/tfjs-core';
-import {getGlobalIndexString, getMainHeaderString} from '../shader_preprocessor';
+import {getMainHeaderString} from '../shader_preprocessor';
 import {computeDispatch} from '../webgpu_util';
 
 import {WebGPUProgram} from './webgpu_program';
@@ -121,17 +121,16 @@ export class ReduceProgram implements WebGPUProgram {
        }
        let WorkGroupSize = ${this.workGroupSize[0]};
        ${reduceInSharedMemory ? sharedMemorySnippet : ''}
-       fn getOffset(globalId : vec3<u32>, index : i32) -> i32 {
-         let outputCoords = getOutputCoords(globalId, index);
+       fn getOffset(globalId : vec3<u32>) -> i32 {
+         let outputCoords = getOutputCoords(globalId, i32(globalId.x));
          let offset = ${
         this.outputShape.length === 1 ?
             'outputCoords' :
             'outputCoords[0]'} * uniforms.reduceSize;
          return offset;
        }
        ${getMainHeaderString()} {
-         ${getGlobalIndexString()}
-         let offset= getOffset(globalId, index);
+         let offset = getOffset(globalId);
          var bestValue = ${initValue};
          let Length = uniforms.reduceSize;
          let WorkPerThread = DIV_CEIL(Length, WorkGroupSize);

diff --git a/tfjs-backend-webgpu/src/kernels/transpose_shared_webgpu.ts b/tfjs-backend-webgpu/src/kernels/transpose_shared_webgpu.ts
@@ -15,7 +15,7 @@
  * =============================================================================
  */
 
-import {getGlobalIndexString, getMainHeaderString} from '../shader_preprocessor';
+import {getMainHeaderString} from '../shader_preprocessor';
 import {computeDispatch} from '../webgpu_util';
 
 import {WebGPUProgram} from './webgpu_program';
@@ -48,7 +48,6 @@ export class TransposeSharedProgram implements WebGPUProgram {
       var<workgroup> tile : array<array<f32, ${this.workGroupSize[0] + 1}>, ${
         this.workGroupSize[0]}>;
       ${getMainHeaderString()} {
-        ${getGlobalIndexString()}
         let workGroupID = (globalId - localId)/vec3<u32>(${
         this.workGroupSize[0]}u, ${this.workGroupSize[1]}u, ${
         this.workGroupSize[2]}u);

diff --git a/tfjs-backend-webgpu/src/shader_preprocessor.ts b/tfjs-backend-webgpu/src/shader_preprocessor.ts
@@ -72,9 +72,7 @@ export function getWorkGroupSizeString(): string {
 }
 
 export function getGlobalIndexString(): string {
-  return `
-  let index = getGlobalIndex(globalId, localId);
-`;
+  return 'let index = getGlobalIndex(globalId, localId);';  // WGSL snippet; needs globalId/localId in scope at the splice site.
 }
 
 export function getMainHeaderString() {
@@ -278,11 +276,7 @@ const SAMPLING_SNIPPETS = `
         f32(shape.y) * f32(shape.z) * f32(shape.w), f32(shape.z) * f32(shape.w), f32(shape.w), 1.0)));
   }
 
-  // Only used when the y/z dimension of workgroup size is 1.
   fn getGlobalIndex(globalId : vec3<u32>, localId : vec3<u32>) -> i32 {
-    if (uniforms.dispatchSize.y == 1u && uniforms.dispatchSize.z == 1u) {
-      return i32(globalId.x);
-    }
     let localInvocationIndex = localId.z * workGroupSizeX * workGroupSizeY +
       localId.y * workGroupSizeX + localId.x;
     let workGroupID = (globalId - localId)/vec3<u32>(