diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 799591c5ce..2605198ccf 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -29,3 +29,9 @@ c3bd8eb1214cbebbc92c7958b80aa06913bce3ba
 
 # A commit which ran flynt all Python files.
 e73655d038cdfa68964109044e33c9a6e7d85ac9
+
+# A commit which ran pre-commit on ext/testlib
+9e1afdecefaf910fa6e266f29dc480a32b0fa83e
+
+# Updated black from 22.6.0 to 23.9.1
+ddf6cb88e48df4ac7de4a9e4b612daf2e7e635c8
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000..6f89a7eb33
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,56 @@
+---
+name: Bug report
+about: Create a report to help us find and fix the bug
+title: ''
+labels: bug
+assignees: ''
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Affects version**
+State which version of gem5 this bug was found in. If on the develop branch, state the commit revision ID you are working from.
+
+**gem5 Modifications**
+If you have modified gem5 in some way please state, to the best of your ability, how it has been modified.
+
+**To Reproduce**
+Steps to reproduce the behavior. Please assume starting from a clean repository:
+
+1. Compile gem5 with command ...
+2. Execute the simulation with...
+
+If writing code, or a terminal command, use code blocks. Either an inline code block, `scons build/ALL/gem5.opt` (enclosed in two '`') or a multi-line codeblock:
+
+
+```python
+x = 2
+y = 3
+print(x + y)
+```
+
+If possible, please include the Python configuration script used and state clearly any parameters passed.
+
+**Terminal Output**
+If applicable, add the terminal output here. If long, only include the relevant lines.
+Please put the terminal output in code blocks. I.e.:
+
+```shell
+#Terminal output here#
+```
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Host Operating System**
+Ubuntu 22.04, Mac OS X, etc.
+
+**Host ISA**
+ARM, X86, RISC-V, etc.
+
+**Compiler used**
+State which compiler was used to compile gem5. Please include the compiler version.
+
+**Additional information**
+Add any other information which does not fit in the previous sections but may be of use in fixing this bug.
diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml
index c3188b87ee..49928faf20 100644
--- a/.github/workflows/ci-tests.yaml
+++ b/.github/workflows/ci-tests.yaml
@@ -1,103 +1,195 @@
+---
 # This workflow runs after a pull-request has been approved by a reviewer.
 
 name: CI Tests
 
 on:
-  pull_request:
-    types: [opened, edited, synchronize, ready_for_review]
+    pull_request:
+        types: [opened, edited, synchronize, ready_for_review]
 
+concurrency:
+    group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+    cancel-in-progress: true
 
 jobs:
-  pre-commit:
+    pre-commit:
     # runs on github hosted runner
-    runs-on: ubuntu-22.04
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v3
-    - uses: pre-commit/action@v3.0.0
+        runs-on: ubuntu-22.04
+        if: github.event.pull_request.draft == false
+        steps:
+            - uses: actions/checkout@v3
+            - uses: actions/setup-python@v3
+            - uses: pre-commit/action@v3.0.0
 
   # ensures we have a change-id in every commit, needed for gerrit
-  check-for-change-id:
-    # runs on github hosted runner
-    runs-on: ubuntu-latest
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    steps:
-      - uses: actions/github-script@v6
-        env:
-          token: "Change-Id"
-          pattern: ".*"
-        with:
-          script: |
-            const commits = ${{ toJSON(github.event.commits) }}
-            for (const commit of commits) {
-              const id = "Change-Id: "
-              const message = commit.message;
-              if (!message.includes(id)) {
-                core.setFailed('One or more of the commits in this pull request is missing a Change-ID, which we require for any changes made to gem5. ' +
-                'To automatically insert one, run the following:\n f=`git rev-parse --git-dir`/hooks/commit-msg ; mkdir -p $(dirname $f) ; ' +
-                'curl -Lo $f https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; chmod +x $f\n Then amend the commit with git commit --amend --no-edit, and update your pull request.')
-              }
-            }
-
-  build-gem5:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [pre-commit, check-for-change-id] # only runs if pre-commit and change-id passes
-    outputs:
-      artifactname: ${{ steps.name.outputs.test }}
-    steps:
-      - uses: actions/checkout@v3
-      - id: name
-        run: echo "test=$(date +"%Y-%m-%d_%H.%M.%S")-artifact" >> $GITHUB_OUTPUT
-
-      - name: Build gem5
-        run: |
-          scons build/ALL/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.name.outputs.test }}
-          path: build/ALL/gem5.opt
-      - run: echo "This job's status is ${{ job.status }}."
-
-  unittests-all-opt:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [pre-commit, check-for-change-id] # only runs if pre-commit and change-id passes
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v3
-      - name: CI Unittests
-        working-directory: ${{ github.workspace }}
-        run: scons build/ALL/unittests.opt -j $(nproc)
-      - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-quick:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [pre-commit, check-for-change-id, build-gem5]
-    timeout-minutes: 360     # 6 hours
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions/download-artifact@v3
-        with:
-          name: ${{needs.build-gem5.outputs.artifactname}}
-          path: build/ALL
-      - run: chmod u+x build/ALL/gem5.opt
-      - name: The TestLib CI Tests
-        working-directory: ${{ github.workspace }}/tests
-        run: ./main.py run --skip-build -vv
-      - name: create zip of results
-        if: success() || failure()
-        run: |
-          apt-get -y install zip
-          zip -r output.zip tests/testing-results
-      - name: upload zip
-        if: success() || failure()
-        uses: actions/upload-artifact@v3
-        env:
-          MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-        with:
-          name: ${{ env.MY_STEP_VAR }}
-          path: output.zip
-          retention-days: 7
-      - run: echo "This job's status is ${{ job.status }}."
+    check-for-change-id:
+        # runs on github hosted runner
+        runs-on: ubuntu-22.04
+        if: github.event.pull_request.draft == false
+        steps:
+            - uses: actions/checkout@v3
+              with:
+                  fetch-depth: 0
+            - name: Check for Change-Id
+              run: |
+                  # loop through all the commits in the pull request
+                  for commit in $(git rev-list ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}); do
+                      git checkout $commit
+                      if (git log -1 --pretty=format:"%B" | grep -q "Change-Id: ")
+                      then
+                        # passes as long as at least one change-id exists in the pull request
+                        exit 0
+                      fi
+                  done
+                  # if we reach this part, none of the commits had a change-id
+                  echo "None of the commits in this pull request contains a Change-ID, which we require for any changes made to gem5. "\
+                    "To automatically insert one, run the following:\n f=`git rev-parse --git-dir`/hooks/commit-msg ; mkdir -p $(dirname $f) ; "\
+                    "curl -Lo $f https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; chmod +x $f\n Then amend the commit with git commit --amend --no-edit, and update your pull request."
+                  exit 1
+
+    unittests-all-opt:
+        runs-on: [self-hosted, linux, x64]
+        if: github.event.pull_request.draft == false
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [pre-commit, check-for-change-id] # only runs if pre-commit and change-id passes
+        timeout-minutes: 60
+        steps:
+            - uses: actions/checkout@v3
+            - name: CI Unittests
+              working-directory: ${{ github.workspace }}
+              run: scons build/ALL/unittests.opt -j $(nproc)
+            - run: echo "This job's status is ${{ job.status }}."
+
+    testlib-quick-matrix:
+        runs-on: [self-hosted, linux, x64]
+        if: github.event.pull_request.draft == false
+    # In order to make sure the environment is exactly the same, we run in
+    # the same container we use to build gem5 and run the testlib tests.
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [pre-commit, check-for-change-id]
+        steps:
+            - uses: actions/checkout@v3
+
+      # Unfortunately the 'ubuntu-latest' image doesn't have jq installed.
+      # We therefore need to install it as a step here.
+            - name: Install jq
+              run: apt install -y jq
+
+            - name: Get directories for testlib-quick
+              working-directory: ${{ github.workspace }}/tests
+              id: dir-matrix
+              run: echo "test-dirs-matrix=$(find gem5/* -type d -maxdepth 0 | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
+
+            - name: Get the build targets for testlib-quick-gem5-builds
+              working-directory: ${{ github.workspace }}/tests
+              id: build-matrix
+              run: echo "build-matrix=$(./main.py list --build-targets -q | jq -ncR '[inputs]')" >>$GITHUB_OUTPUT
+
+        outputs:
+            build-matrix: ${{ steps.build-matrix.outputs.build-matrix }}
+            test-dirs-matrix: ${{ steps.dir-matrix.outputs.test-dirs-matrix }}
+
+    clang-fast-compilation:
+        # gem5 binaries built in `testlib-quick-gem5-builds` always use GCC.
+        # Clang is more strict than GCC. This job checks that gem5 compiles
+        # with Clang. It compiles build/ALL/gem5.fast to maximize the chance
+        # of compilation errors being exposed.
+        runs-on: [self-hosted, linux, x64]
+        if: github.event.pull_request.draft == false
+        container: ghcr.io/gem5/clang-version-16:latest
+        needs: [pre-commit, check-for-change-id]
+        timeout-minutes: 90
+        steps:
+            - uses: actions/checkout@v3
+            - name: Clang Compilation
+              working-directory: ${{ github.workspace }}
+              run: scons build/ALL/gem5.fast -j $(nproc)
+
+    testlib-quick-gem5-builds:
+        runs-on: [self-hosted, linux, x64]
+        if: github.event.pull_request.draft == false
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [pre-commit, check-for-change-id, testlib-quick-matrix]
+        strategy:
+            matrix:
+                build-target: ${{ fromJson(needs.testlib-quick-matrix.outputs.build-matrix) }}
+        steps:
+            - uses: actions/checkout@v3
+            - name: Build gem5
+              run: scons ${{ matrix.build-target }} -j $(nproc)
+
+        # Upload the gem5 binary as an artifact.
+        # Note: the "anchor.txt" file is a hack to make sure the paths are
+        # preserved in the artifact. The upload-artifact action finds the
+        # closest common directory and uploads everything relative to that.
+        # E.g., if we upload "build/ARM/gem5.opt" and "build/RISCV/gem5.opt"
+        # Then upload-artifact will upload "ARM/gem5.opt" and "RISCV/gem5.opt",
+        # stripping the "build" directory. By adding the "anchor.txt" file, we
+        # ensure the "build" directory is preserved.
+            - run: echo "anchor" > anchor.txt
+            - uses: actions/upload-artifact@v3
+              with:
+                  name: ci-tests-${{ github.run_number }}-testlib-quick-all-gem5-builds
+                  path: |
+                      build/*/gem5.*
+                      anchor.txt
+                  retention-days: 7
+
+    testlib-quick-execution:
+        runs-on: [self-hosted, linux, x64]
+        if: github.event.pull_request.draft == false
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [pre-commit, check-for-change-id, testlib-quick-matrix, testlib-quick-gem5-builds]
+        timeout-minutes: 360 # 6 hours
+        strategy:
+            fail-fast: false
+            matrix:
+                test-dir: ${{ fromJson(needs.testlib-quick-matrix.outputs.test-dirs-matrix) }}
+        steps:
+            - name: Clean runner # best-effort wipe of workspace files, dotfiles and ~/.cache
+              run: rm -rf ./* || true; rm -rf ./.??* || true; rm -rf ~/.cache || true
+
+        # Checkout the repository then download the gem5.opt artifact.
+            - uses: actions/checkout@v3
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ci-tests-${{ github.run_number }}-testlib-quick-all-gem5-builds
+
+        # Make the downloaded gem5.{opt,debug,fast} binaries executable.
+            - name: Chmod gem5.{opt,debug,fast} to be executable
+              run: |
+                  find . -name "gem5.opt" -exec chmod u+x {} \;
+                  find . -name "gem5.debug" -exec chmod u+x {} \;
+                  find . -name "gem5.fast" -exec chmod u+x {} \;
+
+        # Run the testlib quick tests in the given directory.
+            - name: Run "tests/${{ matrix.test-dir }}" TestLib quick tests
+              id: run-tests
+              working-directory: ${{ github.workspace }}/tests
+              run: ./main.py run --skip-build -vv -j$(nproc) ${{ matrix.test-dir }}
+
+        # Replace '/' with '-' in the matrix.test-dir path (to name the artifact).
+            - name: Sanatize test-dir for artifact name
+              id: sanitize-test-dir
+              if: success() || failure()
+              run: echo "sanatized-test-dir=$(echo '${{ matrix.test-dir }}' | sed 's/\//-/g')" >> $GITHUB_OUTPUT
+
+        # Upload the tests/testing-results directory as an artifact.
+            - name: Upload test results
+              if: success() || failure()
+              uses: actions/upload-artifact@v3
+              with:
+                  name: ci-tests-run-${{ github.run_number }}-attempt-${{ github.run_attempt }}-testlib-quick-${{ steps.sanitize-test-dir.outputs.sanatized-test-dir
+                      }}-status-${{ steps.run-tests.outcome }}-output
+                  path: tests/testing-results
+                  retention-days: 30
+
+    testlib-quick:
+    # It is 'testlib-quick' which needs to pass for the pull request to be
+    # merged. The 'testlib-quick-execution' is a matrix job which runs all the
+    # testlib quick tests. This job is therefore a stub which will pass if
+    # all the testlib-quick-execution jobs pass.
+        runs-on: ubuntu-22.04
+        needs: testlib-quick-execution
+        steps:
+            - run: echo "This job's status is ${{ job.status }}."
diff --git a/.github/workflows/compiler-tests.yaml b/.github/workflows/compiler-tests.yaml
index 52569e2e01..4656563357 100644
--- a/.github/workflows/compiler-tests.yaml
+++ b/.github/workflows/compiler-tests.yaml
@@ -1,52 +1,70 @@
+---
 # This workflow runs all of the compiler tests
 
 name: Compiler Tests
-run-name: ${{ github.actor }} is running compiler tests
-
 
 on:
   # Runs every Friday from 7AM UTC
-  schedule:
-    - cron:  '00 7 * * 5'
+    schedule:
+        - cron: 00 7 * * 5
   # Allows us to manually start workflow for testing
-  workflow_dispatch:
+    workflow_dispatch:
 
 jobs:
   # replication of compiler-tests.sh
-  all-compilers:
-    strategy:
-      matrix:
-        image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-9, gcc-version-8, gcc-version-7, clang-version-14, clang-version-13, clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, clang-version-6.0, ubuntu-18.04_all-dependencies, ubuntu-20.04_all-dependencies, ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
-        opts: [.opt, .fast]
-    runs-on: [self-hosted, linux, x64, run]
-    timeout-minutes: 2880     # 48 hours
-    container: gcr.io/gem5-test/${{ matrix.image }}:latest
-    steps:
-    - uses: actions/checkout@v3
-      with:
+    all-compilers:
+        strategy:
+            fail-fast: false
+            matrix:
+                image: [gcc-version-12, gcc-version-11, gcc-version-10, gcc-version-8, clang-version-16, clang-version-15, clang-version-14, clang-version-13,
+                    clang-version-12, clang-version-11, clang-version-10, clang-version-9, clang-version-8, clang-version-7, ubuntu-20.04_all-dependencies,
+                    ubuntu-22.04_all-dependencies, ubuntu-22.04_min-dependencies]
+                opts: [.opt, .fast]
+        runs-on: [self-hosted, linux, x64]
+        timeout-minutes: 2880 # 48 hours
+        container: ghcr.io/gem5/${{ matrix.image }}:latest
+        steps:
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - name: Compile build/ALL/gem5${{ matrix.opts }} with ${{ matrix.image }}
-      run: /usr/bin/env python3 /usr/bin/scons --ignore-style build/ALL/gem5${{ matrix.opts }}
-      timeout-minutes: 600  # 10 hours
+                  ref: develop
+            - name: Compile build/ALL/gem5${{ matrix.opts }} with ${{ matrix.image }}
+              run: /usr/bin/env python3 /usr/bin/scons --ignore-style build/ALL/gem5${{ matrix.opts }} -j$(nproc)
+              timeout-minutes: 600 # 10 hours
 
   # Tests the two latest gcc and clang supported compilers against all gem5 compilations.
-  latest-compilers-all-gem5-builds:
-    strategy:
-      matrix:
-        gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level, NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base]
-        image: [gcc-version-12, clang-version-14]
-        opts: [.opt]
-    runs-on: [self-hosted, linux, x64, run]
-    timeout-minutes: 2880     # 48 hours
-    container: gcr.io/gem5-test/${{ matrix.image }}:latest
-    steps:
-    - uses: actions/checkout@v3
-      with:
+    latest-compilers-all-gem5-builds:
+        strategy:
+            fail-fast: false
+            matrix:
+                gem5-compilation: [ARM, ARM_MESI_Three_Level, ARM_MESI_Three_Level_HTM, ARM_MOESI_hammer, Garnet_standalone, GCN3_X86, MIPS, 'NULL', NULL_MESI_Two_Level,
+                    NULL_MOESI_CMP_directory, NULL_MOESI_CMP_token, NULL_MOESI_hammer, POWER, RISCV, SPARC, X86, X86_MI_example, X86_MOESI_AMD_Base,
+                    VEGA_X86] # each build target is listed exactly once
+                image: [gcc-version-12, clang-version-16]
+                opts: [.opt]
+        runs-on: [self-hosted, linux, x64]
+        timeout-minutes: 2880 # 48 hours
+        container: ghcr.io/gem5/${{ matrix.image }}:latest
+        steps:
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - name: Compile build/${{ matrix.gem5-compilation }}/gem5${{ matrix.opts }} with ${{ matrix.image }}
-      run: /usr/bin/env python3 /usr/bin/scons --ignore-style build/${{ matrix.gem5-compilation }}/gem5${{ matrix.opts }}
-      timeout-minutes: 600 # 10 hours
+                  ref: develop
+            - name: Compile build/${{ matrix.gem5-compilation }}/gem5${{ matrix.opts }} with ${{ matrix.image }}
+              run: /usr/bin/env python3 /usr/bin/scons --ignore-style build/${{ matrix.gem5-compilation }}/gem5${{ matrix.opts }} -j$(nproc)
+              timeout-minutes: 600 # 10 hours
+
+    compiler-tests:
+        # The dummy job is used to indicate whether the compiler tests have
+        # passed or not. This can be used as status check for pull requests.
+        # I.e., if we want to stop pull requests from being merged if the
+        # compiler tests are failing, we can add this job as a required status
+        # check.
+        runs-on: ubuntu-22.04
+        needs:
+            - latest-compilers-all-gem5-builds
+            - all-compilers
+        steps:
+            - run: echo "This compiler tests have passed."
diff --git a/.github/workflows/daily-tests.yaml b/.github/workflows/daily-tests.yaml
index 5fab058b79..89a72fd852 100644
--- a/.github/workflows/daily-tests.yaml
+++ b/.github/workflows/daily-tests.yaml
@@ -1,878 +1,305 @@
+---
 # This workflow runs all of the long tests within main.py, extra tests in nightly.sh, and unittests
 
 name: Daily Tests
 
 on:
   # Runs every day from 7AM UTC
-  schedule:
-    - cron:  '0 7 * * *'
+    schedule:
+        - cron: 0 7 * * *
 
 jobs:
-# building all necessary versions of gem5
-  build-gem5:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
+    name-artifacts: # emits a timestamp prefix shared by the build artifact names
+        runs-on: ubuntu-latest
+        outputs:
+            build-name: ${{ steps.artifact-name.outputs.name }}
+        steps:
+            - uses: actions/checkout@v3 # same checkout version as the other jobs
+            - id: artifact-name
+              run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S-")" >> $GITHUB_OUTPUT
+
+    build-gem5:
+        strategy:
+            fail-fast: false
+            matrix:
+        # NULL is in quotes since it is considered a keyword in yaml files
+                image: [ALL, ALL_CHI, ARM, ALL_MSI, ALL_MESI_Two_Level, 'NULL', NULL_MI_example, RISCV, VEGA_X86]
+        # this allows us to pass additional command line parameters
+        # the default is to add -j $(nproc), but some images
+        # require more specifications when built
+                include:
+                    - command-line: -j $(nproc)
+                    - image: ALL_CHI
+                      command-line: --default=ALL PROTOCOL=CHI -j $(nproc)
+                    - image: ALL_MSI
+                      command-line: --default=ALL PROTOCOL=MSI -j $(nproc)
+                    - image: ALL_MESI_Two_Level
+                      command-line: --default=ALL PROTOCOL=MESI_Two_Level -j $(nproc)
+                    - image: NULL_MI_example
+                      command-line: --default=NULL PROTOCOL=MI_example -j $(nproc)
+        runs-on: [self-hosted, linux, x64]
+        needs: name-artifacts
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        steps:
+            - uses: actions/checkout@v3
+              with:
           # Scheduled workflows run on the default branch by default. We
           # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ALL/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ALL/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-ALL_CHI:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL_CHI" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ALL_CHI/gem5.opt --default=ALL PROTOCOL=CHI -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ALL_CHI/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-ARM:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ARM" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ARM/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ARM/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-ALL_MSI:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL_MSI" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ALL_MSI/gem5.opt --default=ALL PROTOCOL=MSI -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ALL_MSI/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-ALL_MESI_Two_Level:
-    runs-on: [self-hosted, linux, x64, build,]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL_MESI_Two_Level" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ALL_MESI_Two_Level/gem5.opt --default=ALL PROTOCOL=MESI_Two_Level -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ALL_MESI_Two_Level/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-NULL:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-NULL" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/NULL/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/NULL/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-NULL_MI_example:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-NULL_MI_example" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/NULL_MI_example/gem5.opt --default=NULL PROTOCOL=MI_example -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/NULL_MI_example/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-RISCV:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-RISCV" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/RISCV/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/RISCV/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-  build-gem5-VEGA_X86:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-VEGA_X86" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/VEGA_X86/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/VEGA_X86/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
-
-
-  # This runs the unit tests for the build/ALL/unittests.debug build.
-  unittests-all-debug:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    timeout-minutes: 60
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
+                  ref: develop
+            - name: Build gem5
+              run: scons build/${{ matrix.image }}/gem5.opt ${{ matrix.command-line }}
+            - uses: actions/upload-artifact@v3
+              with:
+                  name: ${{ needs.name-artifacts.outputs.build-name }}${{ matrix.image }}
+                  path: build/${{ matrix.image }}/gem5.opt
+                  retention-days: 5
+            - run: echo "This job's status is ${{ job.status }}."
+
+  # this builds both unittests.fast and unittests.debug
+    unittests-fast-debug:
+        strategy:
+            matrix:
+                type: [fast, debug]
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        timeout-minutes: 60
+        steps:
+            - uses: actions/checkout@v3
+              with:
           # Scheduled workflows run on the default branch by default. We
           # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - name: ALL/unittests.debug UnitTests
-        run: |
-          ls
-          scons build/ALL/unittests.debug -j $(nproc)
-
-  # This runs the unit tests for the build/ALL/unittests.fast build.
-  unittests-all-fast:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    timeout-minutes: 60
-
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          # Scheduled workflows run on the default branch by default. We
-          # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - name: ALL/unittests.fast UnitTests
-        run: |
-          ls
-          scons build/ALL/unittests.fast -j $(nproc)
-
-# start running all of the long tests
-  testlib-long-arm-boot-tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5, build-gem5-ALL_CHI]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
+                  ref: develop
+            - name: ALL/unittests.${{ matrix.type }} UnitTests
+              run: scons build/ALL/unittests.${{ matrix.type }} -j $(nproc)
+
+  # start running all of the long tests
+    testlib-long-tests:
+        strategy:
+            fail-fast: false
+            matrix:
+                test-type: [arm_boot_tests, fs, gpu, insttest_se, learning_gem5, m5threads_test_atomic, memory, multi_isa, replacement_policies, riscv_boot_tests,
+                    stdlib, x86_boot_tests]
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [name-artifacts, build-gem5]
+        timeout-minutes: 1440 # 24 hours for entire matrix to run
+        steps:
+            - name: Clean runner # three independent best-effort removals; each "|| true" keeps the step from failing
+              run: rm -rf ./* || true; rm -rf ./.??* || true; rm -rf ~/.cache || true
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ALL_CHI.outputs.build-name}}
-        path: build/ALL_CHI
-    - run: chmod u+x build/ALL_CHI/gem5.opt
-    - name: long arm-boot-tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/arm-boot-tests --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-fs:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5, build-gem5-ARM]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ARM.outputs.build-name}}
-        path: build/ARM
-    - run: chmod u+x build/ARM/gem5.opt
-    - name: long fs
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/fs --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-gem5_library_example_tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5, build-gem5-ALL_MESI_Two_Level]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
+                  ref: develop
+    # download all artifacts for each test
+    # since long tests can't start until the build matrix completes,
+    # we download all artifacts from the build for each test
+    # in this matrix
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ALL
+                  path: build/ALL
+            - run: chmod u+x build/ALL/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ALL_CHI
+                  path: build/ALL_CHI
+            - run: chmod u+x build/ALL_CHI/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ARM
+                  path: build/ARM
+            - run: chmod u+x build/ARM/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ALL_MSI
+                  path: build/ALL_MSI
+            - run: chmod u+x build/ALL_MSI/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ALL_MESI_Two_Level
+                  path: build/ALL_MESI_Two_Level
+            - run: chmod u+x build/ALL_MESI_Two_Level/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}NULL
+                  path: build/NULL
+            - run: chmod u+x build/NULL/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}NULL_MI_example
+                  path: build/NULL_MI_example
+            - run: chmod u+x build/NULL_MI_example/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}RISCV
+                  path: build/RISCV
+            - run: chmod u+x build/RISCV/gem5.opt
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}VEGA_X86
+                  path: build/VEGA_X86
+            - run: chmod u+x build/VEGA_X86/gem5.opt
+    # run test
+            - name: long ${{ matrix.test-type }} tests
+              working-directory: ${{ github.workspace }}/tests
+              run: ./main.py run gem5/${{ matrix.test-type }} --length=long --skip-build -vv -t $(nproc)
+            - name: create zip of results
+              if: success() || failure()
+              run: |
+                  apt-get -y install zip
+                  zip -r output.zip tests/testing-results
+            - name: upload zip
+              if: success() || failure()
+              uses: actions/upload-artifact@v3
+              env:
+                  MY_STEP_VAR: ${{ matrix.test-type }}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
+              with:
+                  name: ${{ env.MY_STEP_VAR }}
+                  path: output.zip
+                  retention-days: 7
+            - run: echo "This job's status is ${{ job.status }}."
+
+  # split library example tests into runs based on Suite UID
+  # so that they don't hog the runners for too long
+    testlib-long-gem5_library_example_tests:
+        runs-on: [self-hosted, linux, x64]
+        strategy:
+            fail-fast: false
+            matrix:
+                test-type: [gem5-library-example-x86-ubuntu-run-ALL-x86_64-opt, gem5-library-example-riscv-ubuntu-run-ALL-x86_64-opt, lupv-example-ALL-x86_64-opt,
+                    gem5-library-example-arm-ubuntu-run-test-ALL-x86_64-opt, gem5-library-example-riscvmatched-hello-ALL-x86_64-opt]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [name-artifacts, build-gem5]
+        timeout-minutes: 1440 # 24 hours
+        steps:
+            - name: Clean runner # three independent best-effort removals; each "|| true" keeps the step from failing
+              run: rm -rf ./* || true; rm -rf ./.??* || true; rm -rf ~/.cache || true
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ALL_MESI_Two_Level.outputs.build-name}}
-        path: build/ALL_MESI_Two_Level
-    - run: chmod u+x build/ALL_MESI_Two_Level/gem5.opt
-    - name: long gem5_library_example_tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/gem5_library_example_tests --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-gpu:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5, build-gem5-VEGA_X86]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-VEGA_X86.outputs.build-name}}
-        path: build/VEGA_X86
-    - run: chmod u+x build/VEGA_X86/gem5.opt
-    - name: long gpu
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/gpu --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-insttest_se:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: long insttest_se
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/insttest_se --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-# kvm tests don't work on github actions
-  # testlib-long-kvm-fork-tests:
-  #   runs-on: [self-hosted, linux, x64, run]
-  #   container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-  #   needs: build-gem5
-  #   timeout-minutes: 1440 # 24 hours
-    # steps:
-    # - uses: actions/checkout@v3
-    #   with:
-    #     # Scheduled workflows run on the default branch by default. We
-    #     # therefore need to explicitly checkout the develop branch.
-    #     ref: develop
-  #   - uses: actions/download-artifact@v3
-  #     with:
-  #       name: ${{ env.artifact-name }}
-  #       path: build/ALL/gem5.opt
-  #   - run: chmod u+x build/ALL/gem5.opt
-  #   - name: long kvm-fork-tests
-  #     working-directory: ${{ github.workspace }}/tests
-  #     run: ./main.py run gem5/kvm-fork-tests --length=long --skip-build -vv -t $(nproc)
-  #   - uses: actions/upload-artifact@v3
-  #     env:
-  #       MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-  #     with:
-  #       name: ${{ env.MY_STEP_VAR }}
-  #       path: tests/testing-results
-  #       retention-days: 7
-  #   - run: echo "This job's status is ${{ job.status }}."
-
-  # testlib-long-kvm-switch-tests:
-  #   runs-on: [self-hosted, linux, x64, run]
-  #   container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-  #   needs: build-gem5
-  #   timeout-minutes: 1440 # 24 hours
-  #   steps:
-    # - uses: actions/checkout@v3
-    #   with:
-    #     # Scheduled workflows run on the default branch by default. We
-    #     # therefore need to explicitly checkout the develop branch.
-    #     ref: develop
-  #   - uses: actions/download-artifact@v3
-  #     with:
-  #       name: ${{ env.artifact-name }}
-  #       path: build/ALL/gem5.opt
-  #   - run: chmod u+x build/ALL/gem5.opt
-  #   - name: long kvm-switch-tests
-  #     working-directory: ${{ github.workspace }}/tests
-  #     run: ./main.py run gem5/kvm-switch-tests --length=long --skip-build -vv -t $(nproc)
-  #   - uses: actions/upload-artifact@v3
-  #     env:
-  #       MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-  #     with:
-  #       name: ${{ env.MY_STEP_VAR }}
-  #       path: tests/testing-results
-  #       retention-days: 7
-  #   - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-learning_gem5:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5-ALL_MSI
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ALL_MSI.outputs.build-name}}
-        path: build/ALL_MSI
-    - run: chmod u+x build/ALL_MSI/gem5.opt
-    - name: long learning_gem5
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/learning_gem5 --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-m5_threads:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: long m5_threads
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/m5threads_test_atomic --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-memory:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5-NULL
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-NULL.outputs.build-name}}
-        path: build/NULL
-    - run: chmod u+x build/NULL/gem5.opt
-    - name: long memory
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/memory --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-multi_isa:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5-ARM, build-gem5-VEGA_X86, build-gem5-RISCV]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ARM.outputs.build-name}}
-        path: build/ARM
-    - run: chmod u+x build/ARM/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-VEGA_X86.outputs.build-name}}
-        path: build/VEGA_X86
-    - run: chmod u+x build/VEGA_X86/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-RISCV.outputs.build-name}}
-        path: build/RISCV
-    - run: chmod u+x build/RISCV/gem5.opt
-    - name: long multi_isa
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/multi_isa --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-replacement-policies:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5-NULL_MI_example
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-NULL_MI_example.outputs.build-name}}
-        path: build/NULL_MI_example
-    - run: chmod u+x build/NULL_MI_example/gem5.opt
-    - name: long replacement-policies
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/replacement-policies --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-riscv-boot-tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: long riscv-boot-tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/riscv-boot-tests --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-stdlib:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5-ARM, build-gem5-VEGA_X86, build-gem5-RISCV]
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-ARM.outputs.build-name}}
-        path: build/ARM
-    - run: chmod u+x build/ARM/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-VEGA_X86.outputs.build-name}}
-        path: build/VEGA_X86
-    - run: chmod u+x build/VEGA_X86/gem5.opt
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5-RISCV.outputs.build-name}}
-        path: build/RISCV
-    - run: chmod u+x build/RISCV/gem5.opt
-    - name: long stdlib
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/stdlib --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
-
-  testlib-long-x86-boot-tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 1440 # 24 hours
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: long x86-boot-tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/x86-boot-tests --length=long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
+                  ref: develop
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.name-artifacts.outputs.build-name}}ALL
+                  path: build/ALL
+            - run: chmod u+x build/ALL/gem5.opt
+            - name: long ${{ matrix.test-type }} gem5_library_example_tests
+              working-directory: ${{ github.workspace }}/tests
+              run: ./main.py run --uid SuiteUID:tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py:test-${{ matrix.test-type }} --length=long
+                  --skip-build -vv
+            - name: create zip of results
+              if: success() || failure()
+              run: |
+                  apt-get -y install zip
+                  zip -r output.zip tests/testing-results
+            - name: upload zip
+              if: success() || failure()
+              uses: actions/upload-artifact@v3
+              env:
+                  MY_STEP_VAR: ${{ matrix.test-type }}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
+              with:
+                  name: ${{ env.MY_STEP_VAR }}
+                  path: output.zip
+                  retention-days: 7
+            - run: echo "This job's status is ${{ job.status }}."
 
   # This runs the SST-gem5 integration compilation and tests it with
   # ext/sst/sst/example.py.
-  sst-test:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/sst-env:latest
-    timeout-minutes: 180
-
-    steps:
-    - uses: actions/checkout@v3
-      with:
+    sst-test:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/sst-env:latest
+        timeout-minutes: 180
+
+        steps:
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - name: Build RISCV/libgem5_opt.so with SST
-      run: scons build/RISCV/libgem5_opt.so --without-tcmalloc --duplicate-sources --ignore-style -j $(nproc)
-    - name: Compile ext/sst
-      working-directory: ${{ github.workspace }}/ext/sst
-      run: make -j $(nproc)
-    - name: Run SST test
-      working-directory: ${{ github.workspace }}/ext/sst
-      run: sst --add-lib-path=./ sst/example.py
+                  ref: develop
+            - name: Build RISCV/libgem5_opt.so with SST
+              run: scons build/RISCV/libgem5_opt.so --without-tcmalloc --duplicate-sources --ignore-style -j $(nproc)
+            - name: Makefile ext/sst
+              working-directory: ${{ github.workspace }}/ext/sst
+              run: mv Makefile.linux Makefile
+            - name: Compile ext/sst
+              working-directory: ${{ github.workspace }}/ext/sst
+              run: make -j $(nproc)
+            - name: Run SST test
+              working-directory: ${{ github.workspace }}/ext/sst
+              run: sst --add-lib-path=./ sst/example.py
 
   # This runs the gem5 within SystemC ingration and runs a simple hello-world
   # simulation with it.
-  systemc-test:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/systemc-env:latest
-    timeout-minutes: 180
-
-    steps:
-    - uses: actions/checkout@v3
-      with:
+    systemc-test:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/systemc-env:latest
+        timeout-minutes: 180
+
+        steps:
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - name: Build ARM/gem5.opt
-      run: scons build/ARM/gem5.opt --ignore-style --duplicate-sources -j$(nproc)
-    - name: Build ARM/libgem5_opt.so
-      run: scons build/ARM/libgem5_opt.so --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 -j$(nproc) --duplicate-sources
-    - name: Compile gem5 withing SystemC
-      working-directory: ${{ github.workspace }}/util/systemc/gem5_within_systemc
-      run: make
-    - name: Run gem5 within SystemC test
-      run: ./build/ARM/gem5.opt configs/deprecated/example/se.py -c tests/test-progs/hello/bin/arm/linux/hello
-    - name: Continue gem5 within SystemC test
-      run: LD_LIBRARY_PATH=build/ARM/:/opt/systemc/lib-linux64/ ./util/systemc/gem5_within_systemc/gem5.opt.sc m5out/config.ini
+                  ref: develop
+            - name: Build ARM/gem5.opt
+              run: scons build/ARM/gem5.opt --ignore-style --duplicate-sources -j$(nproc)
+            - name: Build ARM/libgem5_opt.so
+              run: scons build/ARM/libgem5_opt.so --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 -j$(nproc) --duplicate-sources
+            - name: Compile gem5 within SystemC
+              working-directory: ${{ github.workspace }}/util/systemc/gem5_within_systemc
+              run: make
+            - name: Run gem5 within SystemC test
+              run: ./build/ARM/gem5.opt configs/deprecated/example/se.py -c tests/test-progs/hello/bin/arm/linux/hello
+            - name: Continue gem5 within SystemC test
+              run: LD_LIBRARY_PATH=build/ARM/:/opt/systemc/lib-linux64/ ./util/systemc/gem5_within_systemc/gem5.opt.sc m5out/config.ini
 
   # Runs the gem5 Nighyly GPU tests.
-  gpu-tests:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/gcn-gpu:latest
-    timeout-minutes: 720 # 12 hours
-
-    steps:
-    - uses: actions/checkout@v3
-      with:
+    gpu-tests:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/gcn-gpu:latest
+        timeout-minutes: 720 # 12 hours
+
+        steps:
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - name: Compile build/GCN3_X86/gem5.opt
-      run: scons build/GCN3_X86/gem5.opt -j $(nproc)
-    - name: Get Square test-prog from gem5-resources
-      uses: wei/wget@v1
-      with:
-        args: -q http://dist.gem5.org/dist/develop/test-progs/square/square # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time: https://github.com/coder/sshcode/issues/102
-    - name: Run Square test with GCN3_X86/gem5.opt (SE mode)
-      run: |
-        mkdir -p tests/testing-results
-        ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c square
-    - name: Get allSyncPrims-1kernel from gem5-resources
-      uses: wei/wget@v1
-      with:
-        args: -q http://dist.gem5.org/dist/develop/test-progs/heterosync/gcn3/allSyncPrims-1kernel # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time
-    - name: Run allSyncPrims-1kernel sleepMutex test with GCN3_X86/gem5.opt (SE mode)
-      run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16 4"
-    - name: Run allSyncPrims-1kernel lfTreeBarrUsing test with GCN3_X86/gem5.opt (SE mode)
-      run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq 10 16 4"
+                  ref: develop
+            - name: Compile build/GCN3_X86/gem5.opt
+              run: scons build/GCN3_X86/gem5.opt -j $(nproc)
+            - name: Get Square test-prog from gem5-resources
+              uses: wei/wget@v1
+              with:
+                  args: -q http://dist.gem5.org/dist/develop/test-progs/square/square # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time: https://github.com/coder/sshcode/issues/102
+            - name: Run Square test with GCN3_X86/gem5.opt (SE mode)
+              run: |
+                  mkdir -p tests/testing-results
+                  ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c square
+            - name: Get allSyncPrims-1kernel from gem5-resources
+              uses: wei/wget@v1
+              with:
+                  args: -q http://dist.gem5.org/dist/develop/test-progs/heterosync/gcn3/allSyncPrims-1kernel # Removed -N bc it wasn't available within actions, should be okay bc workspace is clean every time
+            - name: Run allSyncPrims-1kernel sleepMutex test with GCN3_X86/gem5.opt (SE mode)
+              run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="sleepMutex 10 16
+                  4"
+            - name: Run allSyncPrims-1kernel lfTreeBarrUsing test with GCN3_X86/gem5.opt (SE mode)
+              run: ./build/GCN3_X86/gem5.opt configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c allSyncPrims-1kernel --options="lfTreeBarrUniq
+                  10 16 4"
+    daily-tests:
+        # The dummy job is used to indicate whether the daily tests have
+        # passed or not. This can be used as status check for pull requests.
+        # I.e., if we want to stop pull requests from being merged if the
+        # daily tests are failing we can add this job as a required status
+        # check.
+        runs-on: ubuntu-22.04
+        needs:
+            - unittests-fast-debug
+            - testlib-long-tests
+            - testlib-long-gem5_library_example_tests
+            - sst-test
+            - systemc-test
+            - gpu-tests
+        steps:
+            - run: echo "The daily tests have passed."
diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
new file mode 100644
index 0000000000..68b0905e83
--- /dev/null
+++ b/.github/workflows/docker-build.yaml
@@ -0,0 +1,54 @@
+---
+name: Docker images build and push
+
+on:
+    workflow_dispatch:
+jobs:
+    obtain-dockerfiles:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+
+        steps:
+            - uses: actions/checkout@v3
+              with:
+          # Scheduled workflows run on the default branch by default. We
+          # therefore need to explicitly checkout the develop branch.
+                  ref: develop
+            - uses: actions/upload-artifact@v3
+              with:
+                  name: dockerfiles
+                  path: util/dockerfiles
+
+  # This builds and pushes the docker image.
+    build-and-push:
+        runs-on: [self-hosted, linux, x64]
+        needs: obtain-dockerfiles
+        permissions:
+            packages: write
+            contents: read
+
+        steps:
+            - uses: actions/download-artifact@v3
+              with:
+                  name: dockerfiles
+                  path: dockerfiles-docker-build
+
+            - uses: docker/setup-qemu-action@v2
+              name: Setup QEMU
+
+            - uses: docker/setup-buildx-action@v2
+              name: Set up Docker Buildx
+
+            - uses: docker/login-action@v2
+              name: Login to the GitHub Container Registry
+              with:
+                  registry: ghcr.io
+                  username: ${{ github.repository_owner }}
+                  password: ${{ secrets.GITHUB_TOKEN }}
+
+            - name: Build and push with bake
+              uses: docker/bake-action@v4
+              with:
+                  workdir: ./dockerfiles-docker-build
+                  files: docker-bake.hcl
+                  push: true
diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml
new file mode 100644
index 0000000000..b390e0750f
--- /dev/null
+++ b/.github/workflows/gpu-tests.yaml
@@ -0,0 +1,95 @@
+---
+# This workflow runs all the Weekly GPU Tests.
+# For now this file is kept separate as we are still developing and testing
+# this workflow. It will eventually be merged with "weekly-tests.yaml"
+
+name: Weekly Tests (GPU)
+
+on:
+  # Runs every Saturday at 7AM UTC (cron day-of-week 6 is Saturday)
+    schedule:
+        - cron: 00 7 * * 6
+  # Allows us to manually start workflow for testing
+    workflow_dispatch:
+
+jobs:
+    build-gem5:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/gcn-gpu:latest
+        steps:
+            - uses: actions/checkout@v3
+              with:
+          # Scheduled workflows run on the default branch by default. We
+          # therefore need to explicitly checkout the develop branch.
+                  ref: develop
+            - name: Build gem5
+              run: scons build/GCN3_X86/gem5.opt -j $(nproc) --ignore-style
+            - uses: actions/upload-artifact@v3
+              with:
+                  name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-gcn3
+                  path: build/GCN3_X86/gem5.opt
+                  retention-days: 5
+            - run: echo "This job's status is ${{ job.status }}."
+
+    LULESH-tests:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/gcn-gpu:latest
+        needs: build-gem5
+        timeout-minutes: 480 # 8 hours
+        steps:
+            - uses: actions/checkout@v3
+              with:
+                # Scheduled workflows run on the default branch by default. We
+                # therefore need to explicitly checkout the develop branch.
+                  ref: develop
+
+            - name: Download build/GCN3_X86/gem5.opt
+              uses: actions/download-artifact@v3
+              with:
+                  name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-gcn3
+                  path: build/GCN3_X86
+                # `download-artifact` does not preserve permissions so we need to set
+                # them again.
+            - run: chmod u+x build/GCN3_X86/gem5.opt
+
+            - name: Obtain LULESH
+              # Obtains the latest LULESH compatible with this version of gem5 via
+              # gem5 Resources.
+              run: build/GCN3_X86/gem5.opt util/obtain-resource.py lulesh -p lulesh
+
+            - name: Run LULESH tests
+              working-directory: ${{ github.workspace }}
+              run: |
+                  build/GCN3_X86/gem5.opt configs/example/apu_se.py -n3 --mem-size=8GB --reg-alloc-policy=dynamic --benchmark-root="lulesh" -c \
+                  lulesh 0.01 2
+
+    HACC-tests:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/gcn-gpu:latest
+        needs: build-gem5
+        timeout-minutes: 120 # 2 hours
+        steps:
+            - uses: actions/checkout@v3
+              with:
+          # Scheduled workflows run on the default branch by default. We
+          # therefore need to explicitly checkout the develop branch.
+                  ref: develop
+            - uses: actions/download-artifact@v3
+              with:
+                  name: weekly-test-${{ github.run_number }}-attempt-${{ github.run_attempt }}-gem5-build-gcn3
+                  path: build/GCN3_X86
+            - run: chmod u+x build/GCN3_X86/gem5.opt
+            - name: make hip directory
+              run: mkdir hip
+            - name: Compile m5ops and x86
+              working-directory: ${{ github.workspace }}/util/m5
+              run: |
+                  export TERM=xterm-256color
+                  scons build/x86/out/m5
+            - name: Download tests
+              working-directory: ${{ github.workspace }}/hip
+              run: wget http://dist.gem5.org/dist/v22-1/test-progs/halo-finder/ForceTreeTest
+            - name: Run HACC tests
+              working-directory: ${{ github.workspace }}
+              run: |
+                  build/GCN3_X86/gem5.opt configs/example/apu_se.py -n3 --reg-alloc-policy=dynamic --benchmark-root=hip -c ForceTreeTest --options="0.5 0.1 64 0.1 1 N 12 rcb"
diff --git a/.github/workflows/utils.yaml b/.github/workflows/utils.yaml
new file mode 100644
index 0000000000..ccbd87c82f
--- /dev/null
+++ b/.github/workflows/utils.yaml
@@ -0,0 +1,21 @@
+---
+# This workflow file contains miscellaneous tasks to manage the repository.
+name: Utils for Repository
+on:
+    schedule:
+        - cron: 30 1 * * *
+    workflow_dispatch:
+
+jobs:
+  # This job runs the stale action to close issues that have been inactive for 30 days.
+  # It is scheduled to run every day at 1:30 AM UTC.
+    close-stale-issues:
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/stale@v8.0.0
+              with:
+                  close-issue-message: This issue is being closed because it has been inactive waiting for response for 30 days. If this is still an issue,
+                      please open a new issue and reference this one.
+                  days-before-stale: 21
+                  days-before-close: 7
+                  any-of-labels: needs details
diff --git a/.github/workflows/weekly-tests.yaml b/.github/workflows/weekly-tests.yaml
index 4c3f6b55bc..72b1454a5e 100644
--- a/.github/workflows/weekly-tests.yaml
+++ b/.github/workflows/weekly-tests.yaml
@@ -1,176 +1,120 @@
+---
 # This workflow runs all of the very-long tests within main.py
 
 name: Weekly Tests
 
 on:
   # Runs every Sunday from 7AM UTC
-  schedule:
-    - cron:  '00 7 * * 6'
+    schedule:
+        - cron: 00 7 * * 6
   # Allows us to manually start workflow for testing
-  workflow_dispatch:
+    workflow_dispatch:
 
 jobs:
-  build-gem5:
-    runs-on: [self-hosted, linux, x64, build]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    outputs:
-      build-name: ${{ steps.artifact-name.outputs.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
+    build-gem5:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        outputs:
+            build-name: ${{ steps.artifact-name.outputs.name }}
+        steps:
+            - uses: actions/checkout@v3
+              with:
           # Scheduled workflows run on the default branch by default. We
           # therefore need to explicitly checkout the develop branch.
-          ref: develop
-      - id: artifact-name
-        run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL" >> $GITHUB_OUTPUT
-      - name: Build gem5
-        run: |
-          scons build/ALL/gem5.opt -j $(nproc)
-      - uses: actions/upload-artifact@v3
-        with:
-          name: ${{ steps.artifact-name.outputs.name }}
-          path: build/ALL/gem5.opt
-          retention-days: 5
-      - run: echo "This job's status is ${{ job.status }}."
+                  ref: develop
+            - id: artifact-name
+              run: echo "name=$(date +"%Y-%m-%d_%H.%M.%S")-ALL" >> $GITHUB_OUTPUT
+            - name: Build gem5
+              run: |
+                  scons build/ALL/gem5.opt -j $(nproc)
+            - uses: actions/upload-artifact@v3
+              with:
+                  name: ${{ steps.artifact-name.outputs.name }}
+                  path: build/ALL/gem5.opt
+                  retention-days: 5
+            - run: echo "This job's status is ${{ job.status }}."
 
-# start running the very-long tests
-  testlib-very-long-gem5_library_example_tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5]
-    timeout-minutes: 4320 # 3 days
-    steps:
-    - uses: actions/checkout@v3
-      with:
+  # start running the very-long tests
+    testlib-very-long-tests:
+        strategy:
+            fail-fast: false
+            matrix:
+                test-type: [gem5_library_example_tests, gem5_resources, parsec_benchmarks, x86_boot_tests]
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        needs: [build-gem5]
+        timeout-minutes: 4320 # 3 days
+        steps:
+            - name: Clean runner
+              run: rm -rf ./* || true; rm -rf ./.??* || true; rm -rf ~/.cache || true
+            - uses: actions/checkout@v3
+              with:
         # Scheduled workflows run on the default branch by default. We
         # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: very-long gem5_library_example_tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/gem5_library_example_tests --length very-long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
+                  ref: develop
+            - uses: actions/download-artifact@v3
+              with:
+                  name: ${{needs.build-gem5.outputs.build-name}}
+                  path: build/ALL
+            - run: chmod u+x build/ALL/gem5.opt
+            - name: very-long ${{ matrix.test-type }}
+              working-directory: ${{ github.workspace }}/tests
+              run: ./main.py run gem5/${{ matrix.test-type }} --length very-long --skip-build -vv
+            - name: create zip of results
+              if: success() || failure()
+              run: |
+                  apt-get -y install zip
+                  zip -r output.zip tests/testing-results
+            - name: upload zip
+              if: success() || failure()
+              uses: actions/upload-artifact@v3
+              env:
+                  MY_STEP_VAR: ${{ matrix.test-type }}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
+              with:
+                  name: ${{ env.MY_STEP_VAR }}
+                  path: output.zip
+                  retention-days: 7
+            - run: echo "This job's status is ${{ job.status }}."
 
+    dramsys-tests:
+        runs-on: [self-hosted, linux, x64]
+        container: ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+        timeout-minutes: 4320 # 3 days
+        steps:
+            - uses: actions/checkout@v3
+              with:
+          # Scheduled workflows run on the default branch by default. We
+          # therefore need to explicitly checkout the develop branch.
+                  ref: develop
 
-  testlib-long-gem5-resources:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: [build-gem5]
-    timeout-minutes: 4320 # 3 days
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: very-long gem5-resources tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/gem5-resources --length very-long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
+            - name: Checkout DRAMSys
+              working-directory: ${{ github.workspace }}/ext/dramsys
+              run: |
+                  git clone https://github.com/tukl-msd/DRAMSys DRAMSys
+                  cd DRAMSys
+                  git checkout -b gem5 09f6dcbb91351e6ee7cadfc7bc8b29d97625db8f
+                  git submodule update --init --recursive
 
-  testlib-very-long-parsec-benchmarks:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 4320 # 3 days
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: very-long x86-boot-tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/x86-boot-tests --length very-long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
+      # gem5 is built separately because it depends on the DRAMSys library
+            - name: Build gem5
+              working-directory: ${{ github.workspace }}
+              run: scons build/ALL/gem5.opt -j $(nproc)
 
-  testlib-very-long-x86-boot-tests:
-    runs-on: [self-hosted, linux, x64, run]
-    container: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-    needs: build-gem5
-    timeout-minutes: 4320 # 3 days
-    steps:
-    - uses: actions/checkout@v3
-      with:
-        # Scheduled workflows run on the default branch by default. We
-        # therefore need to explicitly checkout the develop branch.
-        ref: develop
-    - uses: actions/download-artifact@v3
-      with:
-        name: ${{needs.build-gem5.outputs.build-name}}
-        path: build/ALL
-    - run: chmod u+x build/ALL/gem5.opt
-    - name: very-long x86-boot-tests
-      working-directory: ${{ github.workspace }}/tests
-      run: ./main.py run gem5/x86-boot-tests --length very-long --skip-build -vv -t $(nproc)
-    - name: create zip of results
-      if: success() || failure()
-      run: |
-        apt-get -y install zip
-        zip -r output.zip tests/testing-results
-    - name: upload zip
-      if: success() || failure()
-      uses: actions/upload-artifact@v3
-      env:
-        MY_STEP_VAR: ${{github.job}}_COMMIT.${{github.sha}}_RUN.${{github.run_id}}_ATTEMPT.${{github.run_attempt}}
-      with:
-        name: ${{ env.MY_STEP_VAR }}
-        path: output.zip
-        retention-days: 7
-    - run: echo "This job's status is ${{ job.status }}."
+            - name: Run DRAMSys Checks
+              working-directory: ${{ github.workspace }}
+              run: |
+                  ./build/ALL/gem5.opt configs/example/gem5_library/dramsys/arm-hello-dramsys.py
+                  ./build/ALL/gem5.opt configs/example/gem5_library/dramsys/dramsys-traffic.py
+                  ./build/ALL/gem5.opt configs/example/dramsys.py
+    weekly-tests:
+        # The dummy job is used to indicate whether the weekly tests have
+        # passed or not. This can be used as status check for pull requests.
+        # I.e., if we want to stop pull requests from being merged if the
+        # weekly tests are failing we can add this job as a required status
+        # check.
+        runs-on: ubuntu-22.04
+        needs:
+            - testlib-very-long-tests
+            - dramsys-tests
+        steps:
+            - run: echo "The weekly tests have passed."
diff --git a/.gitignore b/.gitignore
index 229a0d5ae9..8aaf08f076 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ configs/example/memcheck.cfg
 configs/dram/lowp_sweep.cfg
 .pyenv
 .vscode
+typings
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8cbc6afdb7..debad8cc6a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,4 @@
+---
 # Copyright (c) 2022 Arm Limited
 # All rights reserved.
 #
@@ -33,57 +34,73 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-minimum_pre_commit_version: "2.18"
+minimum_pre_commit_version: '2.18'
 
 default_language_version:
-  python: python3
+    python: python3
 
 exclude: |
-  (?x)^(
-    ext/.*|
-    build/.*|
-    src/systemc/ext/.*|
-    src/systemc/tests/.*/.*|
-    src/python/m5/ext/pyfdt/.*|
-    tests/.*/ref/.*
-  )$
+    (?x)^(
+      ext/(?!testlib/).*|
+      build/.*|
+      src/systemc/ext/.*|
+      src/systemc/tests/.*/.*|
+      src/python/m5/ext/pyfdt/.*|
+      tests/.*/ref/.*
+    )$
 
 default_stages: [commit]
 
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.3.0
-  hooks:
-  - id: trailing-whitespace
-  - id: end-of-file-fixer
-  - id: check-json
-  - id: check-yaml
-  - id: check-added-large-files
-  - id: mixed-line-ending
-    args: [--fix=lf]
-  - id: check-case-conflict
-- repo: https://github.com/psf/black
-  rev: 22.6.0
-  hooks:
-    - id: black
-- repo: local
-  hooks:
-  - id: gem5-style-checker
-    name: gem5 style checker
-    entry: util/git-pre-commit.py
-    always_run: true
-    exclude: ".*"
-    language: system
-    description: 'The gem5 style checker hook.'
-  - id: gem5-commit-msg-checker
-    name: gem5 commit msg checker
-    entry: ext/git-commit-msg
-    language: system
-    stages: [commit-msg]
-    description: 'The gem5 commit message checker hook.'
-  - id: gerrit-commit-msg-job
-    name: gerrit commit message job
-    entry: util/gerrit-commit-msg-hook
-    language: system
-    stages: [commit-msg]
-    description: 'Adds Change-ID to the commit message. Needed by Gerrit.'
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v4.5.0
+      hooks:
+          - id: trailing-whitespace
+          - id: end-of-file-fixer
+          - id: check-json
+          - id: check-yaml
+          - id: check-added-large-files
+          - id: mixed-line-ending
+            args: [--fix=lf]
+          - id: check-ast
+          - id: check-case-conflict
+          - id: check-merge-conflict
+          - id: check-symlinks
+          - id: destroyed-symlinks
+          - id: requirements-txt-fixer
+    - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
+      rev: 0.2.3
+      hooks:
+          - id: yamlfmt
+    - repo: https://github.com/psf/black
+      rev: 23.9.1
+      hooks:
+          - id: black
+    - repo: https://github.com/asottile/pyupgrade
+      rev: v3.14.0
+      hooks:
+          - id: pyupgrade
+            # Python 3.8 is the earliest version supported.
+            # We therefore conform to the standards compatible with 3.8+.
+            args: [--py38-plus]
+    - repo: local
+      hooks:
+          - id: gem5-style-checker
+            name: gem5 style checker
+            entry: util/git-pre-commit.py
+            always_run: true
+            exclude: .*
+            language: system
+            description: The gem5 style checker hook.
+          - id: gem5-commit-msg-checker
+            name: gem5 commit msg checker
+            entry: ext/git-commit-msg
+            language: system
+            stages: [commit-msg]
+            description: The gem5 commit message checker hook.
+          - id: gerrit-commit-msg-job
+            name: gerrit commit message job
+            entry: util/gerrit-commit-msg-hook
+            language: system
+            stages: [commit-msg]
+            description: Adds Change-ID to the commit message. Needed by Gerrit.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ae771d3ffb..208c9444e1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,539 +1,390 @@
-If you've made changes to gem5 that might benefit others, we strongly encourage
-you to contribute those changes to the public gem5 repository. There are
-several reasons to do this:
- * Share your work with others, so that they can benefit from new functionality.
- * Support the scientific principle by enabling others to evaluate your
-   suggestions without having to guess what you did.
- * Once your changes are part of the main repo, you no longer have to merge
-   them back in every time you update your local repo. This can be a huge time
-   saving!
- * Once your code is in the main repo, other people have to make their changes
-   work with your code, and not the other way around.
- * Others may build on your contributions to make them even better, or extend
-   them in ways you did not have time to do.
- * You will have the satisfaction of contributing back to the community.
-
-The main method for contributing code to gem5 is via our code review website:
-https://gem5-review.googlesource.com/. This documents describes the details of
-how to create code changes, upload your changes, have your changes
-reviewed, and finally push your changes to gem5. More information can be found
-from the following sources:
- * http://gem5.org/contributing
- * https://gerrit-review.googlesource.com/Documentation/index.html
- * https://git-scm.com/book
-
-
-High-level flow for submitting changes
-======================================
-
-    +-------------+
-    | Make change |
-    +------+------+
-           |
-           |
-           v
-    +-------------+
-    |  Run tests  |<--------------+
-    +------+------+               |
-           |                      |
-           |                      |
-           v                      |
-    +------+------+               |
-    | Post review |               |
-    +------+------+               |
-           |                      |
-           v                      |
-    +--------+---------+          |
-    | Wait for reviews |          |
-    +--------+---------+          |
-           |                      |
-           |                      |
-           v                      |
-      +----+----+   No     +------+------+
-      |Reviewers+--------->+ Update code |
-      |happy?   |          +------+------+
-      +----+----+                 ^
-           |                      |
-           | Yes                  |
-           v                      |
-      +----+-----+   No           |
-      |Maintainer+----------------+
-      |happy?    |
-      +----+-----+
-           |
-           | Yes
-           v
-    +------+------+
-    | Submit code |
-    +-------------+
-
-After creating your change to gem5, you can post a review on our Gerrit
-code-review site: https://gem5-review.googlesource.com. Before being able to
-submit your code to the mainline of gem5, the code is reviewed by others in the
-community. Additionally, the maintainer for that part of the code must sign off
-on it.
-
-Cloning the gem5 repo to contribute
-===================================
-
-If you plan on contributing, it is strongly encouraged for you to clone the
-repository directly, and checkout the `develop` branch from our gerrit instance
-at https://gem5.googlesource.com/.
-
-To clone the gem5 repository:
+This document serves as a guide to contributing to gem5.
+The following subsections outline, in order, the steps involved in contributing
+to the gem5 project.
 
-```
- git clone https://gem5.googlesource.com/public/gem5
-```
+## Determining what you can contribute
 
-By default, the stable branch is checked out. The stable branch contains the
-latest released version of gem5. To obtain code still under-development (and
-which contributions can be made):
+The easiest way to see how you can contribute to gem5 is to check our Jira
+issue tracker: <https://gem5.atlassian.net> or GitHub issue tracker:
+<https://github.com/gem5/gem5/issues>.
 
-```
-cd gem5
-git checkout --track origin/develop
-```
+Browse these open issues and see if there are any which you are capable of
+handling. When you find a task you are happy to carry out, verify no one else
+is presently assigned, then leave a comment asking if you may assign yourself
+this task. Though not mandatory, we
+advise first-time contributors do this so developers more familiar with the
+task may give advice on how best to implement the necessary changes.
 
-Changes should be made to this develop branch. Changes to the stable branch
-will be blocked. Once a change on the develop branch is properly incorporated
-into the gem5 repo it will be merged into the stable branch upon the next
-release of gem5. New releases of gem5 occur three times a year. Ergo, changes
-made to the develop branch should appear on the stable branch within three to
-four months as part of a stable release.
+Once a developer has replied to your comment (and given any advice they may
+have), you may officially assign yourself the task. This helps the gem5
+development community understand which parts of the project are presently being
+worked on.
 
-Other gem5 repositories
------------------------
+**If, for whatever reason, you stop working on a task, please unassign
+yourself from the task.**
 
-There are a few repositories other than the main gem5 development repository.
+## Obtaining the git repo
 
- * public/m5threads: The code for a pthreads implementation that works with
-   gem5's syscall emulation mode.
- * public/gem5-resources: Resources to enable computer architecture research
-   with gem5. See the README.md file in the gem5-resources repository for more
-   information.
- * public/gem5-website: The gem5.org website source. See the README.md file in
-   the gem5-website repository for more information.
+The gem5 git repository is hosted at <https://github.com/gem5/gem5>.
+**Please note: contributions made to other gem5 repos
+will not be considered. Please contribute to <https://github.com/gem5/gem5>
+exclusively.**
 
-Making changes to gem5
-======================
+To pull the gem5 git repo:
 
-It is strongly encouraged to use git branches when making changes to gem5.
-Additionally, keeping changes small and concise and only have a single logical
-change per commit.
+```sh
+git clone https://github.com/gem5/gem5
+```
 
-Unlike our previous flow with Mercurial and patch queues, when using git, you
-will be committing changes to your local branch. By using separate branches in
-git, you will be able to pull in and merge changes from mainline and simply
-keep up with upstream changes.
+If you wish to use gem5 and never contribute, this is fine. However, to
+contribute, we use the [GitHub Pull-Request model](https://docs.github.com/en/pull-requests), and therefore recommend [Forking the gem5 repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo) prior to contributing.
 
-We use a rebase-always model for contributions to the develop branch of gem5.
-In this model, the changes are rebased on top of the tip of develop instead of
-merged. This means that to contribute, you will have to frequently rebase any
-feature branches on top of develop. If you see a "merge conflict" in gerrit, it
-can often be solved with a simple rebase. To find out more information about
-rebasing and git, see the [git book].
+### Forking
 
-[git book]: https://git-scm.com/book/en/v2/Git-Branching-Rebasing
+Please consult the [GitHub documentation on Forking a GitHub repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo).
+As we will be working atop the `develop` branch, please ensure you Fork all the repository's branches, not just the `stable` branch.
 
+This will create your own forked version of the gem5 repo on your own GitHub account.
+You may then obtain it locally using:
+
+```sh
+git clone https://github.com/{your github account}/gem5
+```
 
-Setting up pre-commit
----------------------
+### stable / develop branch
 
-To help ensure the gem5 style guide is maintained, we use [pre-commit](
-https://pre-commit.com) to run checks on changes to be contributed.
+When cloned, the git repo will have the `stable` branch checked-out by default. The
+`stable` branch is the gem5 stable release branch. I.e., the HEAD
+of this branch contains the latest stable release of gem5. (execute `git tag`
+on the `stable` branch to see the list of stable releases. A particular
+release may be checked out by executing `git checkout <release>`). As the
+`stable` branch only contains officially released gem5 code, **contributors
+should not develop changes on top of the `stable` branch**; they should instead
+**develop changes on top of the `develop` branch**.
 
-To setup pre-commit, run the following in your gem5 directory to install the
-pre-commit and commit message hooks.
+To switch to the `develop` branch:
 
 ```sh
-pip install pre-commit
-pre-commit install -t pre-commit -t commit-msg
+git switch develop
 ```
 
-The hooks are also automatically installed when gem5 is compiled.
+The `develop` branch is merged into the `stable` branch upon a gem5 release.
+Therefore, any changes you make exist on the develop branch until the next release.
+
+We strongly recommend creating your own local branches to do changes.
+The flow of development works best if `develop` and `stable` are not modified directly.
+This helps keep your changes organized across different branches in your forked repository.
+The following example will create a new branch, from `develop`, called `new-feature`:
+
+```sh
+git switch -c new-feature
+```
 
-When you run a `git commit` command the pre-commit hook will run checks on your
-committed code. The commit will be blocked if a check fails.
+## Making modifications
+
+### C/CPP
+
+Different tasks will require the project to be modified in different ways.
+Though, in all cases, our style-guide must be adhered to. The full C/C++ style
+guide is outlined [here](/documentation/general_docs/development/coding_style).
+
+As a high-level overview:
+
+* Lines must not exceed 79 characters in length.
+* There should be no trailing white-space on any line.
+* Indentations must be 4 spaces (no tab characters).
+* Class names must use upper camel case (e.g., `ThisIsAClass`).
+* Class member variables must use lower camel case (e.g.,
+`thisIsAMemberVariable`).
+* Class member variables with their own public accessor must start with an
+underscore (e.g., `_variableWithAccessor`).
+* Local variables must use snake case (e.g., `this_is_a_local_variable`).
+* Functions must use lower camel case (e.g., `thisIsAFunction`).
+* Function parameters must use snake case.
+* Macros must be in all caps with underscores (e.g., `THIS_IS_A_MACRO`).
+* Function declaration return types must be on their own line.
+* Function brackets must be on their own line.
+* `for`/`if`/`while` branching operations must be followed by a white-space
+before the conditional statement (e.g., `for (...)`).
+* `for`/`if`/`while` branching operations' opening bracket must be on the
+same line, with the closing bracket on its own line (e.g.,
+`for (...) {\n ... \n}\n`). There should be a space between the condition(s)
+and the opening bracket.
+* C++ access modifiers must be indented by two spaces, with methods/variables
+defined within indented by four spaces.
+
+Below is a simple toy example of how a class should be formatted:
+
+```C++
+#define EXAMPLE_MACRO 7
+class ExampleClass
+{
+  private:
+    int _fooBar;
+    int barFoo;
+
+  public:
+    int
+    getFooBar()
+    {
+        return _fooBar;
+    }
+
+    int
+    aFunction(int parameter_one, int parameter_two)
+    {
+        int local_variable = 0;
+        if (true) {
+            int local_variable = parameter_one + parameter_two + barFoo
+                               + EXAMPLE_MACRO;
+        }
+        return local_variable;
+    }
+
+};
+```
 
-The same checks are run as part of Gerrit's CI tests (those required to obtain
-a Verified label, necessary for a change to be accepted to the develop branch).
-Therefore setting up pre-commit in your local gem5 development environment is
-recommended.
+### Python
 
-You can automatically format files to pass the pre-commit tests by running:
+We use [Python Black](https://github.com/psf/black) to format our Python code
+to the correct style. To install:
 
 ```sh
-pre-commit run --files <files to format>
+pip install black
 ```
 
-Requirements for change descriptions
-------------------------------------
-To help reviewers and future contributors more easily understand and track
-changes, we require all change descriptions be strictly formatted.
-
-A canonical commit message consists of three parts:
- * A short summary line describing the change. This line starts with one or
-   more keywords (found in the MAINTAINERS file) separated by commas followed
-   by a colon and a description of the change. This short description is
-   written in the imperative mood, and should say what happens when the patch
-   is applied. Keep it short and simple. Write it in sentence case preferably
-   not ending in a period. This line should be no more than 65 characters long
-   since version control systems usually add a prefix that causes line-wrapping
-   for longer lines.
- * (Optional, but highly recommended) A detailed description. This describes
-   what you have done and why. If the change isn't obvious, you might want to
-   motivate why it is needed. Lines need to be wrapped to 72 characters or
-   less. Leave a blank line between the first short summary line and this
-   detailed description.
- * Tags describing patch metadata. You are highly recommended to use
-   tags to acknowledge reviewers for their work. Gerrit will automatically add
-   most tags.
-
-Tags are an optional mechanism to store additional metadata about a patch and
-acknowledge people who reported a bug or reviewed that patch. Tags are
-generally appended to the end of the commit message in the order they happen.
-We currently use the following tags:
- * Signed-off-by: Added by the author and the submitter (if different).
-   This tag is a statement saying that you believe the patch to be correct and
-   have the right to submit the patch according to the license in the affected
-   files. Similarly, if you commit someone else's patch, this tells the rest
-   of the world that you have have the right to forward it to the main
-   repository. If you need to make any changes at all to submit the change,
-   these should be described within hard brackets just before your
-   Signed-off-by tag. By adding this line, the contributor certifies the
-   contribution is made under the terms of the Developer Certificate of Origin
-   (DCO) [https://developercertificate.org/].
- * Reviewed-by: Used to acknowledge patch reviewers. It's generally considered
-   good form to add these. Added automatically.
- * Reported-by: Used to acknowledge someone for finding and reporting a bug.
- * Reviewed-on: Link to the review request corresponding to this patch. Added
-   automatically.
- * Change-Id: Used by Gerrit to track changes across rebases. Added
-   automatically with a commit hook by git.
- * Tested-by: Used to acknowledge people who tested a patch. Sometimes added
-   automatically by review systems that integrate with CI systems.
- * Issue-On: Used to link a commit to an issue in gem5's [issue tracker]. The
-   format should be https://gem5.atlassian.net/browse/GEM5-<NUMBER>
-
-[issue tracker]: https://gem5.atlassian.net/
-
-Other than the "Signed-off-by", "Issue-On", "Reported-by", and "Tested-by"
-tags, you generally don't need to add these manually as they are added
-automatically by Gerrit.
-
-It is encouraged for the author of the patch and the submitter to add a
-Signed-off-by tag to the commit message. By adding this line, the contributor
-certifies the contribution is made under the terms of the Developer Certificate
-of Origin (DCO) [https://developercertificate.org/].
-
-If your change relates to a [Jira Issue](https://gem5.atlassian.net), it is
-advised that you provide a link to the issue in the commit message (or messages
-if the Jira Issue relates to multiple commits). Though optional, doing this
-can help reviewers understand the context of a change.
-
-It is imperative that you use your real name and your real email address in
-both tags and in the author field of the changeset.
-
-For significant changes, authors are encouraged to add copyright information
-and their names at the beginning of the file. The main purpose of the author
-names on the file is to track who is most knowledgeable about the file (e.g.,
-who has contributed a significant amount of code to the file). The
-`util/update-copyright.py` helper script can help to keep your copyright dates
-up-to-date when you make further changes to files which already have your
-copyright but with older dates.
-
-Note: If you do not follow these guidelines, the gerrit review site will
-automatically reject your patch.
-If this happens, update your changeset descriptions to match the required style
-and resubmit. The following is a useful git command to update the most recent
-commit (HEAD).
+Then run on modified/added python files using:
 
+```sh
+black <files/directories>
 ```
- git commit --amend
-```
 
-Running tests
-=============
+For variable/method/etc. naming conventions, please follow the [PEP 8 naming
+convention recommendations](
+https://peps.python.org/pep-0008/#naming-conventions). While we try our best to
+enforce naming conventions across the gem5 project, we are aware there are
+instances where they are not. In such cases please **follow the convention
+of the code you are modifying**.
+
+### Using pre-commit
 
-Before posting a change to the code review site, you should always run the
-quick tests!
-See TESTING.md for more information.
+To help enforce our style guide we use [pre-commit](
+https://pre-commit.com). pre-commit is a git hook and, as such, must be
+explicitly installed by a gem5 developer.
 
-Posting a review
-================
+To install the gem5 pre-commit checks, execute the following in the gem5
+directory:
 
-If you have not signed up for an account on the Gerrit review site
-(https://gem5-review.googlesource.com), you first have to create an account.
+```sh
+pip install pre-commit
+pre-commit install
+```
 
-Setting up an account
----------------------
- 1. Go to https://gem5.googlesource.com/
- 2. Click "Sign In" in the upper right corner. Note: You will need a Google
- account to contribute.
- 3. After signing in, click "Generate Password" and follow the instructions.
+Once installed pre-commit will run checks on modified code prior to running the
+`git commit` command (see [our section on committing](#committing) for more
+details on committing your changes). If these tests fail you will not be able to
+commit.
 
-Submitting a change
--------------------
+These same pre-commit checks are run as part of our CI checks (those
+which must pass in order for a change to be merged into the develop branch). It
+is therefore strongly recommended that developers install pre-commit to catch
+style errors early.
 
-In gerrit, to submit a review request, you can simply push your git commits to
-a special named branch. For more information on git push see
-https://git-scm.com/docs/git-push.
+## Compiling and running tests
 
-There are three ways to push your changes to gerrit.
+The minimum criteria for a change to be submitted is that the code is
+compilable and the test cases pass.
 
-Push change to gerrit review
-----------------------------
+The following command both compiles the project and runs our "quick"
+system-level checks:
 
+```sh
+cd tests
+./main.py run
 ```
- git push origin HEAD:refs/for/develop
-```
 
-Assuming origin is https://gem5.googlesource.com/public/gem5 and you want to
-push the changeset at HEAD, this will create a new review request on top of the
-develop branch. More generally,
+**Note: These tests can take several hours to build and execute. `main.py` may
+be run on multiple threads with the `-j` flag. E.g.: `python main.py run
+-j6`.**
+
+The unit tests should also pass. To run the unit tests:
 
+```sh
+scons build/NULL/unittests.opt
 ```
- git push <gem5 gerrit instance> <changeset>:refs/for/<branch>
+
+To compile an individual gem5 binary:
+
+```sh
+scons build/ALL/gem5.opt
 ```
 
-See https://gerrit-review.googlesource.com/Documentation/user-upload.html for
-more information.
+This compiles a gem5 binary containing "ALL" ISA targets. For more information
+on building gem5 please consult our [building documentation](
+/documentation/general_docs/building).
 
-Pushing your first change
---------------------------
-The first time you push a change you may get the following error:
+## Committing
 
-```
- remote: ERROR: [fb1366b] missing Change-Id in commit message footer
- ...
+When you feel your change is done, you may commit. Start by adding the changed
+files:
+
+```Shell
+git add <changed files>
 ```
 
-Within the error message, there is a command line you should run. For every new
-clone of the git repo, you need to run the following command to automatically
-insert the change id in the the commit (all on one line).
+Make sure these changes are being added to your forked repository.
+Then commit using:
 
+```Shell
+git commit
 ```
- curl -Lo `git rev-parse --git-dir`/hooks/commit-msg \
-	https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; \
- chmod +x `git rev-parse --git-dir`/hooks/commit-msg
+
+The commit message must adhere to our style. The first line of the commit is
+the "header". The header starts with a tag (or tags, separated by a comma),
+then a colon. Which tags are used depend on which components of gem5
+you have modified. **Please refer to the [MAINTAINERS.yaml](
+https://github.com/gem5/gem5/blob/stable/MAINTAINERS.yaml) for
+a comprehensive list of accepted tags**. After this colon a short description
+of the commit must be provided. **This header line must not exceed 65
+characters**.
+
+After this, a more detailed description of the commit can be added. This is
+inserted below the header, separated by an empty line. Including a description
+is optional but it's strongly recommended. The description may span multiple
+lines, and multiple paragraphs. **No line in the description may exceed 72
+characters.**
+
+To improve the navigability of the gem5 project we would appreciate if commit
+messages include a link to the relevant Jira issue/issues.
+
+Below is an example of how a gem5 commit message should be formatted:
+
 ```
+test,base: This commit tests some classes in the base component
 
-If you receive the above error, simply run this command and then amend your
-changeset.
+This is a more detailed description of the commit. This can be as long
+as is necessary to adequately describe the change.
 
+A description may span multiple paragraphs if desired.
+
+Jira Issue: https://gem5.atlassian.net/browse/GEM5-186
 ```
- git commit --amend
+
+If you feel the need to change your commit, add the necessary files then
+_amend_ the changes to the commit using:
+
+```sh
+git commit --amend
 ```
 
-Push change to gerrit as a Work In Progress
--------------------------------------------
+This will give you the opportunity to edit the commit message.
 
-It is acceptable to push commits as "Work In Progress" (WIP) changes within
-gerrit. WIP changes are publicly visible though no one will be able to review
-the changes or be directly notified they have been submitted. WIP changes can
-be useful for backing up code currently under-development or for sharing
-incomplete code with the wider community (i.e., the link to the gerrit change
-may be shared, and others may download the change, comment on it, and track
-alterations over time).
+You may continue to add more commits as a chain of commits to be included in the pull-request.
+However, we recommend that pull-requests are kept small and focused.
+For example, if you wish to add a different feature or fix a different bug, we recommend doing so in another pull request.
 
-See https://gerrit-review.googlesource.com/Documentation/intro-user.html#wip
-for details on WIP gerrit changes.
+## Keeping your forked and local repositories up-to-date
 
-To push a change as a WIP:
+While working on your contribution, we recommend keeping your forked repository in-sync with the source gem5 repository.
+To do so, regularly [Sync your fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork).
+This can be done via the GitHub web interface and, if so, you should `git pull` on top of your local `stable` and `develop` branches to ensure your local repository is in-sync.
+To do so from the command line:
 
+```sh
+# Add the main gem5 repository as a remote on your local repository. This only
+# needs to be done once.
+git remote add upstream https://github.com/gem5/gem5.git
+
+git fetch upstream # Obtain the latest from the gem5 repo.
+git switch develop # Switch to the develop branch.
+git merge upstream/develop # Merge the latest changes into the develop branch.
+git push # Push the changes to develop to your forked repo.
+git switch stable # Switch to the stable branch.
+git merge upstream/stable # Merge the latest changes into the stable branch.
+git push # Push the changes to stable to your forked repo.
 ```
- git push origin HEAD:refs/for/develop%wip
+
+As our local branch works atop the `develop` branch, once we've synced our forked repository, we can rebase our local branch on top of the `develop` branch.
+Assuming our local branch is called `new-feature`:
+
+```sh
+git switch develop # Switching back to the develop branch.
+git pull # Ensuring we have the latest from the forked repository.
+git switch new-feature # Switching back to our local branch.
+git rebase develop # Rebasing our local branch on top of the develop branch.
 ```
 
-Once you have pushed your change as a WIP, you can log onto [gerrit](
-https://gem5-review.googlesource.com) and view it. Once you're happy with the
-change you can add reviewers which shall move your change from WIP status
-to be considered for submission by the wider gem5 community. Switching from a
-WIP to a regular change does not notify the gem5 community, via the gem5-dev
-mailing-list, that a change has been submitted (as would occur if a change were
-submitted directly for review). It is therefore important to include reviewers
-and CC those who you wish to view the change (they will be notified
-automatically via email).
+Conflicts may need to be resolved between your branch and new changes.
 
-Push change bypassing gerrit
------------------------------
+## Pushing and creating a pull request
 
-Only maintainers can bypass gerrit review. This should very rarely be used.
+Once you have completed your changes locally, you can push to your forked gem5 repository.
+Assuming the branch we are working on is `new-feature`:
 
-```
- git push origin HEAD:refs/heads/develop
+```sh
+git switch new-feature # Ensure we are on the 'new-feature' branch.
+git push --set-upstream origin new-feature
 ```
 
-Other gerrit push options
--------------------------
-
-There are a number of options you can specify when uploading your changes to
-gerrit (e.g., reviewers, labels). The gerrit documentation has more
-information.
-https://gerrit-review.googlesource.com/Documentation/user-upload.html
-
-Branches
-========
-
-By default, contributions to gem5 should be made on the develop branch. The
-stable branch is maintained as a stable release branch (i.e., it can be pulled
-to obtain the latest official release of gem5). Creation of additional branches
-is generally discouraged due to their tendency to bloat git repositories with
-abandoned code. However, the creation of new branches is permitted for
-development of a specific feature or improvement if one or more of the
-following criteria are met:
-
-1. The feature/improvement is likely to be of a large size, consisting of many
-commits, with little logic in these commits being contributed separately.
-2. The feature/improvement will be developed over a long period of time.
-3. There is sufficient reason that a feature/improvement should not be part
-of the next gem5 release (e.g., the change should be held within a feature
-branch until ready for the next release, at which point it will be merged
-into the develop branch).
-
-If a branch is required it can only be created by a project maintainer.
-Therefore, if a gem5 contributor desires a separate branch for their work, they
-should request one from the maintainer of the component the work relates to
-(see MAINTAINERS for the list of maintainers and the components they are
-responsible for). **The maintainer shall use their discretion to determine
-whether the creation of a branch is necessary**. If approved, the maintainer
-shall create the branch which the contributor may then use.
-
-Development on a branch within Gerrit functions in exactly the same way as
-contributing to the develop branch. When contributors to a branch are
-satisfied, they should create a merge commit into the develop branch. The
-maintainer should then be notified that the branch they created can now be
-deleted.
-
-**Abandonment of changes within branches may result in these branches being
-removed from the repository. All branches within a repo should be under active
-development.**
-
-Reviewing patches
-=================
-
-Reviewing patches is done on our gerrit instance at
-https://gem5-review.googlesource.com/.
-
-After logging in with your Google account, you will be able to comment, review,
-and push your own patches as well as review others' patches. All gem5 users are
-encouraged to review patches. The only requirement to review patches is to be
-polite and respectful of others.
-
-There are multiple labels in Gerrit that can be applied to each review detailed
-below.
- * Code-review: This is used by any gem5 user to review patches. When reviewing
-   a patch you can give it a score of -2 to +2 with the following semantics.
-   * -2: This blocks the patch. You believe that this patch should never be
-     committed. This label should be very rarely used.
-   * -1: You would prefer this is not merged as is
-   * 0: No score
-   * +1: This patch seems good, but you aren't 100% confident that it should be
-     pushed.
-   * +2: This is a good patch and should be pushed as is.
- * Maintainer: Currently only PMC members are maintainers. At least one
-   maintainer must review your patch and give it a +1 before it can be merged.
- * Verified: This is automatically generated from the continuous integrated
-   (CI) tests. Each patch must receive at least a +1 from the CI tests before
-   the patch can be merged. The patch will receive a +1 if gem5 builds and
-   runs, and it will receive a +2 if the stats match.
- * Style-Check: This is automatically generated and tests the patch against the
-   gem5 code style
-   (http://www.gem5.org/documentation/general_docs/development/coding_style/).
-   The patch must receive a +1 from the style checker to be pushed.
-
-Note: Whenever the patch creator updates the patch all reviewers must re-review
-the patch. There is no longer a "Fix it, then Ship It" option.
-
-Once you have received reviews for your patch, you will likely need to make
-changes. To do this, you should update the original git changeset. Then, you
-can simply push the changeset again to the same Gerrit branch to update the
-review request.
+Now, via the GitHub web interface, you can [create a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) of your changes from your forked repository's branch into the gem5 `develop` branch.
 
-```
- git push origin HEAD:refs/for/develop
-```
+## Passing the checks
 
-Committing changes
-==================
-
-Each patch must meet the following criteria to be merged:
- * At least one review with +2
- * At least one maintainer with +1
- * At least +1 from the CI tests (gem5 must build and run)
- * At least +1 from the style checker
-
-Once a patch meets the above criteria, the submitter of the patch will be able
-to merge the patch by pressing the "Submit" button on Gerrit. When the patch is
-submitted, it is merged into the public gem5 branch.
-
-Review moderation and guidelines
---------------------------------
-
-Once a change is submitted, reviewers shall review the change. This may require
-several iterations before a merge. Comments from reviewers may include
-questions, and requests for alterations to the change prior to merging. The
-overarching philosophy in managing this process is that there should be
-politeness and clear communication between all parties at all times, and,
-whenever possible, permission should be asked before doing anything that may
-inconvenience another party. Included below are some guidelines we expect
-contributors and reviewers to follow.
-
- * In all forms of communication, contributors and reviewers must be polite.
-   Comments seen as being needlessly hostile or dismissive will not be
-   tolerated.
- * Change contributors should respond to, or act upon, each item of feedback
-   given by reviewers. If there is disagreement with a piece of
-   feedback, a sufficiently detailed reason for this disagreement should
-   be given. Polite discussion, and sharing of information and expertise
-   is strongly encouraged.
- * Contributors are advised to assign reviewers when submitting a change.
-   Anyone who contributes to gem5 can be assigned as a reviewer. However,
-   all changes must be accepted by at least one maintainer prior to a
-   merge, ergo assigning of at least one maintainer as a reviewer is
-   strongly recommended. Please see MAINTAINERS for a breakdown of
-   gem5 maintainers and which components they claim responsibility for.
-   Maintainers should be chosen based on which components the change is
-   targeting. Assigning of reviewers is not strictly enforced, though not
-   assigning reviewers may slow the time in which a change is reviewed.
- * If a contributor posts a change and does not receive any reviews after two
-   working days (excluding regional holidays), it is acceptable to "prod"
-   reviewers. This can be done by adding a reply to the changeset review
-   (e.g., "Would it be possible for someone to review my change?"). If the
-   contributor has yet to assign reviewers, they are strongly advised to do so.
-   Reviewers will get notified when assigned to referee a change.
- * By default, the original contributor is assumed to own a change. I.e.,
-   they are assumed to be the sole party to submit patchsets. If someone
-   other than the original contributor wishes to submit patchsets to a
-   change on the original contributor's behalf, they should first ask
-   permission. If two working days pass without a response, a patchset may be
-   submitted without permission. Permission does not need to be asked to submit
-   a patchset consisting of minor, inoffensive, changes such a typo and format
-   fixes.
- * Once a change is ready to merge, it enters a "Ready to Submit" state. The
-   original contributor should  merge their change at this point, assuming they
-   are content with the commit in its present form. After two working days, a
-   reviewer may message a contributor to remind them of the change being in a
-   "Ready to Submit" state and ask if they can merge the change on the
-   contributors behalf. If a further two working days elapse without a
-   response, the reviewer may merge without permission. A contributor may keep
-   a change open for whatever reason though this should be communicated to the
-   reviewer when asked.
- * After a month of inactivity from a contributor on an active change, a
-   reviewer may post a message on the change reminding the submitter, and
-   anyone else watching the change, of its active status and ask if they are
-   still interested in eventually merging the change. After two weeks of no
-   response the reviewer reserves the right to abandon the change under the
-   assumption there is no longer interest.
- * The final arbiter in any dispute between reviewers and/or contributors
-   is the PMC (PMC members are highlighted in MAINTAINERS). Disputes requiring
-   intervention by the PMC are undesirable. Attempts should be made to resolve
-   disagreements via respectful and polite discourse before being escalated to
-   this level.
-
-Releases
-========
+Once you have created a pull request, the gem5 Continuous Integration (CI) tests will run.
+These run a series of checks to ensure your changes are valid.
+These must pass before your changes can be merged into the gem5 `develop` branch.
+
+In addition to the CI tests, your changes will be reviewed by the gem5 community.
+Your pull-request must have the approval of at least one community member prior to being merged.
+
+Once your pull-request has passed all the CI tests and has been approved by at least one community member, a gem5 maintainer will do a [Merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges) on the pull-request.
+The gem5 maintainers are individuals granted the ability to merge pull requests into the gem5 `develop` branch.
+
+
+### Making iterative improvements based on feedback
+
+A reviewer will ask questions and post suggestions on GitHub. You should read
+these comments and answer these questions. **All communications between
+reviewers and contributors should be done in a polite manner. Rude and/or
+dismissive remarks will not be tolerated.**
+
+When you understand what changes are required make amendments to the pull
+request by adding patches to the same branch and then pushing to the forked repository.
+A git "force push" (i.e., `git push --force`) is also acceptable if you wish to alter the commits locally in order to make the changes.
+We encourage contributors to help keep our `git log` clean and readable.
+We recommend that users rebase their changes frequently on top of the develop branch, squash their commits where appropriate (e.g., in cases where there are many small fix commits to a change in the same PR) then force push changes to keep their PR commits concise.
+
+Once pushed to the forked repository, the pull request will automatically update with your changes.
+The reviewer will then re-review your changes and, if necessary, ask for further changes, or approve your pull-request.
+
+## Reviewing other contributions
+
+We encourage all gem5 developers to review others' contributions.
+Anyone may review a gem5 change and, if they feel it is ready, approve it.
+All pull-requests can be found at <https://github.com/gem5/gem5/pulls>.
+
+When reviewing a pull request we enforce the following guidelines.
+These have been designed to ensure clear and polite communication between all parties:
+
+* In all forms of communication, contributors and reviewers must be polite.
+Comments seen as being rude or dismissive will not be tolerated.
+* If choosing to not approve a PR, please state clearly why.
+When asking for changes, the comments should be specific and actionable.
+General criticisms which cannot be addressed or understood by the contributor are unhelpful.
+If the contribution needs improvement, reviewers should state what their requested changes are.
+If more information is needed for the reviewers to make a decision the reviewer should ask clear questions.
+If the PR is generally not seen as a worthwhile contribution, a good justification should be given so the contributor may fairly rebut.
+* By default, the original contributor is assumed to own a change.
+I.e., they are assumed to be the sole party to submit patches to the pull request.
+If someone other than the original contributor wishes to submit patches on the original contributor's behalf they should first ask permission.
+Pull requests which appear abandoned may be adopted by a new contributor as long as there is good enough reason to assume the original contributor is no longer working on the pull request.
+* Maintainers have the final say on whether a change is merged.
+Your review will be taken into account by the maintainer.
+It is expected, in all but the most extreme cases, that the reviewer's concerns be addressed and that the reviewer approve the contribution prior to the maintainer merging the pull request.
+
+We also recommend consulting Google's ["How to write code review comments"](https://google.github.io/eng-practices/review/reviewer/comments.html) for advice on giving feedback to contributors.
+
+## Releases
 
 gem5 releases occur 3 times per year. The procedure for releasing gem5 is as
 follows:
@@ -549,8 +400,8 @@ gem5-dev mailing list will be notified that the staging branch will be merged
 into the stable branch after two weeks, thus marking the new release.
 3. The staging branch will have the full suite of gem5 tests run on it to
 ensure all tests pass and the to-be-released code is in a decent state.
-4. If a user submits a changeset to the staging branch, it will be considered
-and undergo the standard Gerrit review process. However, only alterations that
+4. If a user submits a pull request to the staging branch, it will be considered
+and undergo the standard GitHub review process. However, only alterations that
 cannot wait until the following release will be accepted for submission into
 the branch (i.e., submissions to the staging branch for "last minute"
 inclusions to the release should be of a high priority, such as a critical bug
@@ -558,8 +409,8 @@ fix). The project maintainers will use their discretion in deciding whether a
 change may be submitted directly to the staging branch. All other submissions
 to gem5 will continue to be made to the develop branch. Patches submitted
 into the staging branch do not need to be re-added to the develop branch.
-5. Once signed off by members of the PMC the staging branch shall be merged
-into the stable and develop branch. The staging branch will then be deleted.
+5. Once the staging branch has been deemed ready for release, the [release procedures](https://www.gem5.org/documentation/general_docs/development/release_procedures/) will be carried out.
+This will end with the staging branch being merged into the stable branch.
 6. The stable branch shall be tagged with the correct version number for that
 release. gem5 conforms to a "v{YY}.{MAJOR}.{MINOR}.{HOTFIX}" versioning system.
 E.g., the first major release of 2022 will be "v22.0.0.0", followed by
@@ -569,8 +420,16 @@ the minor release numbers in case this policy changes in the future.
 7. The gem5-dev and gem5-user mailing lists shall be notified of the new gem5
 release.
 
-Hotfixes
---------
+### Exemptions
+
+Due to limitations with GitHub we may update the ".github" directory in the gem5 repo's `stable` branch between gem5 releases.
+This is due to certain processes carried out by the GitHub Actions infrastructure which rely on configurations being present on a repository's primary branch.
+As the files in ".github" only influence the functionality of our GitHub actions and other GitHub activities, updating these files does not change the functionality of gem5 in any way.
+It is therefore safe to do this.
+Despite this exemption to our normal procedure we aim to ensure that **the ".github" directory on the `stable` branch is never "ahead" of that in the `develop` branch**.
+Therefore contributors who wish to update files in ".github" should submit their changes to `develop` and then request their changes to be applied to the `stable` branch.
+
+### Hotfixes
 
 There may be circumstances in which a change to gem5 is deemed critical and
 cannot wait for an official release (e.g., a high-priority bug fix). In these
@@ -585,7 +444,7 @@ permitted, the following steps will be taken:
 1. A new branch with the prefix "hotfix-" will be created from the stable
 branch. Only gem5 maintainers can create branches. If a non-maintainer requires
 the creation of a hotfix branch then they should contact a gem5 maintainer.
-2. The change shall be submitted to the hotfix branch via gerrit. Full review,
+2. The change shall be submitted to the hotfix branch via GitHub. Full review,
 as with any other change, will be required.
 3. Once fully submitted, the hotfix branch shall be merged into both the
 develop and the stable branch by a gem5 maintainer.
diff --git a/MAINTAINERS.yaml b/MAINTAINERS.yaml
index c8b8957496..ebf5e3a5e7 100644
--- a/MAINTAINERS.yaml
+++ b/MAINTAINERS.yaml
@@ -1,23 +1,23 @@
+---
 # See CONTRIBUTING.md for details of gem5's contribution process.
 #
 # This file contains a list of gem5's subsystems and their
-# maintainers. The key used to identifity a subsystem should be used
-# as a tag in commit messages targetting that subsystem. At least one
-# (not all) of these maintainers must review the patch before it can
-# be pushed. These people will automatically be emailed when you
-# upload the patch to Gerrit (https://gem5-review.googlesource.com).
-# These subsystem keys mostly follow the directory structure.
+# maintainers. The key used to identify a subsystem should be used
+# as a tag in commit messages targeting that subsystem. Via our GitHub
+# Pull Request system (https://github.com/gem5/gem5/pulls) a maintainer
+# of the subsystem impacted by a pull request contribution will be added
+# as an assignee to that pull request. Their role is to referee the
+# contribution (add a review, assign reviewers, suggest changes, etc.), then
+# merge the contribution into the gem5 develop branch when they are satisfied
+# with the change.
 #
-# Maintainers have the following responsibilities:
-# 1. That at least one maintainer of each subsystem reviews all
-#    changes to that subsystem (they will be automatically tagged and
-#    emailed on each new change).
-# 2. They will complete your reviews in a timely manner (within a few
-#    business days).
-# 3. They pledge to uphold gem5's community standards and its code of
-#    conduct by being polite and professional in their code
-#    reviews. See CODE-OF-CONDUCT.md.
+# Maintainers assigned to a pull request are expected to acknowledge their
+# assignment in 2 business days and to fully begin refereeing the contribution
+# within a business week.
 #
+# Maintainers pledge to uphold gem5's community standards and its code of
+# conduct by being polite and professional in their interactions with
+# contributors. See CODE-OF-CONDUCT.md.
 #
 # Entries in this file have the following format:
 #   key:
@@ -27,310 +27,260 @@
 #     maintainers:
 #       - John Doe <john.doe@gem5.org>
 #       - Jane Doe <jane.doe@gem5.org>
-#
+#     experts:
+#       - Jack Doe <jack.doe@gem5.org>
+#       - Jill Doe <jill.doe@gem5.org>
 #
 # The status field should have one of the following values:
 #   - maintained: The component has an active maintainer.
 #   - orphaned: The component is looking for a new owner.
-
-
-pmc:
-  desc: >-
-    PMC Members (general maintainers):
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
-    - Brad Beckmann <bradford.beckmann@gmail.com>
-    - David Wood <david@cs.wisc.edu>
-    - Gabe Black <gabe.black@gmail.com>
-    - Giacomo Travaglini <giacomo.travaglini@arm.com>
-    - Jason Lowe-Power <jason@lowepower.com> (chair)
-    - Matt Sinclair <sinclair@cs.wisc.edu>
-    - Tony Gutierrez <anthony.gutierrez@amd.com>
-    - Steve Reinhardt <stever@gmail.com>
+#
+# The experts field is optional and used to identify people who are
+# knowledgeable about the subsystem but are not responsible for it. Those
+# listed as an expert are typically good to add as a reviewer for pull requests
+# targeting that subsystem.
 
 arch:
-  desc: >-
-    General architecture-specific components
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    desc: >-
+        General architecture-specific components
+    status: orphaned
 
 arch-arm:
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
-    - Giacomo Travaglini <giacomo.travaglini@arm.com>
+    status: maintained
+    maintainers:
+        - Giacomo Travaglini <giacomo.travaglini@arm.com>
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 arch-gcn3:
-  status: maintained
-  maintainers:
-    - Matt Poremba <matthew.poremba@amd.com>
-    - Matt Sinclair <sinclair@cs.wisc.edu>
+    status: maintained
+    maintainers:
+        - Matt Sinclair <sinclair@cs.wisc.edu>
+        - Matt Poremba <matthew.poremba@amd.com>
 
 arch-vega:
-  status: maintained
-  maintainers:
-    - Matt Poremba <matthew.poremba@amd.com>
-    - Matt Sinclair <sinclair@cs.wisc.edu>
+    status: maintained
+    maintainers:
+        - Matt Sinclair <sinclair@cs.wisc.edu>
+        - Matt Poremba <matthew.poremba@amd.com>
 
 arch-mips:
-  status: orphaned
+    status: orphaned
 
 arch-power:
-  status: maintained
-  maintainers:
-    - Boris Shingarov <shingarov@labware.com>
+    status: orphaned
 
 arch-riscv:
-  status: orphaned
+    status: orphaned
 
 arch-sparc:
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
 
 arch-x86:
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
 
 base:
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Daniel Carvalho <odanrc@yahoo.com.br>
+    status: orphaned
 
 base-stats:
-  status: orphaned
+    status: orphaned
 
 configs:
-  status: maintained
-  maintainers:
-    - Jason Lowe-Power <jason@lowepower.com>
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 cpu:
-  desc: >-
-    General changes to all CPU models (e.g., BaseCPU)
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        General changes to all CPU models (e.g., BaseCPU)
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 cpu-kvm:
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
+    status: maintained
+    maintainers:
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 cpu-minor:
-  status: maintained
-  maintainers:
-    - Zhengrong Wang <seanyukigeek@gmail.com>
+    status: orphaned
 
 cpu-o3:
-  status: orphaned
+    status: orphaned
 
 cpu-simple:
-  status: maintained
-  maintainers:
-    - Jason Lowe-Power <jason@lowepower.com>
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 dev:
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
 
 dev-hsa:
-  status: maintained
-  maintainers:
-    - Matt Poremba <matthew.poremba@amd.com>
+    status: maintained
+    maintainers:
+        - Matt Poremba <matthew.poremba@amd.com>
 
 dev-amdgpu:
-  status: maintained
-  maintainers:
-    - Matt Poremba <matthew.poremba@amd.com>
+    status: maintained
+    maintainers:
+        - Matt Poremba <matthew.poremba@amd.com>
 
 dev-virtio:
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
+    status: maintained
+    maintainers:
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 dev-arm:
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
-    - Giacomo Travaglini <giacomo.travaglini@arm.com>
+    status: maintained
+    maintainers:
+        - Giacomo Travaglini <giacomo.travaglini@arm.com>
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 doc:
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
+    status: orphaned
 
 ext:
-  desc: >-
-    Components external to gem5
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        Components external to gem5
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 ext-testlib:
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Hoa Nguyen <hoanguyen@ucdavis.edu>
+    status: orphaned
+    experts:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
 
 fastmodel:
-  desc: >-
-    Changes relating to ARM Fast Models
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    desc: >-
+        Changes relating to ARM Fast Models
+    status: orphaned
 
 gpu-compute:
-  status: maintained
-  maintainers:
-    - Matt Poremba <matthew.poremba@amd.com>
-    - Matt Sinclair <sinclair@cs.wisc.edu>
+    status: maintained
+    maintainers:
+        - Matt Poremba <matthew.poremba@amd.com>
 
 learning-gem5:
-  desc: >-
-    The code and configs for the Learning gem5 book
-  status: maintained
-  maintainers:
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        The code and configs for the Learning gem5 book
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
 
 stdlib:
-  desc: >-
-    The gem5 standard library found under `src/python/gem5`
-  status: maintained
-  maintainers:
-    - Bobby R. Bruce <bbruce@ucdavis.edu>
+    desc: >-
+        The gem5 standard library found under `src/python/gem5`
+    status: maintained
+    maintainers:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
 
 mem:
-  desc: >-
-    General memory system (e.g., XBar, Packet)
-  status: maintained
-  maintainers:
-    - Nikos Nikoleris <nikos.nikoleris@arm.com>
+    desc: >-
+        General memory system (e.g., XBar, Packet)
+    status: orphaned
 
 mem-cache:
-  desc: >-
-    Classic caches and coherence
-  status: maintained
-  maintainers:
-    - Nikos Nikoleris <nikos.nikoleris@arm.com>
-    - Daniel Carvalho <odanrc@yahoo.com.br>
+    desc: >-
+        Classic caches and coherence
+    status: orphaned
 
 mem-dram:
-  status: maintained
-  maintainers:
-    - Nikos Nikoleris <nikos.nikoleris@arm.com>
+    status: orphaned
 
 mem-garnet:
-  desc: >-
-    Garnet subcomponent of Ruby
-  status: maintained
-  maintainers:
-    - Srikant Bharadwaj <srikant.bharadwaj@amd.com>
+    desc: >-
+        Garnet subcomponent of Ruby
+    status: orphaned
 
 mem-ruby:
-  desc: >-
-    Ruby structures and protocols
-  status: maintained
-  maintainers:
-    - Jason Lowe-Power <jason@lowepower.com>
-    - Matt Sinclair <sinclair@cs.wisc.edu>
+    desc: >-
+        Ruby structures and protocols
+    status: maintained
+    maintainers:
+        - Matt Sinclair <sinclair@cs.wisc.edu>
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 misc:
-  desc: >-
-    Anything outside of the other categories
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        Anything outside of the other categories
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 python:
-  desc: >-
-    Python SimObject wrapping and infrastructure
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        Python SimObject wrapping and infrastructure
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 resources:
-  desc: >-
-    The gem5-resources repo with auxiliary resources for simulation
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        The gem5-resources repo with auxiliary resources for simulation
+    status: maintained
+    maintainers:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 scons:
-  desc: >-
-    Build system
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    desc: >-
+        Build system
+    status: orphaned
 
 sim:
-  desc: >-
-    General simulation components
-  status: maintained
-  maintainers:
-    - Jason Lowe-Power <jason@lowepower.com>
+    desc: >-
+        General simulation components
+    status: orphaned
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
 
 sim-se:
-  desc: >-
-    Syscall emulation
-  status: orphaned
+    desc: >-
+        Syscall emulation
+    status: orphaned
 
 system-arm:
-  status: maintained
-  maintainers:
-    - Andreas Sandberg <andreas.sandberg@arm.com>
-    - Giacomo Travaglini <giacomo.travaglini@arm.com>
+    status: maintained
+    maintainers:
+        - Giacomo Travaglini <giacomo.travaglini@arm.com>
+        - Andreas Sandberg <andreas.sandberg@arm.com>
 
 systemc:
-  desc: >-
-    Code for the gem5 SystemC implementation and interface
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    desc: >-
+        Code for the gem5 SystemC implementation and interface
+    status: orphaned
 
 tests:
-  desc: >-
-    testing changes
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
+    desc: >-
+        testing changes
+    status: maintained
+    maintainers:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
 
 util:
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
 
 util-docker:
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
+    status: maintained
+    maintainers:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
 
 util-m5:
-  status: maintained
-  maintainers:
-    - Gabe Black <gabe.black@gmail.com>
+    status: orphaned
 
 util-gem5art:
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Jason Lowe-Power <jason@lowepower.com>
+    status: orphaned
 
 website:
-  desc: >-
-    The gem5-website repo which contains the gem5.org site
-  status: maintained
-  maintainers:
-    - Bobby Bruce <bbruce@ucdavis.edu>
-    - Hoa Nguyen <hoanguyen@ucdavis.edu>
+    desc: >-
+        The gem5-website repo which contains the gem5.org site
+    status: maintained
+    maintainers:
+        - Bobby R. Bruce <bbruce@ucdavis.edu>
+    experts:
+        - Jason Lowe-Power <jason@lowepower.com>
diff --git a/README b/README
deleted file mode 100644
index 5803372a98..0000000000
--- a/README
+++ /dev/null
@@ -1,43 +0,0 @@
-This is the gem5 simulator.
-
-The main website can be found at http://www.gem5.org
-
-A good starting point is http://www.gem5.org/about, and for
-more information about building the simulator and getting started
-please see http://www.gem5.org/documentation and
-http://www.gem5.org/documentation/learning_gem5/introduction.
-
-To build gem5, you will need the following software: g++ or clang,
-Python (gem5 links in the Python interpreter), SCons, zlib, m4, and lastly
-protobuf if you want trace capture and playback support. Please see
-http://www.gem5.org/documentation/general_docs/building for more details
-concerning the minimum versions of these tools.
-
-Once you have all dependencies resolved, type 'scons
-build/<CONFIG>/gem5.opt' where CONFIG is one of the options in build_opts like
-ARM, NULL, MIPS, POWER, SPARC, X86, Garnet_standalone, etc. This will build an
-optimized version of the gem5 binary (gem5.opt) with the the specified
-configuration. See http://www.gem5.org/documentation/general_docs/building for
-more details and options.
-
-The main source tree includes these subdirectories:
-   - build_opts: pre-made default configurations for gem5
-   - build_tools: tools used internally by gem5's build process.
-   - configs: example simulation configuration scripts
-   - ext: less-common external packages needed to build gem5
-   - include: include files for use in other programs
-   - site_scons: modular components of the build system
-   - src: source code of the gem5 simulator
-   - system: source for some optional system software for simulated systems
-   - tests: regression tests
-   - util: useful utility programs and files
-
-To run full-system simulations, you may need compiled system firmware, kernel
-binaries and one or more disk images, depending on gem5's configuration and
-what type of workload you're trying to run. Many of those resources can be
-downloaded from http://resources.gem5.org, and/or from the git repository here:
-https://gem5.googlesource.com/public/gem5-resources/
-
-If you have questions, please send mail to gem5-users@gem5.org
-
-Enjoy using gem5 and please share your modifications and extensions.
diff --git a/README-RH.md b/README-RH.md
new file mode 100644
index 0000000000..e8aec1d24c
--- /dev/null
+++ b/README-RH.md
@@ -0,0 +1,142 @@
+# Information on using the RowHammer Branch (HammerSim)
+
+## Introduction
+
+This file contains information on how to get started with the RowHammer module.
+
+## Changes in the source
+
+Changes to gem5's source are confined to the following files:
+- `src/mem/DRAMInterface.py`
+- `src/mem/mem_ctrl.cc`
+- `src/mem/mem_ctrl.hh`
+- `src/mem/packet.hh`
+- `src/mem/dram_interface.hh`
+- `src/mem/dram_interface.cc`
+- `src/mem/mem_interface.hh`
+- `src/mem/mem_interface.cc`
+- `src/mem/SConscript`
+
+Most of the RowHammer parameters are defined in `src/mem/DRAMInterface.py`. In
+the class `DRAMInterface`, we have defined the following parameters:
+- `device_file` - Absolute path to the device map file. The "device map" file
+  refers to a list of all weak cells in the DRAM device. Currently we only
+  flip bits at the column level. The resolution of a bit flip can be
+  further tuned to be at the capacitor level. This file is a `.json` file with
+  the following format:
+  ```json
+  {
+    "rank_number": {
+        "bank_number": {
+            "row_number": ["(int)list_of_all_weak_columns"]
+        }
+    }
+  }
+  ```
+  For starters, you can use the map included in the repository under
+  `prob-005.json.zip`. This map is statistically generated using VARIUS
+  (S. Sarangi et al.) (see the abstract/writeup for details). You can also
+  generate this map from the hardware using a RowHammer software like TRRespass
+  (P. Frigo et al.) or Blacksmith (P. Jattke et al.).
+- `rowhammer_threshold` - This is the number of activates required to trigger a
+  single bitflip in a victim row. This number is taken from previous research
+  (Y. Kim et al., J. S. Kim et al.) which states that the minimum activates
+  required for DDR3 DRAM DIMMs is 139,000 and DDR4 DRAM DIMMs is 50,000. LPDDR
+  numbers are even lower (~8,000 -- 16,000).
+- `counter_table_length` - This is a Target Row Refresh (TRR) specific
+  parameter. TRR is the mitigation mechanism present in all modern day DDR4
+  DRAM DIMMs. Most of these TRR parameters are either reverse-engineered via
+  previously mentioned RowHammer softwares or are taken from other reverse-
+  engineering papers including but not limited to (H. Hassan et al.).
+  `counter_table_length` is the total size of the main TRR table. TRR samples
+  frequently activated rows. This table keeps a track of these rows.
+- `trr_variant` - [0 -- 4]. We have implemented a version of the 2 TRR variants
+  out of the three major DRAM vendors (Samsung, SK Hynix and MICRON) based on
+  previous reverse-engineering techniques and also our own observations. This
+  is not a 1:1 implementation of the actual TRR as it is proprietary, however
+  we have tested for similar bitflips in same rows against real hardware.
+  Following are the five different `trr_variants`:
+  - 0: No TRR
+  - 1: A counter table-based TRR mechanism, which works on a per-bank basis.
+  - 2: A sampler-based TRR mechanism, which maintains a global refreshing
+       scheme.
+  - 3: Partially implemented another sampler-based TRR mechanism, which is not
+       verified.
+  - 4: PARA (Y. Kim et al.), one of the first RH mitigation mechanisms, which
+       issues activates to rows with a probability P. This is hard-coded to
+       PARA-001 in the source.
+- `companion_table_length` - Inserting a row into the companion table is
+  tricky IMO. Therefore, I have used another small table, similar to the work
+  called ProHIT (M. Son et al.). A row is initially inserted into the companion
+  table first. Then, it is promoted to the counter table. This is specific to
+  the TRR variant, which uses counter tables.
+- `companion_threshold` - This is the minimum number of activates required to make
+  an entry into the companion table. Understandably, the threshold for the
+  companion table is much lower than the actual TRR table (1024).
+- `trr_stat_dump` - This is a boolean value to dump all the actions of the TRR
+  mechanism. One can set this to true to do a post-runtime analysis of
+  RowHammer and TRR.
+- `rh_stat_dump` - Similar to `trr_stat_dump`, you can also dump the stats of
+  the RowHammer triggers. This is helpful for post-runtime analysis.
+- `single_sided_prob` - The number of bitflips observed with a single-sided
+  RowHammer attack is much lower than a double-sided rowhammer attack. We saw
+  that this drop is 1e7 times less probable than a double-sided RowHammer
+  attack.
+- `half_double_prob` - Half-double (Google) is even more rare than a single
+  sided RowHammer attack. We could not reproduce this with our experimental
+  hardware setup. Therefore, we took this number from the Half-Double report.
+  We kept this probability at 1/1e9.
+
+Adding a new mitigation mechanism has to be done in the `mem_interface.cc`
+file. This is done in:
+```cpp
+// the sampler/counter mechanism is defined here.
+void
+DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
+                       Tick act_tick, uint32_t row) {
+    ...
+    switch (trrVariant) {
+        ...
+        case N: {
+            // write a new mitigation mechanism here.
+        }
+        ...
+    }
+    ...
+}
+
+// the inhibitor mechanism is implemented here. this is because the inhibitor
+// mechanism is triggered when the DRAM device is locked for refreshing.
+void
+DRAMInterface::Rank::processRefreshEvent() {
+    ...
+    switch(dram.trrVariant) {
+        ...
+        case N: {
+            // write the inhibitor mechanism here to keep DRAM timing
+            // consistent.
+        }
+        ...
+    }
+    ...
+}
+```
+
+RowHammer bitflips are checked in the following function:
+```cpp
+void
+DRAMInterface::checkRowHammer(Bank& bank_ref, MemPacket* mem_pkt) {
+    ...
+}
+```
+
+## Using HammerSim
+
+There are pre-defined config scripts that can be used directly with HammerSim.
+They are located in the `configs/dram/rowhammer` directory. There are both
+traffic generators and full system scripts. Note that the disk image path
+needs to be replaced.
+
+## More Information
+
+More on HammerSim can be found here: https://arch.cs.ucdavis.edu/memory/simulation/security/2023/03/20/yarch-hammersim.html
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..b119c28b6e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,108 @@
+# The gem5 Simulator
+
+This is the repository for the gem5 simulator. It contains the full source code
+for the simulator and all tests and regressions.
+
+The gem5 simulator is a modular platform for computer-system architecture
+research, encompassing system-level architecture as well as processor
+microarchitecture. It is primarily used to evaluate new hardware designs,
+system software changes, and compile-time and run-time system optimizations.
+
+The main website can be found at <http://www.gem5.org>.
+
+## Testing status
+
+**Note**: These regard tests run on the develop branch of gem5:
+<https://github.com/gem5/gem5/tree/develop>.
+
+[![Daily Tests](https://github.com/gem5/gem5/actions/workflows/daily-tests.yaml/badge.svg)](https://github.com/gem5/gem5/actions/workflows/daily-tests.yaml)
+[![Weekly Tests](https://github.com/gem5/gem5/actions/workflows/weekly-tests.yaml/badge.svg)](https://github.com/gem5/gem5/actions/workflows/weekly-tests.yaml)
+[![Compiler Tests](https://github.com/gem5/gem5/actions/workflows/compiler-tests.yaml/badge.svg)](https://github.com/gem5/gem5/actions/workflows/compiler-tests.yaml)
+
+## Getting started
+
+A good starting point is <http://www.gem5.org/about>, and for
+more information about building the simulator and getting started
+please see <http://www.gem5.org/documentation> and
+<http://www.gem5.org/documentation/learning_gem5/introduction>.
+
+## Building gem5
+
+To build gem5, you will need the following software: g++ or clang,
+Python (gem5 links in the Python interpreter), SCons, zlib, m4, and lastly
+protobuf if you want trace capture and playback support. Please see
+<http://www.gem5.org/documentation/general_docs/building> for more details
+concerning the minimum versions of these tools.
+
+### Compiling HammerSim
+
+HammerSim has the following dependencies:
+* json: `ext/json`. Follow the `README` in `ext/json` to compile json.
+
+Once you have all dependencies resolved, execute
+`scons build/ALL/gem5.opt` to build an optimized version of the gem5 binary
+(`gem5.opt`) containing all gem5 ISAs. If you only wish to compile gem5 to
+include a single ISA, you can replace `ALL` with the name of the ISA. Valid
+options include `ARM`, `NULL`, `MIPS`, `POWER`, `RISCV`, `SPARC`, and `X86`.
+The complete list of options can be found in the build_opts directory.
+
+See https://www.gem5.org/documentation/general_docs/building for more
+information on building gem5.
+
+## The Source Tree
+
+The main source tree includes these subdirectories:
+
+* build_opts: pre-made default configurations for gem5
+* build_tools: tools used internally by gem5's build process.
+* configs: example simulation configuration scripts
+* ext: less-common external packages needed to build gem5
+* include: include files for use in other programs
+* site_scons: modular components of the build system
+* src: source code of the gem5 simulator. The C++ source, Python wrappers, and Python standard library are found in this directory.
+* system: source for some optional system software for simulated systems
+* tests: regression tests
+* util: useful utility programs and files
+
+## Using HammerSim
+
+Please see `README-RH.md`.
+
+## gem5 Resources
+
+To run full-system simulations, you may need compiled system firmware, kernel
+binaries and one or more disk images, depending on gem5's configuration and
+what type of workload you're trying to run. Many of these resources can be
+obtained from <https://resources.gem5.org>.
+
+More information on gem5 Resources can be found at
+<https://www.gem5.org/documentation/general_docs/gem5_resources/>.
+
+## Getting Help, Reporting bugs, and Requesting Features
+
+We provide a variety of channels for users and developers to get help, report
+bugs, request features, or engage in community discussions. Below
+are a few of the most common we recommend using.
+
+* **GitHub Discussions**: A GitHub Discussions page. This can be used to start
+discussions or ask questions. Available at
+<https://github.com/orgs/gem5/discussions>.
+* **GitHub Issues**: A GitHub Issues page for reporting bugs or requesting
+features. Available at <https://github.com/gem5/gem5/issues>.
+* **Jira Issue Tracker**: A Jira Issue Tracker for reporting bugs or requesting
+features. Available at <https://gem5.atlassian.net/>.
+* **Slack**: A Slack server with a variety of channels for the gem5 community
+to engage in a variety of discussions. Please visit
+<https://www.gem5.org/join-slack> to join.
+* **gem5-users@gem5.org**: A mailing list for users of gem5 to ask questions
+or start discussions. To join the mailing list please visit
+<https://www.gem5.org/mailing_lists>.
+* **gem5-dev@gem5.org**: A mailing list for developers of gem5 to ask questions
+or start discussions. To join the mailing list please visit
+<https://www.gem5.org/mailing_lists>.
+
+## Contributing to gem5
+
+We hope you enjoy using gem5. When appropriate we advise sharing your
+contributions to the project. <https://www.gem5.org/contributing> can help you
+get started. Additional information can be found in the CONTRIBUTING.md file.
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 0c158ee245..1e3a7fbadb 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,3 +1,41 @@
+# Version 23.0.1.0
+
+This minor release incorporates documentation updates, bug fixes, and some minor improvements.
+
+## Documentation updates
+
+* "TESTING.md" has been updated to more accurately reflect our current testing infrastructure.
+* "README" has been replaced with "README.md" and includes more up-to-date information on using gem5.
+* "CONTRIBUTING.md" has been updated to reflect our migration to GitHub and the changes in policy and procedures.
+* Where needed old references to Gerrit have been removed in favor of GitHub.
+
+## Bug Fixes
+
+* Fixes an assert failure when using ARM which was triggered when `shiftAmt` is 0 for a UQRSH instruction.
+* Fixes `name 'fatal' is not defined` being thrown when tracing is off.
+* Fixes a bug in ARM in which the TLBIOS instructions were decoded as normal MSR instructions with no effect on the TLBs.
+* Fixes invalid `packet_id` value in flit.
+* Fixes default CustomMesh for use with Garnet.
+
+## Minor Improvements
+
+* The gem5 resources downloader now outputs more helpful errors in the case of a failure.
+* "util/github-runners-vagrant" has been added. This outlines how to set up a GitHub Actions self-hosted runner for gem5.
+* The PyUnit tests have been refactored to no longer download large resources during testing.
+* Using Perf is now optional when utilizing KVM CPUs.
+
+# Version 23.0.0.1
+
+**[HOTFIX]** Fixes compilation of `GCN3_X86` and `VEGA_X86`.
+
+This hotfix release:
+
+* Removes the use of 'std::random_shuffle'.
+This is a deprecated function in C++17 and has been removed in C++20.
+* Adds missing 'overrides' in "src/arch/amdgpu/vega/insts/instructions.hh".
+* Fixes Linux specific includes, allowing for compilation on non-linux systems.
+* Adds a missing include in "src/gpu-compute/dispatcher.cc".
+
 # Version 23.0
 
 This release has approximately 500 contributions from 50 unique contributors.
diff --git a/SConstruct b/SConstruct
index 4fe2f64366..785071b2d0 100755
--- a/SConstruct
+++ b/SConstruct
@@ -414,6 +414,7 @@ for variant_path in variant_paths:
 
         # We always compile using C++17
         env.Append(CXXFLAGS=['-std=c++17'])
+        env.Append(CXXFLAGS=['-I/`pwd`/ext/json/json/include/'])
 
         if sys.platform.startswith('freebsd'):
             env.Append(CCFLAGS=['-I/usr/local/include'])
@@ -447,6 +448,12 @@ for variant_path in variant_paths:
                     conf.CheckLinkFlag(
                             '-Wl,--thread-count=%d' % GetOption('num_jobs'))
 
+        # Treat warnings as errors but white list some warnings that we
+        # want to allow (e.g., deprecation warnings).
+        env.Append(CCFLAGS=['-Werror',
+                             '-Wno-error=deprecated-declarations',
+                             '-Wno-error=deprecated',
+                            ])
 
     else:
         error('\n'.join((
@@ -556,10 +563,14 @@ for variant_path in variant_paths:
     if sanitizers:
         sanitizers = ','.join(sanitizers)
         if env['GCC'] or env['CLANG']:
+            libsan = (
+                ['-static-libubsan', '-static-libasan']
+                if env['GCC']
+                else ['-static-libsan']
+            )
             env.Append(CCFLAGS=['-fsanitize=%s' % sanitizers,
                                  '-fno-omit-frame-pointer'],
-                        LINKFLAGS=['-fsanitize=%s' % sanitizers,
-                                   '-static-libasan'])
+                       LINKFLAGS=['-fsanitize=%s' % sanitizers] + libsan)
 
             if main["BIN_TARGET_ARCH"] == "x86_64":
                 # Sanitizers can enlarge binary size drammatically, north of
@@ -626,7 +637,7 @@ for variant_path in variant_paths:
                 LINKFLAGS=['-Wl,--no-as-needed', '-lprofiler',
                     '-Wl,--as-needed'])
 
-    env['HAVE_PKG_CONFIG'] = env.Detect('pkg-config')
+    env['HAVE_PKG_CONFIG'] = env.Detect('pkg-config') == 'pkg-config'
 
     with gem5_scons.Configure(env) as conf:
         # On Solaris you need to use libsocket for socket ops
diff --git a/TESTING.md b/TESTING.md
index 146aeac8b1..19965157fb 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -7,9 +7,9 @@ gem5's testing infrastructure has the following goals:
  * Fast execution in the simple case
  * High coverage of gem5 code
 
-# Running unit tests
+## Running the CPP unit tests
 
-gem5 comes with unit tests, created using the Google Test framework. These can
+gem5 comes with unit tests for CPP, created using the Google Test framework. These can
 be built through SCons.
 
 To build and run all the unit tests:
@@ -18,8 +18,8 @@ To build and run all the unit tests:
 scons build/ALL/unittests.opt
 ```
 
-All unit tests should be run prior to posting a patch to
-https://gem5-review.googlesource.com
+All unit tests should be run prior to creating a pull request at
+https://github.com/gem5/gem5/pulls/
 
 To compile and run just one set of tests (e.g. those declared within
 `src/base/bitunion.test.cc`):
@@ -41,9 +41,30 @@ To run a specific test function (e.g., BitUnionData.NormalBitfield):
 ./build/ALL/base/bitunion.test.opt --gtest_filter=BitUnionData.NormalBitfield
 ```
 
-# Running system-level tests
+## Running the Python unit tests
 
-Within the `tests` directory we have system-level tests. These tests run
+gem5 comes with Python unit tests.
+These are built using the [Python unit testing framework](https://docs.python.org/3/library/unittest.html).
+These tests can be found in "tests/gem5/pyunit".
+
+To run these tests a gem5 binary must first be compiled.
+We recommend, `build/ALL/gem5.opt`:
+
+```sh
+scons build/ALL/gem5.opt -j {number of compilation threads}
+```
+
+Then the Pyunit tests may be executed using:
+
+```sh
+./build/ALL/gem5.opt tests/run_pyunit.py
+```
+
+**Note**: These tests are also run via the 'quick' system-level tests, explained below.
+
+## Running system-level tests
+
+Within the "tests/gem5" directory we have system-level tests. These tests run
 the gem5 framework against various hardware configurations, with different
 ISAs, then verify the simulations execute correctly. These should be seen as
 high-level, coarse-grained tests to compliment the unit-tests.
@@ -60,8 +81,8 @@ cd tests
 ./main.py run
 ```
 
-The above is the *minumum* you should run before posting a patch to
-https://gem5-review.googlesource.com
+The above is the *minimum* you should run before posting a pull request to
+https://github.com/gem5/gem5/pulls/
 
 ## Running tests from multiple directories
 
@@ -77,56 +98,37 @@ arguments:
 This will load every test in directory1 and directory2 (and their
 subdirectories).
 
-## Specifying a subset of tests to run
+### 'quick', 'long', and 'very-long' tests
 
-You can use the tag query interface to specify the exact tests you want to run.
-For instance, if you want to run only with `gem5.opt`, you can use
+There are three categories of tests which may be run from the "tests" directory:
 
-```shell
-./main.py run --variant opt
-```
+1. **'quick' tests**. This suite of tests are designed to finish execution in a few hours, inclusive of compilation of gem5.
+We run these as part of our continuous integration tests on pull requests made to our repository.
+These tests all utilize a binary build `scons build/ALL/gem5.opt`, and thus only rely on a single compilation for the tests to run.
+2. **'long' tests**. This suite of tests are designed to finish execution in around 12 hours.
+They incorporate longer running tests which are unsuitable to run as part of the 'quick' tests.
+We run these daily via a scheduled job.
+3. **'very-long' tests**. This suite of tests are designed to finish execution in days.
+They incorporate tests which are too long to run frequently.
+We run these daily via a scheduled job.
 
-Or, if you want to just run quick tests with the `gem5.opt` binary:
+When executing `./main.py run` the 'quick' tests are executed.
+To run the 'long' tests execute:
 
-```shell
-./main.py run --length quick --variant opt
+```sh
+./main.py run --length=long
 ```
 
+and to run the 'very-long' tests execute:
 
-To view all of the available tags, use
-
-```shell
-./main.py list --all-tags
-```
-
-The output is split into tag *types* (e.g., isa, variant, length) and the
-tags for each type are listed after the type name.
-
-Note that when using the isa tag type, tests were traditionally sorted based
-on what compilation it required. However, as tests have switched to all be
-compiled under the ALL compilation, which includes all ISAs so one doesn't
-need to compile each one individually, using the isa tag for ISAs other than
-ALL has become a less optimal way of searching for tests.  It would instead
-be better to run subsets of tests based on their directories, as described
-above.
-
-You can specify "or" between tags within the same type by using the tag flag
-multiple times. For instance, to run everything that is tagged "opt" or "fast"
-use
-
-```shell
-./main.py run --variant opt --variant fast
+```sh
+./main.py run --length=very-long
 ```
 
-You can also specify "and" between different types of tags by specifying more
-than one type on the command line. For instance, this will only run tests with
-both the "ALL" and "opt" tags.
-
-```shell
-./main.py run --isa All --variant opt
-```
+In most cases we recommend running the 'quick' tests for most changes.
+Only in some cases, such as contributions which significantly change the codebase, do we recommend running the 'long' or 'very-long' suite.
 
-## Running tests in batch
+### Running tests in batch
 
 The testing infrastructure provides the two needed methods to run tests in
 batch. First, you can list all of the tests based on the same tags as above in
@@ -160,7 +162,7 @@ run more than one uid, you must call `./main.py` multiple times.
 Currently, you must specify `--skip-build` if you want to run a single suite or
 run in batch mode. Otherwise, you will build gem5 for all architectures.
 
-## Rerunning failed tests
+### Rerunning failed tests
 
 While developing software a common practice is to run tests, make a change, and
 assert that the tests still pass. If tests fail you'll likely want to
@@ -178,7 +180,7 @@ using the `rerun` command.
 ./main.py rerun
 ```
 
-## If something goes wrong
+### If something goes wrong
 
 The first step is to turn up the verbosity of the output using `-v`. This will
 allow you to see what tests are running and why a test is failing.
@@ -186,7 +188,7 @@ allow you to see what tests are running and why a test is failing.
 If a test fails, the temporary directory where the gem5 output was saved is kept
 and the path to the directory is printed in the terminal.
 
-## Debugging the testing infrastructure
+### Debugging the testing infrastructure
 
 Every command takes an option for the verbosity. `-v`, `-vv`, `-vvv` will
 increase the verbosity level. If something isn't working correctly, you can
@@ -197,7 +199,7 @@ contains the base code for tests, suites, fixtures, etc. The code in tests/gem5
 is *gem5-specific* code. For the most part, the code in tests/gem5 extends the
 structures in ext/testlib.
 
-## Common errors
+### Common errors
 
 You may see a number of lines of output during test discovery that look like
 the following:
@@ -213,50 +215,34 @@ test library executes each python file it finds searching for tests. It's okay
 if the file causes an exception. This means there are no tests in that file
 (e.g., it's not a new-style test).
 
+### Running Tests in Parallel
 
-## Binary test applications
-
-The code for some test binaries that are run in the gem5 guest during
-testing can be found in `tests/test-progs`.
-There's one directory per test application.
-The source code is under the `source` directory.
-
-You may have a `bin` directory as well.
-The `bin` directory is automatically created when running the test case that
-uses the test binary.
-This is not the case when a test is run via the --bin-path option.
-In that scenario a bin directory will be created in the selected path
-rather than in `tests/test-progs`.
-The binary is downloaded from the gem5 servers the first
-time it is referenced by a test.
-
-Some other tests (like Linux-boot) don't have sources inside gem5 and
-are simply downloaded from gem5 servers.
+Whimsy has support for parallel testing baked in. This system supports
+running multiple suites at the same time on the same computer. To run
+suites in parallel, supply the `-t <number-tests>` flag to the run command.
 
-## Updating the test binaries
+For example, to run up to three test suites at the same time::
 
-The test infrastructure should check with the gem5 servers to ensure you have
-the latest binaries. However, if you believe your binaries are out of date,
-simply delete the `bin` directory and they will be re-downloaded to your local
-machine.
+    ./main.py run --skip-build -t 3
 
-## Building (new-style) test binaries
+### Testing resources
 
-In each `src/` directory under `tests/test-progs`, there is a Makefile.
-This Makefile downloads a docker image and builds the test binary for some ISA
-(e.g., Makefile.x86 builds the binary for x86). Additionally, if you run `make
-upload` it will upload the binaries to the gem5 server, if you have access to
-modify the binaries. *If you need to modify the binaries for updating a test or
-adding a new test and you don't have access to the gem5 server, contact a
-maintainer (see MAINTAINERS).*
+By default binaries and testing resources are obtained via the [gem5 resources infrastructure](https://www.gem5.org/documentation/general_docs/gem5_resources/).
+The downloaded resources are cached in "tests/gem5/resources".
+The resources are cached to avoid re-downloading when tests are run multiple times, though some of these resources, such as disk images, are large.
+It is therefore recommended you remove the "tests/gem5/resources" directory when you are done testing.
 
+## Running Tests within GitHub Actions
 
-## Running Tests in Parallel
+These tests outlined here are run as part of [GitHub Actions](https://github.com/features/actions).
+These are outlined in [workflow files](https://docs.github.com/en/actions/using-workflows/about-workflows), which can be found in the repo's ".github" directory.
+Each workflow is made up of individual jobs where each job consists of a series of steps which are executed within a [GitHub Runner](https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners).
 
-Whimsy has support for parallel testing baked in. This system supports
-running multiple suites at the same time on the same computer. To run
-suites in parallel, supply the `-t <number-tests>` flag to the run command.
+### Adding Tests to GitHub Actions
 
-For example, to run up to three test suites at the same time::
+To ensure tests added are run in GitHub Actions you may need to modify the workflow files.
+For tests run via `./main.py` we split up the tests via the subdirectories in "tests/gem5".
+For example, all tests under "tests/gem5/cpu_tests" are run as one job.
+Therefore tests added to existing directories are likely to be included, but modifications to the workflow files may be needed if new directories are added.
 
-    ./main.py run --skip-build -t 3
+We strongly recommend that, when adding or amending tests, contributors check the ".github/workflows" files to ensure the tests they specify will be run as intended.
diff --git a/build_tools/code_formatter.py b/build_tools/code_formatter.py
index a2651c9dd0..cb6ce8e5a9 100644
--- a/build_tools/code_formatter.py
+++ b/build_tools/code_formatter.py
@@ -46,7 +46,7 @@
 import re
 
 
-class lookup(object):
+class lookup:
     def __init__(self, formatter, frame, *args, **kwargs):
         self.frame = frame
         self.formatter = formatter
@@ -106,7 +106,7 @@ class code_formatter_meta(type):
     """
 
     def __init__(cls, name, bases, dct):
-        super(code_formatter_meta, cls).__init__(name, bases, dct)
+        super().__init__(name, bases, dct)
         if "pattern" in dct:
             pat = cls.pattern
         else:
@@ -125,7 +125,7 @@ def __init__(cls, name, bases, dct):
         cls.pattern = re.compile(pat, re.VERBOSE | re.DOTALL | re.MULTILINE)
 
 
-class code_formatter(object, metaclass=code_formatter_meta):
+class code_formatter(metaclass=code_formatter_meta):
     delim = r"$"
     ident = r"[_A-z]\w*"
     pos = r"[0-9]+"
@@ -272,7 +272,7 @@ def convert(match):
             # check for a lone identifier
             if ident:
                 indent = match.group("indent")  # must be spaces
-                lone = "%s" % (l[ident],)
+                lone = f"{l[ident]}"
 
                 def indent_lines(gen):
                     for line in gen:
@@ -284,7 +284,7 @@ def indent_lines(gen):
             # check for an identifier, braced or not
             ident = match.group("ident") or match.group("b_ident")
             if ident is not None:
-                return "%s" % (l[ident],)
+                return f"{l[ident]}"
 
             # check for a positional parameter, braced or not
             pos = match.group("pos") or match.group("b_pos")
@@ -295,13 +295,13 @@ def indent_lines(gen):
                         "Positional parameter #%d not found in pattern" % pos,
                         code_formatter.pattern,
                     )
-                return "%s" % (args[int(pos)],)
+                return f"{args[int(pos)]}"
 
             # check for a double braced expression
             eval_expr = match.group("eval")
             if eval_expr is not None:
                 result = eval(eval_expr, {}, l)
-                return "%s" % (result,)
+                return f"{result}"
 
             # check for an escaped delimiter
             if match.group("escaped") is not None:
diff --git a/build_tools/cxx_config_cc.py b/build_tools/cxx_config_cc.py
index 33d3bba864..f2b126b463 100644
--- a/build_tools/cxx_config_cc.py
+++ b/build_tools/cxx_config_cc.py
@@ -3,6 +3,7 @@
 # Copyright 2013 Mark D. Hill and David A. Wood
 # Copyright 2017-2020 ARM Limited
 # Copyright 2021 Google, Inc.
+# Copyright 2023 COSEDA Technologies GmbH
 #
 # The license below extends only to copyright in the software and shall
 # not be construed as granting a license to any other intellectual
@@ -104,7 +105,7 @@ def cxx_bool(b):
 
 for port in sim_object._ports.values():
     is_vector = isinstance(port, m5.params.VectorPort)
-    is_requestor = port.role == "GEM5 REQUESTOR"
+    is_requestor = port.is_source
 
     code(
         'ports["%s"] = new PortDesc("%s", %s, %s);'
diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py
index 5d82b401b2..504a1b9883 100644
--- a/build_tools/enum_cc.py
+++ b/build_tools/enum_cc.py
@@ -97,7 +97,7 @@
         )
     else:
         code(
-            """namespace enums
+            """namespace ${wrapper_name}
 {"""
         )
         code.indent(1)
@@ -112,13 +112,12 @@
 
 if not enum.wrapper_is_struct and not enum.is_class:
     code.dedent(1)
-    code("} // namespace enums")
+    code("} // namespace ${wrapper_name}")
 
 code("} // namespace gem5")
 
 
 if use_python:
-
     name = enum.__name__
     enum_name = enum.__name__ if enum.enum_name is None else enum.enum_name
     wrapper_name = enum_name if enum.is_class else enum.wrapper_name
diff --git a/build_tools/enum_hh.py b/build_tools/enum_hh.py
index a5b9f42cba..f91ffef437 100644
--- a/build_tools/enum_hh.py
+++ b/build_tools/enum_hh.py
@@ -66,7 +66,7 @@
 wrapper_name = enum.wrapper_name
 wrapper = "struct" if enum.wrapper_is_struct else "namespace"
 name = enum.__name__ if enum.enum_name is None else enum.enum_name
-idem_macro = "__ENUM__%s__%s__" % (wrapper_name, name)
+idem_macro = f"__ENUM__{wrapper_name}__{name}__"
 
 code(
     """\
diff --git a/build_tools/grammar.py b/build_tools/grammar.py
index 6ac638bcd0..582ff8710b 100644
--- a/build_tools/grammar.py
+++ b/build_tools/grammar.py
@@ -36,7 +36,7 @@ def __init__(self, message, token=None):
         self.token = token
 
 
-class Grammar(object):
+class Grammar:
     def setupLexerFactory(self, **kwargs):
         if "module" in kwargs:
             raise AttributeError("module is an illegal attribute")
@@ -92,7 +92,7 @@ def __getattr__(self, attr):
             return self.current_lexer.lineno
 
         raise AttributeError(
-            "'%s' object has no attribute '%s'" % (type(self), attr)
+            f"'{type(self)}' object has no attribute '{attr}'"
         )
 
     def parse_string(self, data, source="<string>", debug=None, tracking=0):
@@ -118,7 +118,7 @@ def parse_string(self, data, source="<string>", debug=None, tracking=0):
     def parse_file(self, f, **kwargs):
         if isinstance(f, str):
             source = f
-            f = open(f, "r")
+            f = open(f)
         elif isinstance(f, file):
             source = f.name
         else:
@@ -137,7 +137,7 @@ def p_error(self, t):
                 t.value,
             )
         else:
-            msg = "Syntax error at end of %s" % (self.current_source,)
+            msg = f"Syntax error at end of {self.current_source}"
         raise ParseError(msg, t)
 
     def t_error(self, t):
diff --git a/build_tools/infopy.py b/build_tools/infopy.py
index 4f15f24f98..1662094573 100644
--- a/build_tools/infopy.py
+++ b/build_tools/infopy.py
@@ -51,8 +51,12 @@
 code = code_formatter()
 
 for source in args.files:
-    src = os.path.basename(source)
-    with open(source, "r") as f:
+    # We replace the "."s in the file name with underscores to make
+    # it a valid python identifier. With the dot, "README.md" would generate
+    # `README.md = "..."` which is not valid as `md` is not a property of
+    # `README`.
+    src = os.path.basename(source).replace(".", "_")
+    with open(source) as f:
         data = "".join(f)
     code("${src} = ${{repr(data)}}")
 
diff --git a/build_tools/marshal.py b/build_tools/marshal.py
index 58c78e1632..979c0eda6f 100644
--- a/build_tools/marshal.py
+++ b/build_tools/marshal.py
@@ -74,7 +74,7 @@
 
 _, cpp, python, modpath, abspath = sys.argv
 
-with open(python, "r") as f:
+with open(python) as f:
     src = f.read()
 
 compiled = compile(src, python, "exec")
diff --git a/build_tools/sim_object_param_struct_cc.py b/build_tools/sim_object_param_struct_cc.py
index 0384809456..2ef90c7420 100644
--- a/build_tools/sim_object_param_struct_cc.py
+++ b/build_tools/sim_object_param_struct_cc.py
@@ -88,7 +88,6 @@
 
 # only include pybind if python is enabled in the build
 if use_python:
-
     code(
         """#include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
diff --git a/build_tools/sim_object_param_struct_hh.py b/build_tools/sim_object_param_struct_hh.py
index bf37da2a07..45971669f8 100644
--- a/build_tools/sim_object_param_struct_hh.py
+++ b/build_tools/sim_object_param_struct_hh.py
@@ -81,7 +81,7 @@
 warned_about_nested_templates = False
 
 
-class CxxClass(object):
+class CxxClass:
     def __init__(self, sig, template_params=[]):
         # Split the signature into its constituent parts. This could
         # potentially be done with regular expressions, but
diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py
index e59cd00da4..5b34ddbfcd 100644
--- a/configs/common/GPUTLBConfig.py
+++ b/configs/common/GPUTLBConfig.py
@@ -36,7 +36,6 @@
 
 
 def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
-
     if full_system:
         constructor_call = (
             "VegaGPUTLB(\
@@ -71,7 +70,6 @@ def TLB_constructor(options, level, gpu_ctrl=None, full_system=False):
 
 
 def Coalescer_constructor(options, level, full_system):
-
     if full_system:
         constructor_call = (
             "VegaTLBCoalescer(probesPerCycle = \
diff --git a/configs/common/GPUTLBOptions.py b/configs/common/GPUTLBOptions.py
index 1a77a2c192..6f232e2d0d 100644
--- a/configs/common/GPUTLBOptions.py
+++ b/configs/common/GPUTLBOptions.py
@@ -29,7 +29,6 @@
 
 
 def tlb_options(parser):
-
     # ===================================================================
     # TLB Configuration
     # ===================================================================
diff --git a/configs/common/HMC.py b/configs/common/HMC.py
index f8321f356b..eef1e793fb 100644
--- a/configs/common/HMC.py
+++ b/configs/common/HMC.py
@@ -430,7 +430,6 @@ def add_options(parser):
 
 # configure HMC host controller
 def config_hmc_host_ctrl(opt, system):
-
     # create HMC host controller
     system.hmc_host = SubSystem()
 
@@ -533,7 +532,6 @@ def config_hmc_host_ctrl(opt, system):
 
 # Create an HMC device
 def config_hmc_dev(opt, system, hmc_host):
-
     # create HMC device
     system.hmc_dev = SubSystem()
 
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index baa0d233af..02af2b8449 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,7 @@
 
 def create_mem_intf(intf, r, i, intlv_bits, intlv_size, xor_low_bit):
     """
-    Helper function for creating a single memoy controller from the given
+    Helper function for creating a single memory controller from the given
     options.  This function is invoked multiple times in config_mem function
     to create an array of controllers.
     """
diff --git a/configs/common/ObjectList.py b/configs/common/ObjectList.py
index 4b862db9e8..7b926efaf5 100644
--- a/configs/common/ObjectList.py
+++ b/configs/common/ObjectList.py
@@ -42,7 +42,7 @@
 from textwrap import TextWrapper
 
 
-class ObjectList(object):
+class ObjectList:
     """Creates a list of objects that are sub-classes of a given class."""
 
     def _is_obj_class(self, cls):
@@ -86,7 +86,7 @@ def print(self):
                     print(line)
 
         if self._aliases:
-            print("\Aliases:")
+            print(r"\Aliases:")
             for alias, target in list(self._aliases.items()):
                 print(f"\t{alias} => {target}")
 
@@ -127,14 +127,14 @@ def _is_obj_class(self, cls):
         # We can't use the normal inspect.isclass because the ParamFactory
         # and ProxyFactory classes have a tendency to confuse it.
         try:
-            return super(CPUList, self)._is_obj_class(cls) and not issubclass(
+            return super()._is_obj_class(cls) and not issubclass(
                 cls, m5.objects.CheckerCPU
             )
         except (TypeError, AttributeError):
             return False
 
     def _add_objects(self):
-        super(CPUList, self)._add_objects()
+        super()._add_objects()
 
         from importlib import import_module
 
@@ -164,7 +164,7 @@ class EnumList(ObjectList):
     def _add_objects(self):
         """Add all enum values to the ObjectList"""
         self._sub_classes = {}
-        for (key, value) in list(self.base_cls.__members__.items()):
+        for key, value in list(self.base_cls.__members__.items()):
             # All Enums have a value Num_NAME at the end which we
             # do not want to include
             if not key.startswith("Num_"):
diff --git a/configs/common/SimpleOpts.py b/configs/common/SimpleOpts.py
index 96c73f57b8..da78ec977d 100644
--- a/configs/common/SimpleOpts.py
+++ b/configs/common/SimpleOpts.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py
index 4377b65e64..a2a04c3610 100644
--- a/configs/common/Simulation.py
+++ b/configs/common/Simulation.py
@@ -153,8 +153,8 @@ def findCptDir(options, cptdir, testsys):
         # Assumes that the checkpoint dir names are formatted as follows:
         dirs = listdir(cptdir)
         expr = re.compile(
-            "cpt\.simpoint_(\d+)_inst_(\d+)"
-            + "_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)"
+            r"cpt\.simpoint_(\d+)_inst_(\d+)"
+            + r"_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)"
         )
         cpts = []
         for dir in dirs:
@@ -190,7 +190,7 @@ def findCptDir(options, cptdir, testsys):
 
     else:
         dirs = listdir(cptdir)
-        expr = re.compile("cpt\.([0-9]+)")
+        expr = re.compile(r"cpt\.([0-9]+)")
         cpts = []
         for dir in dirs:
             match = expr.match(dir)
@@ -325,7 +325,7 @@ def parseSimpointAnalysisFile(options, testsys):
         line = simpoint_file.readline()
         if not line:
             break
-        m = re.match("(\d+)\s+(\d+)", line)
+        m = re.match(r"(\d+)\s+(\d+)", line)
         if m:
             interval = int(m.group(1))
         else:
@@ -334,7 +334,7 @@ def parseSimpointAnalysisFile(options, testsys):
         line = weight_file.readline()
         if not line:
             fatal("not enough lines in simpoint weight file!")
-        m = re.match("([0-9\.e\-]+)\s+(\d+)", line)
+        m = re.match(r"([0-9\.e\-]+)\s+(\d+)", line)
         if m:
             weight = float(m.group(1))
         else:
@@ -771,7 +771,6 @@ def run(options, root, testsys, cpu_class):
     if (
         options.take_checkpoints or options.take_simpoint_checkpoints
     ) and options.checkpoint_restore:
-
         if m5.options.outdir:
             cptdir = m5.options.outdir
         else:
diff --git a/configs/common/SysPaths.py b/configs/common/SysPaths.py
index 60375c30c5..382740e110 100644
--- a/configs/common/SysPaths.py
+++ b/configs/common/SysPaths.py
@@ -30,7 +30,7 @@
 config_root = os.path.dirname(config_path)
 
 
-class PathSearchFunc(object):
+class PathSearchFunc:
     _sys_paths = None
     environment_variable = "M5_PATH"
 
@@ -58,7 +58,7 @@ def __call__(self, filename):
                 paths = list(filter(os.path.isdir, paths))
 
                 if not paths:
-                    raise IOError(
+                    raise OSError(
                         "Can't find system files directory, "
                         "check your {} environment variable".format(
                             self.environment_variable
@@ -72,7 +72,7 @@ def __call__(self, filename):
             try:
                 return next(p for p in paths if os.path.exists(p))
             except StopIteration:
-                raise IOError(
+                raise OSError(
                     f"Can't find file '{filepath}' on {self.environment_variable}."
                 )
 
diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py
index d3d46054f1..8fe396abfa 100644
--- a/configs/common/cores/arm/HPI.py
+++ b/configs/common/cores/arm/HPI.py
@@ -44,6 +44,7 @@
 
 from m5.objects import *
 
+
 # Simple function to allow a string of [01x_] to be converted into a
 # mask and value for use with MinorFUTiming
 def make_implicant(implicant_string):
@@ -1679,7 +1680,14 @@ class HPI_MMU(ArmMMU):
     dtb = ArmTLB(entry_type="data", size=256)
 
 
+class HPI_BTB(SimpleBTB):
+    numEntries = 128
+    tagBits = 18
+
+
 class HPI_BP(TournamentBP):
+    btb = HPI_BTB()
+    ras = ReturnAddrStack(numEntries=8)
     localPredictorSize = 64
     localCtrBits = 2
     localHistoryTableSize = 64
@@ -1687,9 +1695,6 @@ class HPI_BP(TournamentBP):
     globalCtrBits = 2
     choicePredictorSize = 1024
     choiceCtrBits = 2
-    BTBEntries = 128
-    BTBTagSize = 18
-    RASSize = 8
     instShiftAmt = 2
 
 
diff --git a/tests/configs/memcheck.py b/configs/common/cores/arm/O3_ARM_Etrace.py
similarity index 68%
rename from tests/configs/memcheck.py
rename to configs/common/cores/arm/O3_ARM_Etrace.py
index 25a48f9f9d..3315664cec 100644
--- a/tests/configs/memcheck.py
+++ b/configs/common/cores/arm/O3_ARM_Etrace.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 ARM Limited
+# Copyright (c) 2012, 2017-2018, 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -10,9 +10,6 @@
 # unmodified and in its entirety in all distributions of the software,
 # modified or unmodified, in source code or in binary form.
 #
-# Copyright (c) 2015 Jason Lowe-Power
-# All rights reserved.
-#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -36,25 +33,27 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import m5
 from m5.objects import *
-
-# the traffic generator is only available if we have protobuf support,
-# so potentially skip this test
-require_sim_object("TrafficGen")
-
-# A wrapper around configs/example/memcheck.py
-
-# For some reason, this is implicitly needed by run.py
-root = None
-
-
-def run_test(root):
-    # Called from tests/run.py
-
-    import sys
-
-    argv = [sys.argv[0], "-m %d" % maxtick]
-
-    # Execute the script we are wrapping
-    run_config("configs/example/memcheck.py", argv=argv)
+from .O3_ARM_v7a import O3_ARM_v7a_3
+
+
+# O3_ARM_v7a_3 adapted to generate elastic traces
+class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3):
+    # Make the number of entries in the ROB, LQ and SQ very
+    # large so that there are no stalls due to resource
+    # limitation as such stalls will get captured in the trace
+    # as compute delay. For replay, ROB, LQ and SQ sizes are
+    # modelled in the Trace CPU.
+    numROBEntries = 512
+    LQEntries = 128
+    SQEntries = 128
+
+    def attach_probe_listener(self, inst_trace_file, data_trace_file):
+        # Attach the elastic trace probe listener. Set the protobuf trace
+        # file names. Set the dependency window size equal to the cpu it
+        # is attached to.
+        self.traceListener = m5.objects.ElasticTrace(
+            instFetchTraceFile=inst_trace_file,
+            dataDepTraceFile=data_trace_file,
+            depWindowSize=3 * self.numROBEntries,
+        )
diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py
index 6a1734235a..de258324be 100644
--- a/configs/common/cores/arm/O3_ARM_v7a.py
+++ b/configs/common/cores/arm/O3_ARM_v7a.py
@@ -26,6 +26,7 @@
 
 from m5.objects import *
 
+
 # Simple ALU Instructions have a latency of 1
 class O3_ARM_v7a_Simple_Int(FUDesc):
     opList = [OpDesc(opClass="IntAlu", opLat=1)]
@@ -107,15 +108,19 @@ class O3_ARM_v7a_FUP(FUPool):
     ]
 
 
+class O3_ARM_v7a_BTB(SimpleBTB):
+    numEntries = 2048
+    tagBits = 18
+
+
 # Bi-Mode Branch Predictor
 class O3_ARM_v7a_BP(BiModeBP):
+    btb = O3_ARM_v7a_BTB()
+    ras = ReturnAddrStack(numEntries=16)
     globalPredictorSize = 8192
     globalCtrBits = 2
     choicePredictorSize = 8192
     choiceCtrBits = 2
-    BTBEntries = 2048
-    BTBTagSize = 18
-    RASSize = 16
     instShiftAmt = 2
 
 
diff --git a/configs/common/cores/arm/ex5_LITTLE.py b/configs/common/cores/arm/ex5_LITTLE.py
index 982792d2d2..a89881436d 100644
--- a/configs/common/cores/arm/ex5_LITTLE.py
+++ b/configs/common/cores/arm/ex5_LITTLE.py
@@ -31,6 +31,7 @@
 #                ex5 LITTLE core (based on the ARM Cortex-A7)
 # -----------------------------------------------------------------------
 
+
 # Simple ALU Instructions have a latency of 3
 class ex5_LITTLE_Simple_Int(MinorDefaultIntFU):
     opList = [OpDesc(opClass="IntAlu", opLat=4)]
diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py
index 0d4d4903cf..7803c1e0cc 100644
--- a/configs/common/cores/arm/ex5_big.py
+++ b/configs/common/cores/arm/ex5_big.py
@@ -31,6 +31,7 @@
 #                ex5 big core (based on the ARM Cortex-A15)
 # -----------------------------------------------------------------------
 
+
 # Simple ALU Instructions have a latency of 1
 class ex5_big_Simple_Int(FUDesc):
     opList = [OpDesc(opClass="IntAlu", opLat=1)]
@@ -104,15 +105,19 @@ class ex5_big_FUP(FUPool):
     ]
 
 
+class ex5_big_BTB(SimpleBTB):
+    numEntries = 4096
+    tagBits = 18
+
+
 # Bi-Mode Branch Predictor
 class ex5_big_BP(BiModeBP):
+    btb = ex5_big_BTB()
+    ras = ReturnAddrStack(numEntries=48)
     globalPredictorSize = 4096
     globalCtrBits = 2
     choicePredictorSize = 1024
     choiceCtrBits = 3
-    BTBEntries = 4096
-    BTBTagSize = 18
-    RASSize = 48
     instShiftAmt = 2
 
 
diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py
index 06f927cbcf..b928152939 100644
--- a/configs/common/cpu2000.py
+++ b/configs/common/cpu2000.py
@@ -71,7 +71,7 @@ def copyfiles(srcdir, dstdir):
         os.symlink(".", outlink)
 
 
-class Benchmark(object):
+class Benchmark:
     def __init__(self, isa, os, input_set):
         if not hasattr(self.__class__, "name"):
             self.name = self.__class__.__name__
@@ -877,7 +877,7 @@ def __init__(self, isa, os, input_set):
         else:
             raise AttributeError(f"unknown ISA {isa}")
 
-        super(vortex, self).__init__(isa, os, input_set)
+        super().__init__(isa, os, input_set)
 
     def test(self, isa, os):
         self.args = [f"{self.endian}.raw"]
diff --git a/configs/deprecated/example/fs.py b/configs/deprecated/example/fs.py
index c50e3ac4cc..ce6eea7623 100644
--- a/configs/deprecated/example/fs.py
+++ b/configs/deprecated/example/fs.py
@@ -164,7 +164,7 @@ def build_test_system(np):
         # assuming that there is just one such port.
         test_sys.iobus.mem_side_ports = test_sys.ruby._io_port.in_ports
 
-        for (i, cpu) in enumerate(test_sys.cpu):
+        for i, cpu in enumerate(test_sys.cpu):
             #
             # Tie the cpu ports to the correct ruby system ports
             #
diff --git a/configs/deprecated/example/se.py b/configs/deprecated/example/se.py
index 8d6735903f..6e0aa5b919 100644
--- a/configs/deprecated/example/se.py
+++ b/configs/deprecated/example/se.py
@@ -94,7 +94,7 @@ def get_processes(args):
         process.gid = os.getgid()
 
         if args.env:
-            with open(args.env, "r") as f:
+            with open(args.env) as f:
                 process.env = [line.rstrip() for line in f]
 
         if len(pargs) > idx:
diff --git a/configs/dist/sw.py b/configs/dist/sw.py
index 726735773e..701e986707 100644
--- a/configs/dist/sw.py
+++ b/configs/dist/sw.py
@@ -62,7 +62,7 @@ def build_switch(args):
         for i in range(args.dist_size)
     ]
 
-    for (i, link) in enumerate(switch.portlink):
+    for i, link in enumerate(switch.portlink):
         link.int0 = switch.interface[i]
 
     return switch
diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py
index 74a94997bb..639a93dbf6 100644
--- a/configs/dram/lat_mem_rd.py
+++ b/configs/dram/lat_mem_rd.py
@@ -150,6 +150,7 @@
 burst_size = 64
 system.cache_line_size = burst_size
 
+
 # lazy version to check if an integer is a power of two
 def is_pow2(num):
     return num != 0 and ((num & (num - 1)) == 0)
@@ -177,13 +178,14 @@ def is_pow2(num):
 # do not pile up in the system, adjust if needed
 itt = 150 * 1000
 
+
 # for every data point, we create a trace containing a random address
 # sequence, so that we can play back the same sequence for warming and
 # the actual measurement
 def create_trace(filename, max_addr, burst_size, itt):
     try:
         proto_out = gzip.open(filename, "wb")
-    except IOError:
+    except OSError:
         print("Failed to open ", filename, " for writing")
         exit(-1)
 
@@ -276,6 +278,7 @@ def create_trace(filename, max_addr, burst_size, itt):
 # basic to explore some of the options
 from common.Caches import *
 
+
 # a starting point for an L3 cache
 class L3Cache(Cache):
     assoc = 16
diff --git a/configs/dram/rowhammer/FSConfigs/x86-rowhammer-fs.py b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-fs.py
new file mode 100644
index 0000000000..9216f50f67
--- /dev/null
+++ b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-fs.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+This script shows an example of running a full system Ubuntu boot simulation
+using the gem5 library. This simulation boots Ubuntu 18.04 using 2 KVM CPU
+cores. The simulation then switches to 2 Timing CPU cores before running an
+echo statement.
+
+Usage
+-----
+
+```
+scons build/X86/gem5.opt
+./build/X86/gem5.opt configs/dram/rowhammer/FSConfigs/x86-rowhammer-fs.py
+```
+"""
+import os
+
+from numpy import partition
+from gem5.utils.requires import requires
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.memory.single_channel import SingleChannelDDR3_1600
+from gem5.components.processors.simple_switchable_processor import (
+    SimpleSwitchableProcessor,
+)
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.resources.resource import *
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
+
+# This runs a check to ensure the gem5 binary is compiled to X86 and to the
+# MESI Two Level coherence protocol.
+requires(
+    isa_required=ISA.X86,
+    coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL,
+    kvm_required=True,
+)
+
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+
+# Here we set up a classic private L1, private L2 cache hierarchy.
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="16kB",
+    l1i_size="16kB",
+    l2_size="256kB",
+)
+
+# Setup the system memory.
+memory = SingleChannelDDR3_1600(size="3GB")
+
+# Here we setup the processor. This is a special switchable processor in which
+# a starting core type and a switch core type must be specified. Once a
+# configuration is instantiated a user may call `processor.switch()` to switch
+# from the starting core types to the switch core types. In this simulation
+# we start with KVM cores to simulate the OS boot, then switch to the Timing
+# cores for the command we wish to run after boot.
+processor = SimpleSwitchableProcessor(
+    starting_core_type=CPUTypes.KVM,
+    switch_core_type=CPUTypes.TIMING,
+    num_cores=2,
+)
+
+# Here we setup the board. The X86Board allows for Full-System X86 simulations.
+board = X86Board(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Here we set the Full System workload.
+# The `set_kernel_disk_workload` function for the X86Board takes a kernel, a
+# disk image, and, optionally, a command to run.
+
+# This is the command to run after the system has booted. The first `m5 exit`
+# will stop the simulation so we can switch the CPU cores from KVM to timing
+# and continue the simulation to run the echo command, sleep for a second,
+# then, again, call `m5 exit` to terminate the simulation. After simulation
+# has ended you may inspect `m5out/system.pc.com_1.device` to see the echo
+# output.
+command = (
+    "m5 exit;" + "echo 'This is running on Timing CPU cores.';" + "sleep 1;"
+)
+
+board.set_kernel_disk_workload(
+    # The x86 Linux kernel is referenced by a local path in the user's gem5
+    # cache directory (~/.cache/gem5/x86-linux-kernel-5.4.49).
+    kernel=CustomResource(
+        os.path.join(
+            os.path.expanduser("~"), ".cache/gem5/x86-linux-kernel-5.4.49"
+        )
+    ),
+    # The x86 Ubuntu disk image is referenced by a local path in the user's
+    # gem5 cache directory (~/.cache/gem5/x86-ubuntu-18.04-img).
+    disk_image=CustomDiskImageResource(
+        os.path.join(
+            os.path.expanduser("~"), ".cache/gem5/x86-ubuntu-18.04-img"
+        ),
+        disk_root_partition="1",
+    ),
+    readfile_contents=command,
+)
+
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        # Here we want to override the default behavior for the first m5 exit
+        # exit event. Instead of exiting the simulator, we just want to
+        # switch the processor. The 2nd m5 exit after will revert to using
+        # default behavior where the simulator run will exit.
+        ExitEvent.EXIT: (func() for func in [processor.switch]),
+    },
+)
+simulator.run()
+simulator.run()
diff --git a/configs/dram/rowhammer/FSConfigs/x86-rowhammer-test.py b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-test.py
new file mode 100644
index 0000000000..3af6b7aed6
--- /dev/null
+++ b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-test.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuration script runs `rowhammer-test` on the rowhammer model of
+gem5.
+
+This setup is close to the simplest setup possible using the gem5
+library. It does not contain any kind of caching, IO, or any non-essential
+components.
+
+Usage
+-----
+
+```
+scons build/X86/gem5.opt
+./build/X86/gem5.opt configs/dram/rowhammer/FSConfigs/x86-rowhammer-test.py
+```
+"""
+
+import os
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import CustomResource
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+
+# This check ensures the gem5 binary is compiled to the X86 ISA target. If not,
+# an exception will be thrown.
+requires(isa_required=ISA.X86)
+
+# In this setup we don't have a cache. `NoCache` can be used for such setups.
+cache_hierarchy = NoCache()
+
+# We use a single channel DDR3_1600 memory system
+memory = SingleChannelDDR3_1600(size="1GB")
+
+# We use a simple Timing processor with one core.
+processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, num_cores=1)
+
+# The gem5 library simple board which can be used to run simple SE-mode
+# simulations.
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Here we set the workload. In this case we run the rowhammer test binary
+# `sequential_v2`, which is referenced through `CustomResource` by a path
+# built from the current working directory, so gem5 must be launched from
+# the directory that contains `tests/test-progs/rowhammer/`.
+board.set_se_binary_workload(
+    # The `Resource` class reads the `resources.json` file from the gem5
+    # resources repository:
+    # https://gem5.googlesource.com/public/gem5-resource.
+    # Any resource specified in this file will be automatically retrieved.
+    # At the time of writing, this file is a WIP and does not contain all
+    # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
+    CustomResource(
+        os.path.join(
+            os.getcwd(), "tests/test-progs/rowhammer/sequential_v2"
+        )  # rowhammer_test")
+    )
+)
+
+# Lastly we run the simulation.
+simulator = Simulator(board=board, full_system=False)
+simulator.run()  # max_ticks = 7000000000)
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(),
+        simulator.get_last_exit_event_cause(),
+    )
+)
diff --git a/configs/dram/rowhammer/FSConfigs/x86-rowhammer-with-kvm.py b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-with-kvm.py
new file mode 100644
index 0000000000..3be5398c59
--- /dev/null
+++ b/configs/dram/rowhammer/FSConfigs/x86-rowhammer-with-kvm.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+This script shows an example of running a full system Ubuntu boot simulation
+using the gem5 library. This simulation boots Ubuntu 18.04 using 2 KVM CPU
+cores. The simulation then switches to 2 Timing CPU cores before running an
+echo statement.
+
+Usage
+-----
+
+```
+scons build/X86/gem5.opt
+./build/X86/gem5.opt configs/dram/rowhammer/FSConfigs/x86-rowhammer-with-kvm.py
+```
+"""
+
+import os
+from gem5.resources.resource import CustomResource, CustomDiskImageResource
+from gem5.utils.requires import requires
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.memory.single_channel import SingleChannelDDR3_1600
+from gem5.components.processors.simple_switchable_processor import (
+    SimpleSwitchableProcessor,
+)
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.resources.resource import Resource
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
+
+# This runs a check to ensure the gem5 binary is compiled to X86 and that the
+# KVM requirement is met.
+requires(isa_required=ISA.X86, kvm_required=True)
+
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+
+# Here we setup a classic private L1, private L2 cache hierarchy.
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
+)
+
+# Setup the system memory.
+memory = SingleChannelDDR3_1600(size="3GB")
+
+# Here we setup the processor. This is a special switchable processor in which
+# a starting core type and a switch core type must be specified. Once a
+# configuration is instantiated a user may call `processor.switch()` to switch
+# from the starting core types to the switch core types. In this simulation
+# we start with KVM cores to simulate the OS boot, then switch to the Timing
+# cores for the command we wish to run after boot.
+processor = SimpleSwitchableProcessor(
+    starting_core_type=CPUTypes.KVM,
+    switch_core_type=CPUTypes.TIMING,
+    num_cores=2,
+)
+
+# Here we setup the board. The X86Board allows for Full-System X86 simulations.
+board = X86Board(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Here we set the Full System workload.
+# The `set_kernel_disk_workload` function for the X86Board takes a kernel, a
+# disk image, and, optionally, a command to run.
+
+# This is the command to run after the system has booted. It only launches
+# `rowhammer_test`; the echo/sleep/`m5 exit` steps are commented out below.
+# NOTE(review): with no `m5 exit` in the command, the exit event that drives
+# the KVM-to-Timing switch must come from elsewhere (e.g. the disk image) --
+# confirm. After simulation has ended you may inspect
+# `m5out/system.pc.com_1.device` to see the terminal output.
+command = "rowhammer_test"
+# + "echo 'This is running on Timing CPU cores.';" \
+# + "sleep 1;"
+# + "m5 exit;"
+
+board.set_kernel_disk_workload(
+    # The x86 linux kernel is read from the local gem5 cache directory
+    # (~/.cache/gem5); it is not downloaded here and must already be present.
+    kernel=CustomResource(
+        os.path.join(
+            os.path.expanduser("~"), ".cache/gem5/x86-linux-kernel-5.4.49"
+        )
+    ),
+    # The disk image `rh.img` is read from the current working directory; it
+    # is not downloaded here and must already be present.
+    disk_image=CustomDiskImageResource(
+        os.path.join(os.getcwd(), "rh.img"), disk_root_partition="1"
+    ),
+    readfile_contents=command,
+)
+
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        # Here we want to override the default behavior for the first m5 exit
+        # exit event. Instead of exiting the simulator, we just want to
+        # switch the processor. The 2nd m5 exit after will revert to using
+        # default behavior where the simulator run will exit.
+        ExitEvent.EXIT: (func() for func in [processor.switch]),
+    },
+)
+simulator.run()
+simulator.run()
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram.py b/configs/dram/rowhammer/TrafficGen/simple_dram.py
new file mode 100644
index 0000000000..39639bba0f
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+import m5
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+
+system.generator = PyTrafficGen()
+
+
+class DRAM_TEST(DDR4_2400_16x4):
+    ranks_per_channel = 1
+
+
+system.mem_ranges = [AddrRange("256MB")]
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+# system.mem_ctrl.command_window = '2ns'
+# system.mem_ctrl.nvm = HBM_2000_4H_1x128(range=system.mem_ranges[1])
+
+
+# system.mem_ctrl.dram.tREFI = "2000"
+# system.mem_ctrl.nvm.tREFI = "2000"
+# system.mem_ctrl.dram.read_buffer_size = "256"
+
+# comment one of these policies
+# system.mem_ctrl.mem_sched_policy = "frfcfs"
+# system.mem_ctrl.mem_sched_policy = "fcfs"
+
+system.mem_ctrl.port = system.generator.port
+
+
+def createRandomTraffic(tgen):  # NOTE: defined but never started below
+    yield tgen.createRandom(
+        10000000,  # duration
+        0,  # min_addr
+        AddrRange("256MB").end,  # max_addr
+        64,  # block_size
+        1000,  # min_period
+        1000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+def createLinearTraffic(tgen):  # the generator that is started below
+    yield tgen.createLinear(
+        10000000,  # duration
+        0,  # min_addr
+        AddrRange("3kB").end,  # max_addr
+        64,  # block_size
+        1000,  # min_period
+        1000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+system.generator.start(createLinearTraffic(system.generator))
+exit_event = m5.simulate()
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_18S_VA.py b/configs/dram/rowhammer/TrafficGen/simple_dram_18S_VA.py
new file mode 100644
index 0000000000..a7f205e91c
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_18S_VA.py
@@ -0,0 +1,339 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This rowhammer pattern was found on a 8GB single rank Vendor A DIMM using
+# blacksmith (https://github.com/comsec-group/blacksmith). The DRAM
+# specifications are:
+#
+# ---=== SPD EEPROM Information ===---
+# EEPROM CRC of bytes 0-125                        OK (0xBD97)
+# # of bytes written to SDRAM EEPROM               384
+# Total number of bytes in EEPROM                  512
+# Fundamental Memory type                          DDR4 SDRAM
+# SPD Revision                                     1.1
+# Module Type                                      UDIMM
+# EEPROM CRC of bytes 128-253                      OK (0x53D8)
+
+# ---=== Memory Characteristics ===---
+# Maximum module speed                             2400 MHz (PC4-19200)
+# Size                                             8192 MB
+# Banks x Rows x Columns x Bits                    16 x 16 x 10 x 64
+# SDRAM Device Width                               8 bits
+# Ranks                                            1
+# AA-RCD-RP-RAS (cycles)                           17-17-17-39
+# Supported CAS Latencies                          18T, 17T, 16T, 15T, 14T, 13T
+#                                                  12T, 11T, 10T
+
+# ---=== Timings at Standard Speeds ===---
+# AA-RCD-RP-RAS (cycles) as DDR4-2400              17-17-17-39
+# AA-RCD-RP-RAS (cycles) as DDR4-2133              15-15-15-35
+# AA-RCD-RP-RAS (cycles) as DDR4-1866              13-13-13-30
+# AA-RCD-RP-RAS (cycles) as DDR4-1600              11-11-11-26
+
+# ---=== Timing Parameters ===---
+# Minimum Cycle Time (tCKmin)                      0.833 ns
+# Maximum Cycle Time (tCKmax)                      1.600 ns
+# Minimum CAS Latency Time (tAA)                   13.750 ns
+# Minimum RAS to CAS Delay (tRCD)                  13.750 ns
+# Minimum Row Precharge Delay (tRP)                13.750 ns
+# Minimum Active to Precharge Delay (tRAS)         32.000 ns
+# Minimum Active to Auto-Refresh Delay (tRC)       45.750 ns
+# Minimum Recovery Delay (tRFC1)                   350.000 ns
+# Minimum Recovery Delay (tRFC2)                   260.000 ns
+# Minimum Recovery Delay (tRFC4)                   160.000 ns
+# Minimum Four Activate Window Delay (tFAW)        21.000 ns
+# Minimum Row Active to Row Active Delay (tRRD_S)  3.300 ns
+# Minimum Row Active to Row Active Delay (tRRD_L)  4.900 ns
+# Minimum CAS to CAS Delay (tCCD_L)                5.000 ns
+# Minimum Write Recovery Time (tWR)                15.000 ns
+# Minimum Write to Read Time (tWTR_S)              2.500 ns
+# Minimum Write to Read Time (tWTR_L)              7.500 ns
+
+# ---=== Other Information ===---
+# Package Type                                     Monolithic
+# Maximum Activate Count                           Unlimited
+# Post Package Repair                              One row per bank group
+# Soft PPR                                         Supported
+# Module Nominal Voltage                           1.2 V
+# Thermal Sensor                                   No
+
+# ---=== Physical Characteristics ===---
+# Module Height                                    32 mm
+# Module Thickness                                 2 mm front, 1 mm back
+# Module Reference Card                            A revision 2
+
+# --------------------------------------------------------------------------- #
+
+# The following aggressor addresses were collected from an Intel(R) Core
+# i7-7700 machine with a single rank Vendor A DDR4 DRAM DIMM.
+
+# Aggressor 0x203eafc000,row 8023.
+# Aggressor 0x203eb24000,row 8025.
+# Aggressor 0x2002490000,row 292.
+# Aggressor 0x20024d8000,row 294.
+# Aggressor 0x201586c000,row 2755.
+# Aggressor 0x20158b4000,row 2757.
+# Aggressor 0x203f900000,row 8136.
+# Aggressor 0x203f948000,row 8138.
+
+# This suggests that the trr_length has to be <= 6 (NOTE: the original
+# sentence is truncated here). Only the row information is important here.
+# This script uses 8 traffic generators, one per aggressor row.
+
+# utility method
+def get_data_chunk(row_number, width=8):  # NOTE: `width` is unused
+    return row_number * 128  # callers append "kB" to build an AddrRange
+
+
+from m5.objects import *
+import m5
+
+duration = int(1e11)
+
+# Configuring the device type and the parameters specified by executing
+# `decode-dimms`.
+class Vendor_A_1R_x8(DDR4_2400_8x8):
+
+    ranks_per_channel = 1
+    trr_variant = 1
+
+    # We don't know these values yet for vendor A.
+    companion_threshold = 1024
+    trr_threshold = 32768
+    rowhammer_threshold = 50000
+    counter_table_length = 6
+
+    # Set this flag to True if you want to do a post simulation analysis. It is
+    # recommended only when simulating full system.
+    rh_stat_dump = False
+
+    # The device size does not matter as the known good rowhammer patterns were
+    # collected on 1 GB huge pages. This is discussed in detail in the paper.
+    # device_size = "1GiB"
+
+    # tCK does not have a MIN and a MAX in gem5. We ignore this parameter and
+    # use the default value.
+
+    # The following variables are overridden in this config script.
+    tRCD = "13.75ns"  # matches tRCD (13.750 ns) in the SPD dump above
+    tRP = "13.75ns"  # matches tRP (13.750 ns) in the SPD dump above
+    tXAW = "21ns"  # matches tFAW (21.000 ns) in the SPD dump above
+
+    # We are not making any changes to the power model.
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("1GB")]
+
+system.generator0 = PyTrafficGen()
+system.generator1 = PyTrafficGen()
+system.generator2 = PyTrafficGen()
+system.generator3 = PyTrafficGen()
+system.generator4 = PyTrafficGen()
+system.generator5 = PyTrafficGen()
+system.generator6 = PyTrafficGen()
+system.generator7 = PyTrafficGen()
+system.generator8 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = Vendor_A_1R_x8(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+
+system.membus.cpu_side_ports = system.generator0.port
+system.membus.cpu_side_ports = system.generator1.port
+system.membus.cpu_side_ports = system.generator2.port
+system.membus.cpu_side_ports = system.generator3.port
+system.membus.cpu_side_ports = system.generator4.port
+system.membus.cpu_side_ports = system.generator5.port
+system.membus.cpu_side_ports = system.generator6.port
+system.membus.cpu_side_ports = system.generator7.port
+
+# for testing the victim row
+
+system.membus.cpu_side_ports = system.generator8.port
+system.mem_ctrl.port = system.membus.mem_side_ports
+
+
+def createLinearTraffic0(tgen):  # aggressor row 292
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(292)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(292) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+#
+
+
+def createLinearTraffic1(tgen):  # aggressor row 294
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(294)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(294) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+def createLinearTraffic2(tgen):  # aggressor row 2755
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(2755)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(2755) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+#
+
+
+def createLinearTraffic3(tgen):  # aggressor row 2757
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(2757)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(2757) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+def createLinearTraffic4(tgen):  # aggressor row 8023
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8023)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8023) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+#
+
+
+def createLinearTraffic5(tgen):  # aggressor row 8025
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8025)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8025) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+def createLinearTraffic6(tgen):  # aggressor row 8136
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8136)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8136) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+#
+
+
+def createLinearTraffic7(tgen):  # aggressor row 8138
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8138)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8138) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+# ----- data -----
+def createLinearTraffic8(tgen):  # slow reads used to test the victim row
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("37504kB").end,  # min_addr (37504 == 293 * 128)
+        AddrRange("37505kB").end,  # max_addr
+        64,  # block_size
+        1000000,  # min_period
+        1000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+system.generator0.start(createLinearTraffic0(system.generator0))
+system.generator1.start(createLinearTraffic1(system.generator1))
+system.generator2.start(createLinearTraffic2(system.generator2))
+system.generator3.start(createLinearTraffic3(system.generator3))
+system.generator4.start(createLinearTraffic4(system.generator4))
+system.generator5.start(createLinearTraffic5(system.generator5))
+system.generator6.start(createLinearTraffic6(system.generator6))
+system.generator7.start(createLinearTraffic7(system.generator7))
+
+system.generator8.start(createLinearTraffic8(system.generator8))
+exit_event = m5.simulate()
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_1S-trafgen.py b/configs/dram/rowhammer/TrafficGen/simple_dram_1S-trafgen.py
new file mode 100644
index 0000000000..de648f9ca3
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_1S-trafgen.py
@@ -0,0 +1,221 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+import m5
+
+
+class DRAM_TEST(DDR4_2400_8x8):
+    ranks_per_channel = 1
+    rowhammer_threshold = 3
+    trr_variant = 0
+    trr_threshold = 16834  # NOTE(review): typo for 16384 (2**14)? confirm
+    counter_table_length = 6
+    companion_table_length = 6
+    rh_stat_dump = False
+    half_double_prob = 1e7  # NOTE(review): exceeds 1; confirm the unit
+
+
+duration = int(1e11)
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("1GB")]
+
+system.generator0 = PyTrafficGen()
+system.generator1 = PyTrafficGen()
+# system.generator2 = PyTrafficGen()
+# system.generator3 = PyTrafficGen()
+# system.generator4 = PyTrafficGen()
+# system.generator5 = PyTrafficGen()
+# system.generator6 = PyTrafficGen()
+# system.generator7 = PyTrafficGen()
+system.generator8 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+
+system.membus.cpu_side_ports = system.generator0.port
+system.membus.cpu_side_ports = system.generator1.port
+# system.membus.cpu_side_ports = system.generator2.port
+# system.membus.cpu_side_ports = system.generator3.port
+# system.membus.cpu_side_ports = system.generator4.port
+# system.membus.cpu_side_ports = system.generator5.port
+# system.membus.cpu_side_ports = system.generator6.port
+# system.membus.cpu_side_ports = system.generator7.port
+
+# for testing the victim row
+
+system.membus.cpu_side_ports = system.generator8.port
+
+system.mem_ctrl.port = system.membus.mem_side_ports
+
+
+def get_data_chunk(row_number, width=8):  # NOTE: `width` is unused
+    return row_number * 128  # only used by the commented-out generators
+
+
+def createLinearTraffic0(tgen):
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("37376kB").end,  # min_addr (37376 == 292 * 128)
+        AddrRange("37383kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+#
+
+
+def createLinearTraffic1(tgen):
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("37632kB").end,  # min_addr (37632 == 294 * 128)
+        AddrRange("37639kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+# def createLinearTraffic2(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(2755)) + "kB").end,              # min_addr
+#                             AddrRange(str(get_data_chunk(2755) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# #
+
+# def createLinearTraffic3(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(2757)) + "kB").end,             # min_addr
+#                             AddrRange(str(get_data_chunk(2757) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# def createLinearTraffic4(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8023)) + "kB").end,              # min_addr
+#                             AddrRange(str(get_data_chunk(8023) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# #
+
+# def createLinearTraffic5(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8025)) + "kB").end,             # min_addr
+#                             AddrRange(str(get_data_chunk(8025) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# def createLinearTraffic6(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8136)) + "kB").end,              # min_addr
+#                             AddrRange(str(get_data_chunk(8136) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# #
+
+# def createLinearTraffic7(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8138)) + "kB").end,             # min_addr
+#                             AddrRange(str(get_data_chunk(8138) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+
+# ----- data -----
+def createLinearTraffic8(tgen):  # slow reads used to test the victim row
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("47504kB").end,  # min_addr (TODO confirm: 37504kB?)
+        AddrRange("47505kB").end,  # max_addr; 47504 is not a 128 multiple
+        64,  # block_size
+        1000000,  # min_period
+        1000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+
+system.generator0.start(createLinearTraffic0(system.generator0))
+system.generator1.start(createLinearTraffic1(system.generator1))
+# system.generator2.start(createLinearTraffic2(system.generator2))
+# system.generator3.start(createLinearTraffic3(system.generator3))
+# system.generator4.start(createLinearTraffic4(system.generator4))
+# system.generator5.start(createLinearTraffic5(system.generator5))
+# system.generator6.start(createLinearTraffic6(system.generator6))
+# system.generator7.start(createLinearTraffic7(system.generator7))
+
+system.generator8.start(createLinearTraffic8(system.generator8))
+exit_event = m5.simulate()
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_2R-RH.py b/configs/dram/rowhammer/TrafficGen/simple_dram_2R-RH.py
new file mode 100644
index 0000000000..61fc090a11
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_2R-RH.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+import m5
+
+
+class DRAM_TEST(DDR4_2400_16x4):  # DDR4_2400_16x4 with the overrides below
+    ranks_per_channel = 1
+    # companion_threshold = 2
+    # trr_threshold = 4
+    rowhammer_threshold = 7  # the only rowhammer-related override left enabled
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("256MB")]
+# One PyTrafficGen per createLinearTrafficN pattern defined below.
+system.generator1 = PyTrafficGen()
+system.generator2 = PyTrafficGen()
+system.generator3 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+# All three generator ports and the memory controller attach to this one crossbar.
+system.membus.cpu_side_ports = system.generator1.port
+system.membus.cpu_side_ports = system.generator2.port
+system.membus.cpu_side_ports = system.generator3.port
+system.mem_ctrl.port = system.membus.mem_side_ports
+# system.mem_ctrl.dram.tREFI = "2000s"
+
+# system.mem_ctrl.port = system.generator1.port
+# system.mem_ctrl.port = system.generator2.port
+
+
+def createLinearTraffic1(tgen):  # pattern handed to system.generator1.start()
+    yield tgen.createLinear(
+        10000000000,  # duration
+        AddrRange("128kB").end,  # min_addr
+        AddrRange("132kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic2(tgen):  # pattern handed to system.generator2.start()
+    yield tgen.createLinear(
+        10000000000,  # duration
+        AddrRange("384kB").end,  # min_addr
+        AddrRange("386kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic3(tgen):  # pattern handed to system.generator3.start()
+    yield tgen.createLinear(
+        10000000000,  # duration
+        0,  # min_addr
+        AddrRange("1kB").end,  # max_addr
+        64,  # block_size
+        1000000,  # min_period (500x the period used by patterns 1 and 2)
+        1000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+root = Root(full_system=False, system=system)
+# Instantiate first, then hand each generator its own traffic pattern.
+m5.instantiate()
+system.generator1.start(createLinearTraffic1(system.generator1))
+system.generator2.start(createLinearTraffic2(system.generator2))
+system.generator3.start(createLinearTraffic3(system.generator3))
+exit_event = m5.simulate()  # exit_event is not inspected afterwards
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_2trafgen.py b/configs/dram/rowhammer/TrafficGen/simple_dram_2trafgen.py
new file mode 100644
index 0000000000..fb5f138fb0
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_2trafgen.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+import m5
+import os
+
+
+class DRAM_TEST(DDR4_2400_8x8):  # DDR4_2400_8x8 with the overrides below
+    ranks_per_channel = 1
+    # rowhammer_threshold = 3
+    trr_variant = 0
+    counter_table_length = 6
+
+
+duration = int(1e11)  # duration argument of every createLinear() call below
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("256MB")]
+# Four generators; generator1 and generator4 are both started with pattern 1.
+system.generator1 = PyTrafficGen()
+system.generator2 = PyTrafficGen()
+system.generator3 = PyTrafficGen()
+system.generator4 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+# All generator ports and the memory controller attach to this one crossbar.
+system.membus.cpu_side_ports = system.generator1.port
+system.membus.cpu_side_ports = system.generator2.port
+system.membus.cpu_side_ports = system.generator3.port
+system.membus.cpu_side_ports = system.generator4.port
+system.mem_ctrl.port = system.membus.mem_side_ports
+# system.mem_ctrl.dram.tREFI = "2000s"
+
+# system.mem_ctrl.port = system.generator1.port
+# system.mem_ctrl.port = system.generator2.port
+
+
+def createLinearTraffic1(tgen):  # pattern started on generator1 and generator4
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("128kB").end,  # min_addr
+        AddrRange("135kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic2(tgen):  # pattern handed to system.generator2.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("384kB").end,  # min_addr
+        AddrRange("391kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+# ----- data -----
+def createLinearTraffic3(tgen):  # pattern handed to system.generator3.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("256kB").end,  # min_addr
+        AddrRange("263kB").end,  # max_addr
+        64,  # block_size
+        1000000,  # min_period (500x the period used by patterns 1 and 2)
+        1000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+# def createLinearTraffic3(tgen):
+#     yield tgen.createLinear(10000000,   # duration
+#                             0,              # min_addr
+#                             AddrRange('1kB').end,              # max_adr
+#                             64,             # block_size
+#                             1000000,          # min_period
+#                             1000000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+system.generator4.start(createLinearTraffic1(system.generator4))  # NOTE(review): reuses pattern 1; this file defines no createLinearTraffic4 - confirm intended
+system.generator1.start(createLinearTraffic1(system.generator1))
+system.generator2.start(createLinearTraffic2(system.generator2))
+system.generator3.start(createLinearTraffic3(system.generator3))
+exit_event = m5.simulate()  # exit_event is not inspected afterwards
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_8traficgen.py b/configs/dram/rowhammer/TrafficGen/simple_dram_8traficgen.py
new file mode 100644
index 0000000000..0b7a81fd06
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_8traficgen.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2021-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+import m5, os
+
+
+class DRAM_TEST(DDR4_2400_8x8):  # DDR4_2400_8x8 with the overrides below
+    ranks_per_channel = 1
+    # rowhammer_threshold = 3
+    trr_variant = 0
+    trr_threshold = 16834  # NOTE(review): 16384 (2**14) intended? confirm
+    counter_table_length = 6
+    companion_table_length = 6
+    rh_stat_dump = False
+    half_double_prob = 1e7
+    double_sided_prob = 1e1
+    single_sided_prob = 1e1
+
+
+duration = int(1e11)  # duration argument of every createLinear() call below
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("1GB")]
+# One PyTrafficGen per createLinearTrafficN pattern (0-7) defined below.
+system.generator0 = PyTrafficGen()
+system.generator1 = PyTrafficGen()
+system.generator2 = PyTrafficGen()
+system.generator3 = PyTrafficGen()
+system.generator4 = PyTrafficGen()
+system.generator5 = PyTrafficGen()
+system.generator6 = PyTrafficGen()
+system.generator7 = PyTrafficGen()
+# system.generator8 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+# All generator ports and the memory controller attach to this one crossbar.
+system.membus.cpu_side_ports = system.generator0.port
+system.membus.cpu_side_ports = system.generator1.port
+system.membus.cpu_side_ports = system.generator2.port
+system.membus.cpu_side_ports = system.generator3.port
+system.membus.cpu_side_ports = system.generator4.port
+system.membus.cpu_side_ports = system.generator5.port
+system.membus.cpu_side_ports = system.generator6.port
+system.membus.cpu_side_ports = system.generator7.port
+
+# for testing the victim row (generator8 is disabled in this configuration)
+
+# system.membus.cpu_side_ports = system.generator8.port
+
+system.mem_ctrl.port = system.membus.mem_side_ports
+
+
+def get_data_chunk(row_number, width=8):  # width is accepted but never used
+    return row_number * 128  # callers append "kB" to this value to build an AddrRange
+
+
+def createLinearTraffic0(tgen):  # pattern handed to system.generator0.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("37376kB").end,  # min_addr (37376 == get_data_chunk(292))
+        AddrRange("37383kB").end,  # max_addr (37376 + 7)
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic1(tgen):  # pattern handed to system.generator1.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange("37632kB").end,  # min_addr (37632 == get_data_chunk(294))
+        AddrRange("37639kB").end,  # max_addr (37632 + 7)
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic2(tgen):  # pattern handed to system.generator2.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(2755)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(2755) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic3(tgen):  # pattern handed to system.generator3.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(2757)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(2757) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic4(tgen):  # pattern handed to system.generator4.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8023)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8023) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic5(tgen):  # pattern handed to system.generator5.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8025)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8025) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic6(tgen):  # pattern handed to system.generator6.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8136)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8136) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic7(tgen):  # pattern handed to system.generator7.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(8138)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(8138) + 7) + "kB").end,  # max_addr
+        64,  # block_size
+        2000,  # min_period
+        2000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+# ----- data -----
+# def createLinearTraffic8(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange('37504kB').end,              # min_addr
+#                             AddrRange('37505kB').end,              # max_adr
+#                             64,             # block_size
+#                             1000000,          # min_period
+#                             1000000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+# Generators 0-7 each get their own pattern; the generator8 start stays disabled.
+system.generator0.start(createLinearTraffic0(system.generator0))
+system.generator1.start(createLinearTraffic1(system.generator1))
+system.generator2.start(createLinearTraffic2(system.generator2))
+system.generator3.start(createLinearTraffic3(system.generator3))
+system.generator4.start(createLinearTraffic4(system.generator4))
+system.generator5.start(createLinearTraffic5(system.generator5))
+system.generator6.start(createLinearTraffic6(system.generator6))
+system.generator7.start(createLinearTraffic7(system.generator7))
+
+# system.generator8.start(createLinearTraffic8(system.generator8))
+exit_event = m5.simulate()  # exit_event is not inspected afterwards
diff --git a/configs/dram/rowhammer/TrafficGen/simple_dram_half_double.py b/configs/dram/rowhammer/TrafficGen/simple_dram_half_double.py
new file mode 100644
index 0000000000..cf77911b09
--- /dev/null
+++ b/configs/dram/rowhammer/TrafficGen/simple_dram_half_double.py
@@ -0,0 +1,201 @@
+from m5.objects import *
+import m5
+import os
+
+
+class DRAM_TEST(DDR4_2400_8x8):  # DDR4_2400_8x8 with the overrides below
+    ranks_per_channel = 1
+    trr_variant = 0
+    # not relevant stats
+    trr_threshold = 32678  # NOTE(review): 32768 (2**15) intended? confirm
+    counter_table_length = 6
+    companion_table_length = 6
+    rh_stat_dump = False
+    half_double_prob = 1e3
+    double_sided_prob = 1e5
+
+
+duration = int(1e11)  # duration argument of every createLinear() call below
+
+
+system = System()
+system.clk_domain = SrcClockDomain()
+system.clk_domain.clock = "4GHz"
+system.clk_domain.voltage_domain = VoltageDomain()
+system.mem_mode = "timing"
+system.mem_ranges = [AddrRange("1GB")]
+# Generators 0-3 and 8 are instantiated; 4-7 are disabled.
+system.generator0 = PyTrafficGen()
+system.generator1 = PyTrafficGen()
+system.generator2 = PyTrafficGen()
+system.generator3 = PyTrafficGen()
+# system.generator4 = PyTrafficGen()
+# system.generator5 = PyTrafficGen()
+# system.generator6 = PyTrafficGen()
+# system.generator7 = PyTrafficGen()
+system.generator8 = PyTrafficGen()
+
+system.mem_ctrl = MemCtrl()
+
+system.mem_ctrl.dram = DRAM_TEST(range=system.mem_ranges[0])
+
+system.membus = L2XBar()
+# All generator ports and the memory controller attach to this one crossbar.
+system.membus.cpu_side_ports = system.generator0.port
+system.membus.cpu_side_ports = system.generator1.port
+system.membus.cpu_side_ports = system.generator2.port
+system.membus.cpu_side_ports = system.generator3.port
+# system.membus.cpu_side_ports = system.generator4.port
+# system.membus.cpu_side_ports = system.generator5.port
+# system.membus.cpu_side_ports = system.generator6.port
+# system.membus.cpu_side_ports = system.generator7.port
+
+# for testing the victim row
+# NOTE(review): generator8 is connected here, but its start() call at the end of this file is commented out - confirm
+system.membus.cpu_side_ports = system.generator8.port
+
+system.mem_ctrl.port = system.membus.mem_side_ports
+
+
+def get_data_chunk(row_number, width=8):  # width is accepted but never used
+    return row_number * 128  # callers append "kB" to this value to build an AddrRange
+
+
+def createLinearTraffic0(tgen):  # pattern handed to system.generator0.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(291)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(291)) + "kB").end,  # max_addr (same as min_addr)
+        64,  # block_size
+        100,  # min_period
+        100,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic1(tgen):  # pattern handed to system.generator1.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(293)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(293)) + "kB").end,  # max_addr (same as min_addr)
+        64,  # block_size
+        100,  # min_period
+        100,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+def createLinearTraffic2(tgen):  # pattern handed to system.generator2.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(290)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(290)) + "kB").end,  # max_addr (same as min_addr)
+        64,  # block_size
+        800000000,  # min_period (patterns 0 and 1 use 100)
+        800000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+#
+
+
+def createLinearTraffic3(tgen):  # pattern handed to system.generator3.start()
+    yield tgen.createLinear(
+        duration,  # duration
+        AddrRange(str(get_data_chunk(294)) + "kB").end,  # min_addr
+        AddrRange(str(get_data_chunk(294)) + "kB").end,  # max_addr (same as min_addr)
+        64,  # block_size
+        800000000,  # min_period (patterns 0 and 1 use 100)
+        800000000,  # max_period
+        100,  # rd_perc
+        0,  # data_limit
+    )
+    yield tgen.createExit(0)  # yielded after the createLinear() phase
+
+
+# def createLinearTraffic4(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8023)) + "kB").end,              # min_addr
+#                             AddrRange(str(get_data_chunk(8023) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# #
+
+# def createLinearTraffic5(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8025)) + "kB").end,             # min_addr
+#                             AddrRange(str(get_data_chunk(8025) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# def createLinearTraffic6(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8136)) + "kB").end,              # min_addr
+#                             AddrRange(str(get_data_chunk(8136) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+# #
+
+# def createLinearTraffic7(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange(str(get_data_chunk(8138)) + "kB").end,             # min_addr
+#                             AddrRange(str(get_data_chunk(8138) + 7) + "kB").end,              # max_adr
+#                             64,             # block_size
+#                             2000,          # min_period
+#                             2000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+#     yield tgen.createExit(0)
+
+
+# ----- data -----
+# def createLinearTraffic8(tgen):
+#     yield tgen.createLinear(duration,   # duration
+#                             AddrRange('47504kB').end,              # min_addr
+#                             AddrRange('47505kB').end,              # max_adr
+#                             64,             # block_size
+#                             1000000,          # min_period
+#                             1000000,          # max_period
+#                             100,             # rd_perc
+#                             0)              # data_limit
+# yield tgen.createExit(0)
+
+root = Root(full_system=False, system=system)
+
+m5.instantiate()
+# Only generators 0-3 are started; 4-7 and 8 have their start() calls disabled.
+system.generator0.start(createLinearTraffic0(system.generator0))
+system.generator1.start(createLinearTraffic1(system.generator1))
+system.generator2.start(createLinearTraffic2(system.generator2))
+system.generator3.start(createLinearTraffic3(system.generator3))
+# system.generator4.start(createLinearTraffic4(system.generator4))
+# system.generator5.start(createLinearTraffic5(system.generator5))
+# system.generator6.start(createLinearTraffic6(system.generator6))
+# system.generator7.start(createLinearTraffic7(system.generator7))
+
+# system.generator8.start(createLinearTraffic8(system.generator8))
+exit_event = m5.simulate()  # exit_event is not inspected afterwards
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index 287135fd62..b20779fcdb 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -673,6 +673,7 @@
 shader.dispatcher = dispatcher
 shader.gpu_cmd_proc = gpu_cmd_proc
 
+
 # Create and assign the workload Check for rel_path in elements of
 # base_list using test, returning the first full path that satisfies test
 def find_path(base_list, rel_path, test):
@@ -698,7 +699,7 @@ def find_file(base_list, rel_path):
     executable = find_file(benchmark_path, args.cmd)
 
 if args.env:
-    with open(args.env, "r") as f:
+    with open(args.env) as f:
         env = [line.rstrip() for line in f]
 else:
     env = [
@@ -756,7 +757,7 @@ def find_file(base_list, rel_path):
     ]
 
 # Other CPU strings cause bad addresses in ROCm. Revert back to M5 Simulator.
-for (i, cpu) in enumerate(cpu_list):
+for i, cpu in enumerate(cpu_list):
     for j in range(len(cpu)):
         cpu.isa[j].vendor_string = "M5 Simulator"
 
diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 08af3ef435..fae85fa04b 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -171,9 +171,10 @@ def create(args):
     system.workload = workload_class(object_file, system)
 
     if args.with_pmu:
-        enabled_pmu_events = set(
-            (*args.pmu_dump_stats_on, *args.pmu_reset_stats_on)
-        )
+        enabled_pmu_events = {
+            *args.pmu_dump_stats_on,
+            *args.pmu_reset_stats_on,
+        }
         exit_sim_on_control = bool(
             enabled_pmu_events & set(pmu_control_events.keys())
         )
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index 6c6474ca2b..7d92f4ee36 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -338,11 +338,80 @@ def connectMemSide(self, bus):
         pass
 
 
-class BaseSimpleSystem(ArmSystem):
+class ClusterSystem:
+    """
+    Mixin providing CPU-cluster bookkeeping and cache-hierarchy wiring to
+    SE/FS systems; addCaches() expects the concrete system to set self.membus
+    """
+
+    def __init__(self, **kwargs):  # kwargs are accepted but not used here
+        self._clusters = []
+
+    def numCpuClusters(self):
+        return len(self._clusters)
+
+    def addCpuCluster(self, cpu_cluster):
+        self._clusters.append(cpu_cluster)
+
+    def addCaches(self, need_caches, last_cache_level):
+        if not need_caches:
+            # no caches requested: connect each cluster straight to the membus
+            for cluster in self._clusters:
+                cluster.connectMemSide(self.membus)
+            return
+
+        cluster_mem_bus = self.membus
+        assert last_cache_level >= 1 and last_cache_level <= 3  # levels 1-3 only
+        for cluster in self._clusters:
+            cluster.addL1()
+        if last_cache_level > 1:
+            for cluster in self._clusters:
+                cluster.addL2(cluster.clk_domain)
+        if last_cache_level > 2:
+            max_clock_cluster = max(
+                self._clusters, key=lambda c: c.clk_domain.clock[0]
+            )  # the shared L3 uses this cluster's clock domain
+            self.l3 = L3(clk_domain=max_clock_cluster.clk_domain)
+            self.toL3Bus = L2XBar(width=64)
+            self.toL3Bus.mem_side_ports = self.l3.cpu_side
+            self.l3.mem_side = self.membus.cpu_side_ports
+            cluster_mem_bus = self.toL3Bus
+
+        # connect each cluster to the L3 bus if one was built, else to the membus
+        for cluster in self._clusters:
+            cluster.connectMemSide(cluster_mem_bus)
+
+
+class SimpleSeSystem(System, ClusterSystem):
+    """
+    Example system class for syscall emulation mode
+    """
+
+    # Use a fixed cache line size of 64 bytes
+    cache_line_size = 64
+
+    def __init__(self, **kwargs):  # both bases are initialised explicitly
+        System.__init__(self, **kwargs)
+        ClusterSystem.__init__(self, **kwargs)  # creates the empty cluster list
+        # Create a voltage and clock domain for system components
+        self.voltage_domain = VoltageDomain(voltage="3.3V")
+        self.clk_domain = SrcClockDomain(
+            clock="1GHz", voltage_domain=self.voltage_domain
+        )
+
+        # Create the off-chip memory bus (read by ClusterSystem.addCaches).
+        self.membus = SystemXBar()
+
+    def connect(self):  # attaches the system port to the membus
+        self.system_port = self.membus.cpu_side_ports
+
+
+class BaseSimpleSystem(ArmSystem, ClusterSystem):
     cache_line_size = 64
 
     def __init__(self, mem_size, platform, **kwargs):
-        super(BaseSimpleSystem, self).__init__(**kwargs)
+        ArmSystem.__init__(self, **kwargs)
+        ClusterSystem.__init__(self, **kwargs)
 
         self.voltage_domain = VoltageDomain(voltage="1.0V")
         self.clk_domain = SrcClockDomain(
@@ -364,8 +433,6 @@ def __init__(self, mem_size, platform, **kwargs):
         # Device DMA -> MEM
         self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
 
-        self._clusters = []
-
     def getMemRanges(self, mem_size):
         """
         Define system memory ranges. This depends on the physical
@@ -388,40 +455,6 @@ def getMemRanges(self, mem_size):
 
         raise ValueError("memory size too big for platform capabilities")
 
-    def numCpuClusters(self):
-        return len(self._clusters)
-
-    def addCpuCluster(self, cpu_cluster):
-        self._clusters.append(cpu_cluster)
-
-    def addCaches(self, need_caches, last_cache_level):
-        if not need_caches:
-            # connect each cluster to the memory hierarchy
-            for cluster in self._clusters:
-                cluster.connectMemSide(self.membus)
-            return
-
-        cluster_mem_bus = self.membus
-        assert last_cache_level >= 1 and last_cache_level <= 3
-        for cluster in self._clusters:
-            cluster.addL1()
-        if last_cache_level > 1:
-            for cluster in self._clusters:
-                cluster.addL2(cluster.clk_domain)
-        if last_cache_level > 2:
-            max_clock_cluster = max(
-                self._clusters, key=lambda c: c.clk_domain.clock[0]
-            )
-            self.l3 = L3(clk_domain=max_clock_cluster.clk_domain)
-            self.toL3Bus = L2XBar(width=64)
-            self.toL3Bus.mem_side_ports = self.l3.cpu_side
-            self.l3.mem_side = self.membus.cpu_side_ports
-            cluster_mem_bus = self.toL3Bus
-
-        # connect each cluster to the memory hierarchy
-        for cluster in self._clusters:
-            cluster.connectMemSide(cluster_mem_bus)
-
 
 class SimpleSystem(BaseSimpleSystem):
     """
@@ -429,7 +462,7 @@ class SimpleSystem(BaseSimpleSystem):
     """
 
     def __init__(self, caches, mem_size, platform=None, **kwargs):
-        super(SimpleSystem, self).__init__(mem_size, platform, **kwargs)
+        super().__init__(mem_size, platform, **kwargs)
 
         self.membus = MemBus()
         # CPUs->PIO
@@ -468,7 +501,7 @@ class ArmRubySystem(BaseSimpleSystem):
     """
 
     def __init__(self, mem_size, platform=None, **kwargs):
-        super(ArmRubySystem, self).__init__(mem_size, platform, **kwargs)
+        super().__init__(mem_size, platform, **kwargs)
         self._dma_ports = []
         self._mem_ports = []
 
diff --git a/configs/example/arm/etrace_se.py b/configs/example/arm/etrace_se.py
new file mode 100644
index 0000000000..8fa971ff84
--- /dev/null
+++ b/configs/example/arm/etrace_se.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2016-2017, 2022-2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import os
+import m5
+from m5.util import addToPath
+from m5.objects import *
+import argparse
+import shlex
+
+m5.util.addToPath("../..")
+
+from common import ObjectList
+
+import devices
+
+
+def get_processes(cmd):
+    """Interprets commands to run and returns a list of processes"""
+
+    cwd = os.getcwd()
+    multiprocesses = []
+    for idx, c in enumerate(cmd):
+        argv = shlex.split(c)
+
+        process = Process(pid=100 + idx, cwd=cwd, cmd=argv, executable=argv[0])
+        process.gid = os.getgid()
+
+        print("info: %d. command and arguments: %s" % (idx + 1, process.cmd))
+        multiprocesses.append(process)
+
+    return multiprocesses
+
+
+def create(args):
+    """Create and configure the system object."""
+
+    system = devices.SimpleSeSystem(
+        mem_mode="timing",
+    )
+
+    # Add CPUs to the system. A cluster of CPUs typically has
+    # private L1 caches and a shared L2 cache.
+    system.cpu_cluster = devices.ArmCpuCluster(
+        system,
+        args.num_cores,
+        args.cpu_freq,
+        "1.2V",
+        ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"),
+        devices.L1I,
+        devices.L1D,
+        devices.L2,
+    )
+
+    # Attach the elastic trace probe listener to every CPU in the cluster
+    for cpu in system.cpu_cluster:
+        cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file)
+
+    # As elastic trace generation is enabled, make sure the memory system is
+    # minimal so that compute delays do not include memory access latencies.
+    # Configure only the compulsory L1 caches for the O3CPU; do not configure
+    # any more caches.
+    system.addCaches(True, last_cache_level=1)
+
+    # For elastic trace, override the SimpleMemory latency to 1ns.
+    system.memory = SimpleMemory(
+        range=AddrRange(start=0, size=args.mem_size),
+        latency="1ns",
+        port=system.membus.mem_side_ports,
+    )
+
+    # Parse the command line and get a list of Process instances
+    # that we can pass to gem5.
+    processes = get_processes(args.commands_to_run)
+    if len(processes) != args.num_cores:
+        print(
+            "Error: Cannot map %d command(s) onto %d CPU(s)"
+            % (len(processes), args.num_cores)
+        )
+        sys.exit(1)
+
+    system.workload = SEWorkload.init_compatible(processes[0].executable)
+
+    # Assign one workload to each CPU
+    for cpu, workload in zip(system.cpu_cluster.cpus, processes):
+        cpu.workload = workload
+
+    return system
+
+
+def main():
+    parser = argparse.ArgumentParser(epilog=__doc__)
+
+    parser.add_argument(
+        "commands_to_run",
+        metavar="command(s)",
+        nargs="+",
+        help="Command(s) to run",
+    )
+    parser.add_argument(
+        "--inst-trace-file",
+        action="store",
+        type=str,
+        help="""Instruction fetch trace file input to
+                Elastic Trace probe in a capture simulation and
+                Trace CPU in a replay simulation""",
+        default="fetchtrace.proto.gz",
+    )
+    parser.add_argument(
+        "--data-trace-file",
+        action="store",
+        type=str,
+        help="""Data dependency trace file input to
+                Elastic Trace probe in a capture simulation and
+                Trace CPU in a replay simulation""",
+        default="deptrace.proto.gz",
+    )
+    parser.add_argument("--cpu-freq", type=str, default="4GHz")
+    parser.add_argument(
+        "--num-cores", type=int, default=1, help="Number of CPU cores"
+    )
+    parser.add_argument(
+        "--mem-size",
+        action="store",
+        type=str,
+        default="2GB",
+        help="Specify the physical memory size",
+    )
+
+    args = parser.parse_args()
+
+    # Create a single root node for gem5's object hierarchy. There can
+    # only exist one root node in the simulator at any given
+    # time. Tell gem5 that we want to use syscall emulation mode
+    # instead of full system mode.
+    root = Root(full_system=False)
+
+    # Populate the root node with a system. A system corresponds to a
+    # single node with shared memory.
+    root.system = create(args)
+
+    # Instantiate the C++ object hierarchy. After this point,
+    # SimObjects can't be instantiated anymore.
+    m5.instantiate()
+
+    # Start the simulator. This gives control to the C++ world and
+    # starts the simulator. The returned event tells the simulation
+    # script why the simulator exited.
+    event = m5.simulate()
+
+    # Print the reason for the simulation exit. Some exit codes are
+    # requests for service (e.g., checkpoints) from the simulation
+    # script. We'll just ignore them here and exit.
+    print(f"{event.getCause()} ({event.getCode()}) @ {m5.curTick()}")
+
+
+if __name__ == "__m5_main__":
+    main()
diff --git a/configs/example/arm/fs_power.py b/configs/example/arm/fs_power.py
index 671cf63f2f..0442682411 100644
--- a/configs/example/arm/fs_power.py
+++ b/configs/example/arm/fs_power.py
@@ -47,7 +47,7 @@
 
 class CpuPowerOn(MathExprPowerModel):
     def __init__(self, cpu_path, **kwargs):
-        super(CpuPowerOn, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         # 2A per IPC, 3pA per cache miss
         # and then convert to Watt
         self.dyn = (
@@ -64,7 +64,7 @@ class CpuPowerOff(MathExprPowerModel):
 
 class CpuPowerModel(PowerModel):
     def __init__(self, cpu_path, **kwargs):
-        super(CpuPowerModel, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.pm = [
             CpuPowerOn(cpu_path),  # ON
             CpuPowerOff(),  # CLK_GATED
@@ -75,7 +75,7 @@ def __init__(self, cpu_path, **kwargs):
 
 class L2PowerOn(MathExprPowerModel):
     def __init__(self, l2_path, **kwargs):
-        super(L2PowerOn, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         # Example to report l2 Cache overallAccesses
         # The estimated power is converted to Watt and will vary based
         # on the size of the cache
@@ -90,7 +90,7 @@ class L2PowerOff(MathExprPowerModel):
 
 class L2PowerModel(PowerModel):
     def __init__(self, l2_path, **kwargs):
-        super(L2PowerModel, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         # Choose a power model for every power state
         self.pm = [
             L2PowerOn(l2_path),  # ON
diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index f21f399675..9834487155 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -64,72 +64,6 @@
 }
 
 
-class SimpleSeSystem(System):
-    """
-    Example system class for syscall emulation mode
-    """
-
-    # Use a fixed cache line size of 64 bytes
-    cache_line_size = 64
-
-    def __init__(self, args, **kwargs):
-        super(SimpleSeSystem, self).__init__(**kwargs)
-
-        # Setup book keeping to be able to use CpuClusters from the
-        # devices module.
-        self._clusters = []
-        self._num_cpus = 0
-
-        # Create a voltage and clock domain for system components
-        self.voltage_domain = VoltageDomain(voltage="3.3V")
-        self.clk_domain = SrcClockDomain(
-            clock="1GHz", voltage_domain=self.voltage_domain
-        )
-
-        # Create the off-chip memory bus.
-        self.membus = SystemXBar()
-
-        # Wire up the system port that gem5 uses to load the kernel
-        # and to perform debug accesses.
-        self.system_port = self.membus.cpu_side_ports
-
-        # Add CPUs to the system. A cluster of CPUs typically have
-        # private L1 caches and a shared L2 cache.
-        self.cpu_cluster = devices.ArmCpuCluster(
-            self,
-            args.num_cores,
-            args.cpu_freq,
-            "1.2V",
-            *cpu_types[args.cpu],
-            tarmac_gen=args.tarmac_gen,
-            tarmac_dest=args.tarmac_dest,
-        )
-
-        # Create a cache hierarchy (unless we are simulating a
-        # functional CPU in atomic memory mode) for the CPU cluster
-        # and connect it to the shared memory bus.
-        if self.cpu_cluster.memory_mode() == "timing":
-            self.cpu_cluster.addL1()
-            self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
-        self.cpu_cluster.connectMemSide(self.membus)
-
-        # Tell gem5 about the memory mode used by the CPUs we are
-        # simulating.
-        self.mem_mode = self.cpu_cluster.memory_mode()
-
-    def numCpuClusters(self):
-        return len(self._clusters)
-
-    def addCpuCluster(self, cpu_cluster):
-        assert cpu_cluster not in self._clusters
-        assert len(cpu_cluster) > 0
-        self._clusters.append(cpu_cluster)
-        self._num_cpus += len(cpu_cluster)
-
-    def numCpus(self):
-        return self._num_cpus
-
-
 def get_processes(cmd):
     """Interprets commands to run and returns a list of processes"""
 
@@ -150,7 +84,31 @@ def get_processes(cmd):
 def create(args):
     """Create and configure the system object."""
 
-    system = SimpleSeSystem(args)
+    cpu_class = cpu_types[args.cpu][0]
+    mem_mode = cpu_class.memory_mode()
+    # Only simulate caches when using a timing CPU (e.g., the HPI model)
+    want_caches = True if mem_mode == "timing" else False
+
+    system = devices.SimpleSeSystem(
+        mem_mode=mem_mode,
+    )
+
+    # Add CPUs to the system. A cluster of CPUs typically has
+    # private L1 caches and a shared L2 cache.
+    system.cpu_cluster = devices.ArmCpuCluster(
+        system,
+        args.num_cores,
+        args.cpu_freq,
+        "1.2V",
+        *cpu_types[args.cpu],
+        tarmac_gen=args.tarmac_gen,
+        tarmac_dest=args.tarmac_dest,
+    )
+
+    # Create a cache hierarchy for the cluster. We are assuming that
+    # clusters have core-private L1 caches and an L2 that's shared
+    # within the cluster.
+    system.addCaches(want_caches, last_cache_level=2)
 
     # Tell components about the expected physical memory ranges. This
     # is, for example, used by the MemConfig helper to determine where
@@ -160,6 +118,9 @@ def create(args):
     # Configure the off-chip memory system.
     MemConfig.config_mem(args, system)
 
+    # Wire up the system's memory system
+    system.connect()
+
     # Parse the command line and get a list of Processes instances
     # that we can pass to gem5.
     processes = get_processes(args.commands_to_run)
diff --git a/configs/example/arm/workloads.py b/configs/example/arm/workloads.py
index 5c70dabfc2..d7aea2ca0a 100644
--- a/configs/example/arm/workloads.py
+++ b/configs/example/arm/workloads.py
@@ -49,7 +49,7 @@ class ArmBaremetal(ArmFsWorkload):
     dtb_addr = 0
 
     def __init__(self, obj, system, **kwargs):
-        super(ArmBaremetal, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
         self.object_file = obj
 
@@ -76,7 +76,7 @@ class ArmTrustedFirmware(ArmFsWorkload):
     dtb_addr = 0
 
     def __init__(self, obj, system, **kwargs):
-        super(ArmTrustedFirmware, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
         self.extras = [binary("bl1.bin"), binary("fip.bin")]
         self.extras_addrs = [
diff --git a/configs/example/dramsys.py b/configs/example/dramsys.py
index 934ff17b57..0e8bebfb75 100755
--- a/configs/example/dramsys.py
+++ b/configs/example/dramsys.py
@@ -37,9 +37,8 @@
 system.cpu = traffic_gen
 
 dramsys = DRAMSys(
-    configuration="ext/dramsys/DRAMSys/DRAMSys/"
-    "library/resources/simulations/ddr4-example.json",
-    resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+    configuration="ext/dramsys/DRAMSys/configs/ddr4-example.json",
+    resource_directory="ext/dramsys/DRAMSys/configs",
 )
 
 system.target = dramsys
diff --git a/configs/example/etrace_replay.py b/configs/example/etrace_replay.py
index ddbf01acf5..184934fab9 100644
--- a/configs/example/etrace_replay.py
+++ b/configs/example/etrace_replay.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2015 ARM Limited
+# Copyright (c) 2015, 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -43,10 +43,42 @@
 
 from common import Options
 from common import Simulation
-from common import CacheConfig
 from common import MemConfig
 from common.Caches import *
 
+
+def config_cache(args, system):
+    """
+    Configure the cache hierarchy.  Only two configurations are natively
+    supported as an example: L1(I/D) only or L1 + L2.
+    """
+    from common.CacheConfig import _get_cache_opts
+
+    system.l1i = L1_ICache(**_get_cache_opts("l1i", args))
+    system.l1d = L1_DCache(**_get_cache_opts("l1d", args))
+
+    system.cpu.dcache_port = system.l1d.cpu_side
+    system.cpu.icache_port = system.l1i.cpu_side
+
+    if args.l2cache:
+        # Provide a clock for the L2 and the L1-to-L2 bus here as they
+        # are not connected using addTwoLevelCacheHierarchy. Use the
+        # same clock as the CPUs.
+        system.l2 = L2Cache(
+            clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", args)
+        )
+
+        system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)
+        system.l2.cpu_side = system.tol2bus.mem_side_ports
+        system.l2.mem_side = system.membus.cpu_side_ports
+
+        system.l1i.mem_side = system.tol2bus.cpu_side_ports
+        system.l1d.mem_side = system.tol2bus.cpu_side_ports
+    else:
+        system.l1i.mem_side = system.membus.cpu_side_ports
+        system.l1d.mem_side = system.membus.cpu_side_ports
+
+
 parser = argparse.ArgumentParser()
 Options.addCommonOptions(parser)
 
@@ -59,29 +91,18 @@
 
 args = parser.parse_args()
 
-numThreads = 1
-
-if args.cpu_type != "TraceCPU":
-    fatal(
-        "This is a script for elastic trace replay simulation, use "
-        "--cpu-type=TraceCPU\n"
-    )
-
 if args.num_cpus > 1:
     fatal("This script does not support multi-processor trace replay.\n")
 
-# In this case FutureClass will be None as there is not fast forwarding or
-# switching
-(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
-CPUClass.numThreads = numThreads
-
 system = System(
-    cpu=CPUClass(cpu_id=0),
-    mem_mode=test_mem_mode,
+    mem_mode=TraceCPU.memory_mode(),
     mem_ranges=[AddrRange(args.mem_size)],
     cache_line_size=args.cacheline_size,
 )
 
+# Generate the TraceCPU
+system.cpu = TraceCPU()
+
 # Create a top-level voltage domain
 system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
 
@@ -105,11 +126,6 @@
 for cpu in system.cpu:
     cpu.clk_domain = system.cpu_clk_domain
 
-# BaseCPU no longer has default values for the BaseCPU.isa
-# createThreads() is needed to fill in the cpu.isa
-for cpu in system.cpu:
-    cpu.createThreads()
-
 # Assign input trace files to the Trace CPU
 system.cpu.instTraceFile = args.inst_trace_file
 system.cpu.dataTraceFile = args.data_trace_file
@@ -118,8 +134,11 @@
 MemClass = Simulation.setMemClass(args)
 system.membus = SystemXBar()
 system.system_port = system.membus.cpu_side_ports
-CacheConfig.config_cache(args, system)
+
+# Configure the classic cache hierarchy
+config_cache(args, system)
+
 MemConfig.config_mem(args, system)
 
 root = Root(full_system=False, system=system)
-Simulation.run(args, root, system, FutureClass)
+Simulation.run(args, root, system, None)
diff --git a/configs/example/gem5_library/arm-hello.py b/configs/example/gem5_library/arm-hello.py
index d66eee5790..721d71c2cb 100644
--- a/configs/example/gem5_library/arm-hello.py
+++ b/configs/example/gem5_library/arm-hello.py
@@ -43,7 +43,7 @@
 
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.boards.simple_board import SimpleBoard
@@ -80,11 +80,11 @@
 board.set_se_binary_workload(
     # The `Resource` class reads the `resources.json` file from the gem5
     # resources repository:
-    # https://gem5.googlesource.com/public/gem5-resource.
+    # https://github.com/gem5/gem5-resources.
     # Any resource specified in this file will be automatically retrieved.
     # At the time of writing, this file is a WIP and does not contain all
     # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
-    Resource("arm-hello64-static")
+    obtain_resource("arm-hello64-static")
 )
 
 # Lastly we run the simulation.
diff --git a/configs/example/gem5_library/arm-ubuntu-run.py b/configs/example/gem5_library/arm-ubuntu-run.py
index 7f976f06db..78160c9976 100644
--- a/configs/example/gem5_library/arm-ubuntu-run.py
+++ b/configs/example/gem5_library/arm-ubuntu-run.py
@@ -43,7 +43,7 @@
 from gem5.isas import ISA
 from m5.objects import ArmDefaultRelease
 from gem5.utils.requires import requires
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from m5.objects import VExpress_GEM5_Foundation
 from gem5.coherence_protocol import CoherenceProtocol
@@ -100,7 +100,7 @@
 # Here we set a full system workload. The "arm64-ubuntu-20.04-boot" boots
 # Ubuntu 20.04.
 
-board.set_workload(Workload("arm64-ubuntu-20.04-boot"))
+board.set_workload(obtain_resource("arm64-ubuntu-20.04-boot"))
 
 # We define the system with the aforementioned system defined.
 
diff --git a/configs/example/gem5_library/caches/octopi-cache-example.py b/configs/example/gem5_library/caches/octopi-cache-example.py
new file mode 100644
index 0000000000..4a4926a174
--- /dev/null
+++ b/configs/example/gem5_library/caches/octopi-cache-example.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script boots Ubuntu 20.04 with 8 timing cores in 1 CCD.
+
+Usage
+-----
+
+```
+scons build/ARM_MESI_Three_Level/gem5.opt -j `nproc`
+./build/ARM_MESI_Three_Level/gem5.opt \
+    configs/example/gem5_library/caches/octopi-cache-example.py
+```
+"""
+
+
+from m5.objects import ArmDefaultRelease, VExpress_GEM5_Foundation
+
+from gem5.utils.requires import requires
+from gem5.components.boards.arm_board import ArmBoard
+from gem5.components.memory import DualChannelDDR4_2400
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.components.cachehierarchies.ruby.caches.mesi_three_level.octopi import (
+    OctopiCache,
+)
+from gem5.isas import ISA
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.simulate.simulator import Simulator
+from gem5.resources.resource import obtain_resource
+
+num_ccds = 1  # CCDs
+num_cores_per_ccd = 8  # 8 cores/CCD
+
+# OctopiCache is built on top of gem5's MESI_Three_Level cache coherence
+# protocol
+requires(coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL)
+cache_hierarchy = OctopiCache(
+    l1i_size="32KiB",
+    l1i_assoc=8,
+    l1d_size="32KiB",
+    l1d_assoc=8,
+    l2_size="512KiB",
+    l2_assoc=8,
+    l3_size="32MiB",
+    l3_assoc=16,
+    num_core_complexes=num_ccds,
+    is_fullsystem=True,
+)
+
+memory = DualChannelDDR4_2400(size="16GB")
+
+# The number of cores must be consistent with
+# num_core_complexes and num_cores_per_core_complexes
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING,
+    isa=ISA.ARM,
+    num_cores=num_ccds * num_cores_per_ccd,
+)
+
+release = ArmDefaultRelease()
+platform = VExpress_GEM5_Foundation()
+
+board = ArmBoard(
+    clk_freq="4GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+    release=release,
+    platform=platform,
+)
+
+board.set_workload(obtain_resource("arm64-ubuntu-20.04-boot"))
+
+simulator = Simulator(board=board)
+simulator.run()
diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
index 60a7dd0f59..eed76e2448 100644
--- a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
+++ b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
@@ -48,7 +48,7 @@
 
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.boards.simple_board import SimpleBoard
@@ -89,8 +89,8 @@
 # configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py
 board.set_se_binary_workload(
     # the workload should be the same as the save-checkpoint script
-    Resource("riscv-hello"),
-    checkpoint=Resource("riscv-hello-example-checkpoint-v23"),
+    obtain_resource("riscv-hello"),
+    checkpoint=obtain_resource("riscv-hello-example-checkpoint"),
 )
 
 simulator = Simulator(
diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py
index 159c4b76e1..234153a57f 100644
--- a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py
+++ b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py
@@ -46,7 +46,7 @@
 import argparse
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.boards.simple_board import SimpleBoard
@@ -97,11 +97,11 @@
 board.set_se_binary_workload(
     # The `Resource` class reads the `resources.json` file from the gem5
     # resources repository:
-    # https://gem5.googlesource.com/public/gem5-resource.
+    # https://github.com/gem5/gem5-resources.
     # Any resource specified in this file will be automatically retrieved.
     # At the time of writing, this file is a WIP and does not contain all
     # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
-    Resource("riscv-hello")
+    obtain_resource("riscv-hello")
 )
 
 # Lastly we run the simulation.
diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
index b5eb7e9912..5787bf4bfc 100644
--- a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
+++ b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
@@ -128,7 +128,6 @@
 )
 
 dir = Path(args.checkpoint_path)
-dir.mkdir(exist_ok=True)
 
 simulator = Simulator(
     board=board,
diff --git a/configs/example/gem5_library/dramsys/arm-hello-dramsys.py b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py
index 8b25a36396..ae2b4bb5b6 100644
--- a/configs/example/gem5_library/dramsys/arm-hello-dramsys.py
+++ b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py
@@ -35,7 +35,7 @@
 
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.memory import DRAMSysDDR3_1600
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.boards.simple_board import SimpleBoard
@@ -74,11 +74,11 @@
 board.set_se_binary_workload(
     # The `Resource` class reads the `resources.json` file from the gem5
     # resources repository:
-    # https://gem5.googlesource.com/public/gem5-resource.
+    # https://github.com/gem5/gem5-resources.
     # Any resource specified in this file will be automatically retrieved.
     # At the time of writing, this file is a WIP and does not contain all
     # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
-    Resource("arm-hello64-static")
+    obtain_resource("arm-hello64-static")
 )
 
 # Lastly we run the simulation.
diff --git a/configs/example/gem5_library/dramsys/dramsys-traffic.py b/configs/example/gem5_library/dramsys/dramsys-traffic.py
index ee9ad7228d..2f9b768696 100644
--- a/configs/example/gem5_library/dramsys/dramsys-traffic.py
+++ b/configs/example/gem5_library/dramsys/dramsys-traffic.py
@@ -31,16 +31,14 @@
 DRRAMSys simulator. Please consult 'ext/dramsys/README' on how to compile
 correctly. If this is not done correctly this script will run with error.
 """
-import m5
-from gem5.components.memory import DRAMSysMem
+
+from gem5.components.memory.dramsys import DRAMSysMem
 from gem5.components.boards.test_board import TestBoard
 from gem5.components.processors.linear_generator import LinearGenerator
-from m5.objects import Root
+from gem5.simulate.simulator import Simulator
 
 memory = DRAMSysMem(
-    configuration="ext/dramsys/DRAMSys/DRAMSys/"
-    "library/resources/simulations/ddr4-example.json",
-    resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+    configuration="ext/dramsys/DRAMSys/configs/ddr4-example.json",
     recordable=True,
     size="4GB",
 )
@@ -51,12 +49,16 @@
     num_cores=1,
     max_addr=memory.get_size(),
 )
+
 board = TestBoard(
     clk_freq="3GHz", generator=generator, memory=memory, cache_hierarchy=None
 )
 
-root = Root(full_system=False, system=board)
-board._pre_instantiate()
-m5.instantiate()
-generator.start_traffic()
-exit_event = m5.simulate()
+simulator = Simulator(board=board)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
index abb15fb7f8..4a15da55ff 100644
--- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
+++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
@@ -56,7 +56,7 @@
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 from pathlib import Path
 from gem5.simulate.exit_event_generators import (
     looppoint_save_checkpoint_generator,
@@ -110,7 +110,9 @@
     cache_hierarchy=cache_hierarchy,
 )
 
-board.set_workload(Workload("x86-matrix-multiply-omp-100-8-looppoint-csv"))
+board.set_workload(
+    obtain_resource("x86-matrix-multiply-omp-100-8-looppoint-csv")
+)
 
 dir = Path(args.checkpoint_path)
 dir.mkdir(exist_ok=True)
diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
index 21353a34a1..4ca6ad495e 100644
--- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
+++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
@@ -54,7 +54,6 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.resources.resource import obtain_resource
-from gem5.resources.workload import Workload
 from m5.stats import reset, dump
 
 requires(isa_required=ISA.X86)
@@ -113,11 +112,12 @@
 )
 
 board.set_workload(
-    Workload(
+    obtain_resource(
         f"x86-matrix-multiply-omp-100-8-looppoint-region-{args.checkpoint_region}"
     )
 )
 
+
 # This generator will dump the stats and exit the simulation loop when the
 # simulation region reaches its end. In the case there is a warmup interval,
 # the simulation stats are reset after the warmup is complete.
diff --git a/configs/example/gem5_library/power-hello.py b/configs/example/gem5_library/power-hello.py
index cf31778945..59020643e0 100644
--- a/configs/example/gem5_library/power-hello.py
+++ b/configs/example/gem5_library/power-hello.py
@@ -43,7 +43,7 @@
 
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.memory import SingleChannelDDR4_2400
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.components.boards.simple_board import SimpleBoard
@@ -75,7 +75,7 @@
     cache_hierarchy=cache_hierarchy,
 )
 
-board.set_se_binary_workload(Resource("power-hello"))
+board.set_se_binary_workload(obtain_resource("power-hello"))
 
 # Lastly we run the simulation.
 simulator = Simulator(board=board)
diff --git a/configs/example/gem5_library/riscv-fs.py b/configs/example/gem5_library/riscv-fs.py
index e3e2bc75e1..8a0de6c688 100644
--- a/configs/example/gem5_library/riscv-fs.py
+++ b/configs/example/gem5_library/riscv-fs.py
@@ -27,7 +27,7 @@
 """
 This example runs a simple linux boot. It uses the 'riscv-disk-img' resource.
 It is built with the sources in `src/riscv-fs` in [gem5 resources](
-https://gem5.googlesource.com/public/gem5-resources).
+https://github.com/gem5/gem5-resources).
 
 Characteristics
 ---------------
@@ -48,7 +48,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 
 # Run a check to ensure the right version of gem5 is being used.
@@ -79,8 +79,8 @@
 
 # Set the Full System workload.
 board.set_kernel_disk_workload(
-    kernel=Resource("riscv-bootloader-vmlinux-5.10"),
-    disk_image=Resource("riscv-disk-img"),
+    kernel=obtain_resource("riscv-bootloader-vmlinux-5.10"),
+    disk_image=obtain_resource("riscv-disk-img"),
 )
 
 simulator = Simulator(board=board)
diff --git a/configs/example/gem5_library/riscv-ubuntu-run.py b/configs/example/gem5_library/riscv-ubuntu-run.py
index 87b98cc5ba..9b172fd501 100644
--- a/configs/example/gem5_library/riscv-ubuntu-run.py
+++ b/configs/example/gem5_library/riscv-ubuntu-run.py
@@ -50,7 +50,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.simulate.simulator import Simulator
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 
 # This runs a check to ensure the gem5 binary is compiled for RISCV.
 
@@ -88,7 +88,7 @@
 # Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit`
 # instruction which stops the simulation. When the simulation has ended you may
 # inspect `m5out/system.pc.com_1.device` to see the stdout.
-board.set_workload(Workload("riscv-ubuntu-20.04-boot"))
+board.set_workload(obtain_resource("riscv-ubuntu-20.04-boot"))
 
 simulator = Simulator(board=board)
 simulator.run()
diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py
index 3e84b8c1ea..29ec76e16b 100644
--- a/configs/example/gem5_library/riscvmatched-fs.py
+++ b/configs/example/gem5_library/riscvmatched-fs.py
@@ -42,7 +42,7 @@
 from gem5.utils.requires import requires
 from gem5.isas import ISA
 from gem5.simulate.simulator import Simulator
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 
 import argparse
 
@@ -76,7 +76,7 @@
 # In the case where the `-i` flag is passed, we add the kernel argument
 # `init=/root/exit.sh`. This means the simulation will exit after the Linux
 # Kernel has booted.
-workload = Workload("riscv-ubuntu-20.04-boot")
+workload = obtain_resource("riscv-ubuntu-20.04-boot")
 kernel_args = board.get_default_kernel_args()
 if args.to_init:
     kernel_args.append("init=/root/exit.sh")
diff --git a/configs/example/gem5_library/riscvmatched-hello.py b/configs/example/gem5_library/riscvmatched-hello.py
index e7b4cf7128..a11ec39159 100644
--- a/configs/example/gem5_library/riscvmatched-hello.py
+++ b/configs/example/gem5_library/riscvmatched-hello.py
@@ -37,7 +37,7 @@
 ```
 """
 
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard
 from gem5.isas import ISA
@@ -49,7 +49,7 @@
 board = RISCVMatchedBoard()
 
 # set the hello world riscv binary as the board workload
-board.set_se_binary_workload(Resource("riscv-hello"))
+board.set_se_binary_workload(obtain_resource("riscv-hello"))
 
 # run the simulation with the RISCV Matched board
 simulator = Simulator(board=board, full_system=False)
diff --git a/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py b/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py
new file mode 100644
index 0000000000..7e08355e31
--- /dev/null
+++ b/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows how to use a suite. In this example, we will use the
+RISCVMatchedBoard and the RISCV Vertical Microbenchmark Suite,
+and show the different functionalities of the suite.
+
+The print statements in the script are for illustrative purposes only,
+and are not required to run the script.
+"""
+
+from gem5.resources.resource import obtain_resource
+from gem5.simulate.simulator import Simulator
+from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+
+requires(isa_required=ISA.RISCV)
+
+# instantiate the riscv matched board with default parameters
+board = RISCVMatchedBoard()
+
+# obtain the RISC-V Vertical Microbenchmarks
+microbenchmarks = obtain_resource("riscv-vertical-microbenchmarks")
+
+# list all the microbenchmarks present in the suite
+print("Microbenchmarks present in the suite:")
+print("====================================")
+for workload in microbenchmarks:
+    print(f"Workload ID: {workload.get_id()}")
+    print(f"Workload Version: {workload.get_resource_version()}")
+    print(f"WorkloadResource Object: {workload}")
+    print("====================================")
+
+# list all the WorkloadResource objects present in the suite
+for resource in microbenchmarks:
+    print(f"WorkloadResource Object: {resource}")
+
+# list all the available input groups in the suite
+print("Input groups present in the suite:")
+print(microbenchmarks.get_input_groups())
+
+# for this example, we will filter the suite
+# to run the Workload "riscv-cca-run"
+# it has the input group 'cca', which is used as the filter
+board.set_workload(list(microbenchmarks.with_input_group("cca"))[0])
+
+# run the simulation with the RISCV Matched board
+simulator = Simulator(board=board, full_system=False)
+simulator.run()
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(),
+        simulator.get_last_exit_event_cause(),
+    )
+)
diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py
index b85ce6e7e8..c20d2ea4cc 100644
--- a/configs/example/gem5_library/x86-gapbs-benchmarks.py
+++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py
@@ -63,7 +63,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
 
@@ -203,10 +203,10 @@
     # The x86 linux kernel will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
     # gapbs benchamarks was tested with kernel version 4.19.83
-    kernel=Resource("x86-linux-kernel-4.19.83"),
+    kernel=obtain_resource("x86-linux-kernel-4.19.83"),
     # The x86-gapbs image will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
-    disk_image=Resource("x86-gapbs"),
+    disk_image=obtain_resource("x86-gapbs"),
     readfile_contents=command,
 )
 
diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py
index cffba5a294..bcc48382ac 100644
--- a/configs/example/gem5_library/x86-npb-benchmarks.py
+++ b/configs/example/gem5_library/x86-npb-benchmarks.py
@@ -60,7 +60,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.simulator import ExitEvent
 
@@ -204,13 +204,14 @@
     # The x86 linux kernel will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
     # npb benchamarks was tested with kernel version 4.19.83
-    kernel=Resource("x86-linux-kernel-4.19.83"),
+    kernel=obtain_resource("x86-linux-kernel-4.19.83"),
     # The x86-npb image will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
-    disk_image=Resource("x86-npb"),
+    disk_image=obtain_resource("x86-npb"),
     readfile_contents=command,
 )
 
+
 # The first exit_event ends with a `workbegin` cause. This means that the
 # system started successfully and the execution on the program started.
 def handle_workbegin():
@@ -236,6 +237,7 @@ def handle_workbegin():
 # The next exit_event is to simulate the ROI. It should be exited with a cause
 # marked by `workend`.
 
+
 # We exepect that ROI ends with `workend` or `simulate() limit reached`.
 def handle_workend():
     print("Dump stats at the end of the ROI!")
diff --git a/configs/example/gem5_library/x86-parsec-benchmarks.py b/configs/example/gem5_library/x86-parsec-benchmarks.py
index aaffec8edc..5e855b773f 100644
--- a/configs/example/gem5_library/x86-parsec-benchmarks.py
+++ b/configs/example/gem5_library/x86-parsec-benchmarks.py
@@ -59,7 +59,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
 
@@ -175,7 +175,7 @@
 
 
 command = (
-    "cd /home/gem5/parsec-benchmark;".format(args.benchmark)
+    f"cd /home/gem5/parsec-benchmark;"
     + "source env.sh;"
     + f"parsecmgmt -a run -p {args.benchmark} -c gcc-hooks -i {args.size}         -n 2;"
     + "sleep 5;"
@@ -185,13 +185,14 @@
     # The x86 linux kernel will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
     # PARSEC benchamarks were tested with kernel version 4.19.83
-    kernel=Resource("x86-linux-kernel-4.19.83"),
+    kernel=obtain_resource("x86-linux-kernel-4.19.83"),
     # The x86-parsec image will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
-    disk_image=Resource("x86-parsec"),
+    disk_image=obtain_resource("x86-parsec"),
     readfile_contents=command,
 )
 
+
 # functions to handle different exit events during the simuation
 def handle_workbegin():
     print("Done booting Linux")
@@ -235,7 +236,7 @@ def handle_workend():
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
+print("Simulated time in ROI: " + (str(simulator.get_roi_ticks()[0])))
 print(
     "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
 )
diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index 10d5da0adb..63a7b6b236 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -65,7 +65,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
-from gem5.resources.resource import Resource, CustomDiskImageResource
+from gem5.resources.resource import Resource, DiskImageResource
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
 
@@ -261,9 +261,7 @@
     # 5.4.49
     kernel=Resource("x86-linux-kernel-4.19.83"),
     # The location of the x86 SPEC CPU 2017 image
-    disk_image=CustomDiskImageResource(
-        args.image, root_partition=args.partition
-    ),
+    disk_image=DiskImageResource(args.image, root_partition=args.partition),
     readfile_contents=command,
 )
 
diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
index cb5f5d19e3..348c26f1ff 100644
--- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
@@ -63,7 +63,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
-from gem5.resources.resource import Resource, CustomDiskImageResource
+from gem5.resources.resource import obtain_resource, DiskImageResource
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
 
@@ -268,18 +268,16 @@
 
 command = f"{args.benchmark} {args.size} {output_dir}"
 
-# For enabling CustomResource, we pass an additional parameter to mount the
+# For enabling DiskImageResource, we pass an additional parameter to mount the
 # correct partition.
 
 board.set_kernel_disk_workload(
     # The x86 linux kernel will be automatically downloaded to the
     # `~/.cache/gem5` directory if not already present.
     # SPEC CPU2017 benchamarks were tested with kernel version 4.19.83
-    kernel=Resource("x86-linux-kernel-4.19.83"),
+    kernel=obtain_resource("x86-linux-kernel-4.19.83"),
     # The location of the x86 SPEC CPU 2017 image
-    disk_image=CustomDiskImageResource(
-        args.image, root_partition=args.partition
-    ),
+    disk_image=DiskImageResource(args.image, root_partition=args.partition),
     readfile_contents=command,
 )
 
diff --git a/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py b/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py
new file mode 100644
index 0000000000..233efd92b8
--- /dev/null
+++ b/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+This script demonstrates how to use KVM CPU without perf.
+This simulation boots Ubuntu 18.04 using 2 KVM CPUs without using perf.
+
+Usage
+-----
+
+```
+scons build/X86/gem5.opt -j`nproc`
+./build/X86/gem5.opt configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py
+```
+"""
+
+from gem5.utils.requires import requires
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
+    MESITwoLevelCacheHierarchy,
+)
+from gem5.components.memory.single_channel import SingleChannelDDR4_2400
+from gem5.components.processors.simple_switchable_processor import (
+    SimpleSwitchableProcessor,
+)
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
+from gem5.resources.resource import obtain_resource
+
+# This simulation requires using KVM with gem5 compiled for X86 simulation
+# and with MESI_Two_Level cache coherence protocol.
+requires(
+    isa_required=ISA.X86,
+    coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL,
+    kvm_required=True,
+)
+
+from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import (
+    MESITwoLevelCacheHierarchy,
+)
+
+cache_hierarchy = MESITwoLevelCacheHierarchy(
+    l1d_size="32KiB",
+    l1d_assoc=8,
+    l1i_size="32KiB",
+    l1i_assoc=8,
+    l2_size="512KiB",
+    l2_assoc=16,
+    num_l2_banks=1,
+)
+
+# Main memory
+memory = SingleChannelDDR4_2400(size="3GiB")
+
+# This is a switchable CPU. We first boot Ubuntu using KVM, then the guest
+# will exit the simulation by calling "m5 exit" (see the `command` variable
+# below, which contains the command to be run in the guest after booting).
+# Upon exiting from the simulation, the Exit Event handler will switch the
+# CPU type (see the ExitEvent.EXIT line below, which contains a map to
+# a function to be called when an exit event happens).
+processor = SimpleSwitchableProcessor(
+    starting_core_type=CPUTypes.KVM,
+    switch_core_type=CPUTypes.TIMING,
+    isa=ISA.X86,
+    num_cores=2,
+)
+
+# Here we tell the KVM CPU (the starting CPU) not to use perf.
+for proc in processor.start:
+    proc.core.usePerf = False
+
+# Here we setup the board. The X86Board allows for Full-System X86 simulations.
+board = X86Board(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Here we set the Full System workload.
+# The `set_kernel_disk_workload` function for the X86Board takes a kernel, a
+# disk image, and, optionally, a command to run.
+
+# This is the command to run after the system has booted. The first `m5 exit`
+# will stop the simulation so we can switch the CPU cores from KVM to timing
+# and continue the simulation to run the echo command, sleep for a second,
+# then, again, call `m5 exit` to terminate the simulation. After simulation
+# has ended you may inspect `m5out/system.pc.com_1.device` to see the echo
+# output.
+command = (
+    "m5 exit;"
+    + "echo 'This is running on Timing CPU cores.';"
+    + "sleep 1;"
+    + "m5 exit;"
+)
+
+workload = obtain_resource("x86-ubuntu-18.04-boot")
+workload.set_parameter("readfile_contents", command)
+board.set_workload(workload)
+
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        # Here we want to override the default behavior for the first m5 exit
+        # exit event. Instead of exiting the simulator, we just want to
+        # switch the processor. The 2nd m5 exit after will revert to using
+        # default behavior where the simulator run will exit.
+        ExitEvent.EXIT: (func() for func in [processor.switch])
+    },
+)
+simulator.run()
diff --git a/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py b/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py
index f55ec60f21..00c00d1459 100644
--- a/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py
+++ b/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py
@@ -51,7 +51,7 @@
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 
 # This runs a check to ensure the gem5 binary is compiled to X86 and to the
 # MESI Two Level coherence protocol.
@@ -117,7 +117,7 @@
     + "m5 exit;"
 )
 
-workload = Workload("x86-ubuntu-18.04-boot")
+workload = obtain_resource("x86-ubuntu-18.04-boot")
 workload.set_parameter("readfile_contents", command)
 board.set_workload(workload)
 
diff --git a/configs/example/gem5_library/x86-ubuntu-run.py b/configs/example/gem5_library/x86-ubuntu-run.py
index 50b52e6e3c..fe72d653f5 100644
--- a/configs/example/gem5_library/x86-ubuntu-run.py
+++ b/configs/example/gem5_library/x86-ubuntu-run.py
@@ -45,7 +45,7 @@
 """
 
 from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 
 
@@ -56,7 +56,7 @@
 # We then set the workload. Here we use the "x86-ubuntu-18.04-boot" workload.
 # This boots Ubuntu 18.04 with Linux 5.4.49. If the required resources are not
 # found locally, they will be downloaded.
-board.set_workload(Workload("x86-ubuntu-18.04-boot"))
+board.set_workload(obtain_resource("x86-ubuntu-18.04-boot"))
 
 simulator = Simulator(board=board)
 simulator.run()
diff --git a/configs/example/gpufs/DisjointNetwork.py b/configs/example/gpufs/DisjointNetwork.py
index 1fbd0dcb15..9215691476 100644
--- a/configs/example/gpufs/DisjointNetwork.py
+++ b/configs/example/gpufs/DisjointNetwork.py
@@ -37,7 +37,7 @@
 
 class DisjointSimple(SimpleNetwork):
     def __init__(self, ruby_system):
-        super(DisjointSimple, self).__init__()
+        super().__init__()
 
         self.netifs = []
         self.routers = []
@@ -46,7 +46,6 @@ def __init__(self, ruby_system):
         self.ruby_system = ruby_system
 
     def connectCPU(self, opts, controllers):
-
         # Setup parameters for makeTopology call for CPU network
         topo_module = import_module(f"topologies.{opts.cpu_topology}")
         topo_class = getattr(topo_module, opts.cpu_topology)
@@ -56,7 +55,6 @@ def connectCPU(self, opts, controllers):
         self.initSimple(opts, self.int_links, self.ext_links)
 
     def connectGPU(self, opts, controllers):
-
         # Setup parameters for makeTopology call for GPU network
         topo_module = import_module(f"topologies.{opts.gpu_topology}")
         topo_class = getattr(topo_module, opts.gpu_topology)
@@ -66,7 +64,6 @@ def connectGPU(self, opts, controllers):
         self.initSimple(opts, self.int_links, self.ext_links)
 
     def initSimple(self, opts, int_links, ext_links):
-
         # Attach links to network
         self.int_links = int_links
         self.ext_links = ext_links
@@ -76,13 +73,12 @@ def initSimple(self, opts, int_links, ext_links):
 
 class DisjointGarnet(GarnetNetwork):
     def __init__(self, ruby_system):
-        super(DisjointGarnet, self).__init__()
+        super().__init__()
 
         self.netifs = []
         self.ruby_system = ruby_system
 
     def connectCPU(self, opts, controllers):
-
         # Setup parameters for makeTopology call for CPU network
         topo_module = import_module(f"topologies.{opts.cpu_topology}")
         topo_class = getattr(topo_module, opts.cpu_topology)
@@ -94,7 +90,6 @@ def connectCPU(self, opts, controllers):
         Network.init_network(opts, self, GarnetNetworkInterface)
 
     def connectGPU(self, opts, controllers):
-
         # Setup parameters for makeTopology call
         topo_module = import_module(f"topologies.{opts.gpu_topology}")
         topo_class = getattr(topo_module, opts.gpu_topology)
diff --git a/configs/example/gpufs/Disjoint_VIPER.py b/configs/example/gpufs/Disjoint_VIPER.py
index 14b47d8cf0..d4619c01a0 100644
--- a/configs/example/gpufs/Disjoint_VIPER.py
+++ b/configs/example/gpufs/Disjoint_VIPER.py
@@ -38,7 +38,6 @@
 
 class DummySystem:
     def __init__(self, mem_ranges):
-
         self.mem_ctrls = []
         self.mem_ranges = mem_ranges
 
@@ -48,10 +47,9 @@ def __init__(self):
         if buildEnv["PROTOCOL"] != "GPU_VIPER":
             fatal("This ruby config only supports the GPU_VIPER protocol")
 
-        super(Disjoint_VIPER, self).__init__()
+        super().__init__()
 
     def create(self, options, system, piobus, dma_devices):
-
         # Disjoint network topology
         if "garnet" in options.network:
             self.network_cpu = DisjointGarnet(self)
diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index f8ef70d5a2..8192503373 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -137,6 +137,44 @@ def addRunFSOptions(parser):
         "MI200 (gfx90a)",
     )
 
+    parser.add_argument(
+        "--debug-at-gpu-kernel",
+        type=int,
+        default=-1,
+        help="Turn on debug flags starting with this kernel",
+    )
+
+    parser.add_argument(
+        "--exit-at-gpu-kernel",
+        type=int,
+        default=-1,
+        help="Exit simulation after running this many kernels",
+    )
+
+    parser.add_argument(
+        "--root-partition",
+        type=str,
+        default="/dev/sda1",
+        help="Root partition of disk image",
+    )
+
+    parser.add_argument(
+        "--disable-avx",
+        action="store_true",
+        default=False,
+        help="Disables AVX. AVX is used in some ROCm libraries but "
+        "does not have checkpointing support yet. If simulation either "
+        "creates a checkpoint or restores from one, then AVX needs to "
+        "be disabled for correct functionality ",
+    )
+
+    parser.add_argument(
+        "--no-kvm-perf",
+        default=False,
+        action="store_true",
+        help="Disable KVM perf counters (use this with LSF / ETX)",
+    )
+
 
 def runGpuFSSystem(args):
     """
@@ -148,7 +186,8 @@ def runGpuFSSystem(args):
     # GPUFS is primarily designed to use the X86 KVM CPU. This model needs to
     # use multiple event queues when more than one CPU is simulated. Force it
     # on if that is the case.
-    args.host_parallel = True if args.num_cpus > 1 else False
+    if ObjectList.is_kvm_cpu(ObjectList.cpu_list.get(args.cpu_type)):
+        args.host_parallel = True if args.num_cpus > 1 else False
 
     # These are used by the protocols. They should not be set by the user.
     n_cu = args.num_compute_units
@@ -157,10 +196,15 @@ def runGpuFSSystem(args):
         math.ceil(float(n_cu) / args.cu_per_scalar_cache)
     )
 
-    # Verify MMIO trace is valid
-    mmio_md5 = hashlib.md5(open(args.gpu_mmio_trace, "rb").read()).hexdigest()
-    if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d":
-        m5.util.panic("MMIO file does not match gem5 resources")
+    # Verify MMIO trace is valid. This is only needed for Vega10 simulations.
+    # The md5sum refers to the md5sum of the Vega10 MMIO hardware trace in
+    # the gem5-resources repository. By checking it here, we avoid potential
+    # errors that would cause the driver not to load and simulations to fail.
+    if args.gpu_device == "Vega10":
+        mmio_file = open(args.gpu_mmio_trace, "rb")
+        mmio_md5 = hashlib.md5(mmio_file.read()).hexdigest()
+        if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d":
+            m5.util.panic("MMIO file does not match gem5 resources")
 
     system = makeGpuFSSystem(args)
 
@@ -184,6 +228,9 @@ def runGpuFSSystem(args):
 
     print("Running the simulation")
     sim_ticks = args.abs_max_tick
+    kernels_launched = 0
+    if args.debug_at_gpu_kernel != -1:
+        m5.trace.disable()
 
     exit_event = m5.simulate(sim_ticks)
 
@@ -199,11 +246,21 @@ def runGpuFSSystem(args):
             assert args.checkpoint_dir is not None
             m5.checkpoint(args.checkpoint_dir)
             break
+        elif "GPU Kernel Completed" in exit_event.getCause():
+            kernels_launched += 1
         else:
             print(
                 f"Unknown exit event: {exit_event.getCause()}. Continuing..."
             )
 
+        if kernels_launched == args.debug_at_gpu_kernel:
+            m5.trace.enable()
+        if kernels_launched == args.exit_at_gpu_kernel:
+            print(f"Exiting @ GPU kernel {kernels_launched}")
+            break
+
+        exit_event = m5.simulate(sim_ticks - m5.curTick())
+
     print(
         "Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause())
     )
diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
index 9697e50a04..4bca52c77e 100644
--- a/configs/example/gpufs/system/amdgpu.py
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -184,4 +184,27 @@ def connectGPU(system, args):
     elif args.gpu_device == "Vega10":
         system.pc.south_bridge.gpu.DeviceID = 0x6863
     else:
-        panic("Unknown GPU device: {}".format(args.gpu_device))
+        panic(f"Unknown GPU device: {args.gpu_device}")
+
+    # Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is
+    # a PCI capabilities list to traverse.
+    system.pc.south_bridge.gpu.Status = 0x0290
+
+    # The PCI capabilities are like a linked list. The list has a memory
+    # offset and a capability type ID read by the OS. Make the first
+    # capability at 0x80 and set the PXCAP (PCI express) capability to
+    # that address. Mark the type ID as PCI express.
+    # We leave the next ID of PXCAP blank to end the list.
+    system.pc.south_bridge.gpu.PXCAPBaseOffset = 0x80
+    system.pc.south_bridge.gpu.CapabilityPtr = 0x80
+    system.pc.south_bridge.gpu.PXCAPCapId = 0x10
+
+    # Set bits 7 and 8 in the second PCIe device capabilities register which
+    # reports support for PCIe atomics for 32 and 64 bits respectively.
+    # Bit 9 for 128-bit compare and swap is not set because the amdgpu driver
+    # does not check this.
+    system.pc.south_bridge.gpu.PXCAPDevCap2 = 0x00000180
+
+    # Set bit 6 to enable atomic requestor, meaning this device can request
+    # atomics from other PCI devices.
+    system.pc.south_bridge.gpu.PXCAPDevCtrl2 = 0x00000040
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 471892945e..ee0e0c0fbf 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -50,7 +50,7 @@ def makeGpuFSSystem(args):
         "earlyprintk=ttyS0",
         "console=ttyS0,9600",
         "lpj=7999923",
-        "root=/dev/sda1",
+        f"root={args.root_partition}",
         "drm_kms_helper.fbdev_emulation=0",
         "modprobe.blacklist=amdgpu",
         "modprobe.blacklist=psmouse",
@@ -231,7 +231,43 @@ def makeGpuFSSystem(args):
         clock=args.ruby_clock, voltage_domain=system.voltage_domain
     )
 
-    for (i, cpu) in enumerate(system.cpu):
+    # If we are using KVM cpu, enable AVX. AVX is used in some ROCm libraries
+    # such as rocBLAS which is used in higher level libraries like PyTorch.
+    use_avx = False
+    if ObjectList.is_kvm_cpu(TestCPUClass) and not args.disable_avx:
+        # AVX also requires CR4.osxsave to be 1. These must be set together
+        # or KVM will error out.
+        system.workload.enable_osxsave = 1
+        use_avx = True
+
+    # These values are taken from a real CPU and are further explained here:
+    # https://sandpile.org/x86/cpuid.htm#level_0000_000Dh
+    avx_extended_state = [
+        0x00000007,
+        0x00000340,
+        0x00000000,
+        0x00000340,
+        0x0000000F,
+        0x00000340,
+        0x00000000,
+        0x00000000,
+        0x00000100,
+        0x00000240,
+        0x00000000,
+        0x00000040,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+    ]
+
+    # This modifies the default value for ECX only (4th in this array).
+    # See: https://sandpile.org/x86/cpuid.htm#level_0000_0001h
+    # Enables AVX, OSXSAVE, XSAVE, POPCNT, SSE4.2, SSE4.1, CMPXCHG16B,
+    # and FMA.
+    avx_cpu_features = [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x1C983209]
+
+    for i, cpu in enumerate(system.cpu):
         # Break once we reach the shader "CPU"
         if i == args.num_cpus:
             break
@@ -247,6 +283,9 @@ def makeGpuFSSystem(args):
 
         for j in range(len(system.cpu[i].isa)):
             system.cpu[i].isa[j].vendor_string = "AuthenticAMD"
+            if use_avx:
+                system.cpu[i].isa[j].ExtendedState = avx_extended_state
+                system.cpu[i].isa[j].FamilyModelStepping = avx_cpu_features
 
     if args.host_parallel:
         # To get the KVM CPUs to run on different host CPUs, specify a
@@ -257,6 +296,12 @@ def makeGpuFSSystem(args):
                 obj.eventq_index = 0
             cpu.eventq_index = i + 1
 
+    # Disable KVM Perf counters if specified. This is useful for machines
+    # with more restrictive KVM paranoid levels.
+    if args.no_kvm_perf and ObjectList.is_kvm_cpu(TestCPUClass):
+        for i, cpu in enumerate(system.cpu[:-1]):
+            cpu.usePerf = False
+
     gpu_port_idx = (
         len(system.ruby._cpu_ports)
         - args.num_compute_units
diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py
index d92ea3fa3f..e106d051e7 100644
--- a/configs/example/lupv/run_lupv.py
+++ b/configs/example/lupv/run_lupv.py
@@ -42,7 +42,7 @@
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource, CustomResource
+from gem5.resources.resource import obtain_resource
 
 import argparse
 
@@ -98,8 +98,8 @@
 # Set the Full System workload.
 
 board.set_kernel_disk_workload(
-    kernel=Resource("riscv-lupio-linux-kernel"),
-    disk_image=Resource("riscv-lupio-busybox-img"),
+    kernel=obtain_resource("riscv-lupio-linux-kernel"),
+    disk_image=obtain_resource("riscv-lupio-busybox-img"),
 )
 
 
diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py
index aee2ef74d0..f26eabcacd 100644
--- a/configs/example/memcheck.py
+++ b/configs/example/memcheck.py
@@ -260,6 +260,7 @@
 # For each level, track the next subsys index to use
 next_subsys_index = [0] * (len(cachespec) + 1)
 
+
 # Recursive function to create a sub-tree of the cache and tester
 # hierarchy
 def make_cache_level(ncaches, prototypes, level, next_cache):
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 0cbbab5b4f..96ee11c107 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -145,6 +145,7 @@
 
 args = parser.parse_args()
 
+
 # Get the total number of testers
 def numtesters(cachespec, testerspec):
     # Determine the tester multiplier for each level as the
@@ -278,6 +279,7 @@ def numtesters(cachespec, testerspec):
 # For each level, track the next subsys index to use
 next_subsys_index = [0] * (len(cachespec) + 1)
 
+
 # Recursive function to create a sub-tree of the cache and tester
 # hierarchy
 def make_cache_level(ncaches, prototypes, level, next_cache):
diff --git a/configs/example/noc_config/2x4.py b/configs/example/noc_config/2x4.py
index 3dd2403799..b6c1e80fc5 100644
--- a/configs/example/noc_config/2x4.py
+++ b/configs/example/noc_config/2x4.py
@@ -35,6 +35,7 @@
 
 from ruby import CHI_config
 
+
 # CustomMesh parameters for a 2x4 mesh. Routers will have the following layout:
 #
 # 0 --- 1 --- 2 --- 3
diff --git a/configs/example/read_config.py b/configs/example/read_config.py
index 40c20ef501..27e23b69ee 100644
--- a/configs/example/read_config.py
+++ b/configs/example/read_config.py
@@ -140,7 +140,7 @@ def memory_bandwidth_parser(cls, flags, param):
     setattr(m5.params.__dict__[name], "parse_ini", classmethod(parser))
 
 
-class PortConnection(object):
+class PortConnection:
     """This class is similar to m5.params.PortRef but with just enough
     information for ConfigManager"""
 
@@ -151,7 +151,7 @@ def __init__(self, object_name, port_name, index):
 
     @classmethod
     def from_string(cls, str):
-        m = re.match("(.*)\.([^.\[]+)(\[(\d+)\])?", str)
+        m = re.match(r"(.*)\.([^.\[]+)(\[(\d+)\])?", str)
         object_name, port_name, whole_index, index = m.groups()
         if index is not None:
             index = int(index)
@@ -178,7 +178,7 @@ def to_list(v):
         return [v]
 
 
-class ConfigManager(object):
+class ConfigManager:
     """Manager for parsing a Root configuration from a config file"""
 
     def __init__(self, config):
@@ -296,7 +296,7 @@ def fill_in_children(self, object_name, obj):
     def parse_port_name(self, port):
         """Parse the name of a port"""
 
-        m = re.match("(.*)\.([^.\[]+)(\[(\d+)\])?", port)
+        m = re.match(r"(.*)\.([^.\[]+)(\[(\d+)\])?", port)
         peer, peer_port, whole_index, index = m.groups()
         if index is not None:
             index = int(index)
@@ -366,7 +366,6 @@ def increment_port_index(port):
             if port_has_correct_index(from_port) and port_has_correct_index(
                 to_port
             ):
-
                 connections_to_make.append((from_port, to_port))
 
                 increment_port_index(from_port)
@@ -416,7 +415,7 @@ def find_all_objects(self):
         self.bind_ports(connections)
 
 
-class ConfigFile(object):
+class ConfigFile:
     def get_flags(self):
         return set()
 
@@ -445,7 +444,7 @@ def get_object_children(self, object_name):
         pass
 
     def get_port_peers(self, object_name, port_name):
-        """Get the list of connected port names (in the string form
+        r"""Get the list of connected port names (in the string form
         object.port(\[index\])?) of the port object_name.port_name"""
         pass
 
@@ -508,7 +507,7 @@ def find_all_objects(self, node):
                 self.find_all_objects(elem)
 
     def load(self, config_file):
-        root = json.load(open(config_file, "r"))
+        root = json.load(open(config_file))
         self.object_dicts = {}
         self.find_all_objects(root)
 
diff --git a/configs/example/ruby_mem_test.py b/configs/example/ruby_mem_test.py
index c90950107e..9ad6a1b7ad 100644
--- a/configs/example/ruby_mem_test.py
+++ b/configs/example/ruby_mem_test.py
@@ -62,6 +62,12 @@
     default=0,
     help="percentage of accesses that should be functional",
 )
+parser.add_argument(
+    "--atomic",
+    type=int,
+    default=0,
+    help="percentage of accesses that should be atomic",
+)
 parser.add_argument(
     "--suppress-func-errors",
     action="store_true",
@@ -105,6 +111,7 @@
         max_loads=args.maxloads,
         percent_functional=args.functional,
         percent_uncacheable=0,
+        percent_atomic=args.atomic,
         progress_interval=args.progress,
         suppress_func_errors=args.suppress_func_errors,
     )
@@ -133,7 +140,7 @@
     dmas = []
 
 dma_ports = []
-for (i, dma) in enumerate(dmas):
+for i, dma in enumerate(dmas):
     dma_ports.append(dma.test)
 Ruby.create_system(args, False, system, dma_ports=dma_ports)
 
@@ -155,7 +162,7 @@
 
 assert len(cpus) == len(system.ruby._cpu_ports)
 
-for (i, cpu) in enumerate(cpus):
+for i, cpu in enumerate(cpus):
     #
     # Tie the cpu memtester ports to the correct system ports
     #
diff --git a/configs/example/sst/arm_fs.py b/configs/example/sst/arm_fs.py
index bee4be1118..b8fcb68e37 100644
--- a/configs/example/sst/arm_fs.py
+++ b/configs/example/sst/arm_fs.py
@@ -48,7 +48,7 @@
 
 class ArmSstSystem(ArmSystem):
     def __init__(self, cpu_clock_rate, **kwargs):
-        super(ArmSstSystem, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
         self.voltage_domain = VoltageDomain(voltage="1.0V")
         self.clk_domain = SrcClockDomain(
@@ -185,3 +185,4 @@ class VExpress_GEM5_V1_SST(VExpress_GEM5_V1):
     cpu.createInterruptController()
 
 root = Root(full_system=True, system=system)
+m5.instantiate()
diff --git a/configs/example/sst/riscv_fs.py b/configs/example/sst/riscv_fs.py
index fc8f8618c4..c82ad9a6b9 100644
--- a/configs/example/sst/riscv_fs.py
+++ b/configs/example/sst/riscv_fs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -29,7 +29,7 @@
 from os import path
 
 # For downloading the disk image
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 
 import argparse
 
@@ -104,16 +104,9 @@ def createHiFivePlatform(system):
 
     system.platform.pci_host.pio = system.membus.mem_side_ports
 
-    system.platform.rtc = RiscvRTC(frequency=Frequency("100MHz"))
+    system.platform.rtc = RiscvRTC(frequency=Frequency("10MHz"))
     system.platform.clint.int_pin = system.platform.rtc.int_pin
 
-    system.pma_checker = PMAChecker(
-        uncacheable=[
-            *system.platform._on_chip_ranges(),
-            *system.platform._off_chip_ranges(),
-        ]
-    )
-
     system.iobus = IOXBar()
     system.bridge = Bridge(delay="50ns")
     system.bridge.mem_side_port = system.iobus.cpu_side_ports
@@ -122,6 +115,15 @@ def createHiFivePlatform(system):
 
     system.platform.setNumCores(1)
 
+    for cpu in system.cpu:
+        # pma_checker has to be added for each of the system cpus.
+        cpu.mmu.pma_checker = PMAChecker(
+            uncacheable=[
+                *system.platform._on_chip_ranges(),
+                *system.platform._off_chip_ranges(),
+            ]
+        )
+
     system.platform.attachOnChipIO(system.membus)
     system.platform.attachOffChipIO(system.iobus)
 
@@ -139,7 +141,7 @@ def createHiFivePlatform(system):
 memory_size = args.memory_size
 
 # Try downloading the Resource
-bbl_resource = Resource("riscv-boot-exit-nodisk")
+bbl_resource = obtain_resource("riscv-boot-exit-nodisk")
 bbl_path = bbl_resource.get_local_path()
 
 system = System()
@@ -173,3 +175,4 @@ def createHiFivePlatform(system):
     cpu.createInterruptController()
 
 root = Root(full_system=True, system=system)
+m5.instantiate()
diff --git a/configs/learning_gem5/part1/caches.py b/configs/learning_gem5/part1/caches.py
index 3f7d26ed21..04c7d6797c 100644
--- a/configs/learning_gem5/part1/caches.py
+++ b/configs/learning_gem5/part1/caches.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
@@ -55,7 +54,7 @@ class L1Cache(Cache):
     tgts_per_mshr = 20
 
     def __init__(self, options=None):
-        super(L1Cache, self).__init__()
+        super().__init__()
         pass
 
     def connectBus(self, bus):
@@ -79,7 +78,7 @@ class L1ICache(L1Cache):
     )
 
     def __init__(self, opts=None):
-        super(L1ICache, self).__init__(opts)
+        super().__init__(opts)
         if not opts or not opts.l1i_size:
             return
         self.size = opts.l1i_size
@@ -100,7 +99,7 @@ class L1DCache(L1Cache):
     )
 
     def __init__(self, opts=None):
-        super(L1DCache, self).__init__(opts)
+        super().__init__(opts)
         if not opts or not opts.l1d_size:
             return
         self.size = opts.l1d_size
@@ -125,7 +124,7 @@ class L2Cache(Cache):
     SimpleOpts.add_option("--l2_size", help=f"L2 cache size. Default: {size}")
 
     def __init__(self, opts=None):
-        super(L2Cache, self).__init__()
+        super().__init__()
         if not opts or not opts.l2_size:
             return
         self.size = opts.l2_size
diff --git a/configs/learning_gem5/part1/simple-arm.py b/configs/learning_gem5/part1/simple-arm.py
index 62f7645c5a..fac53a78af 100644
--- a/configs/learning_gem5/part1/simple-arm.py
+++ b/configs/learning_gem5/part1/simple-arm.py
@@ -73,6 +73,6 @@
 root = Root(full_system=False, system=system)
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part1/simple-riscv.py b/configs/learning_gem5/part1/simple-riscv.py
index f05ca4ab50..6e296d5fc0 100644
--- a/configs/learning_gem5/part1/simple-riscv.py
+++ b/configs/learning_gem5/part1/simple-riscv.py
@@ -73,6 +73,6 @@
 root = Root(full_system=False, system=system)
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part1/simple.py b/configs/learning_gem5/part1/simple.py
index e36cd78c8e..3792fddc4d 100644
--- a/configs/learning_gem5/part1/simple.py
+++ b/configs/learning_gem5/part1/simple.py
@@ -113,6 +113,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part1/two_level.py b/configs/learning_gem5/part1/two_level.py
index 8aa7dd7e83..c6f4f4872f 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
@@ -140,6 +139,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part2/hello_goodbye.py b/configs/learning_gem5/part2/hello_goodbye.py
index e4b70ba3ae..4a4171c990 100644
--- a/configs/learning_gem5/part2/hello_goodbye.py
+++ b/configs/learning_gem5/part2/hello_goodbye.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
@@ -50,6 +49,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part2/run_simple.py b/configs/learning_gem5/part2/run_simple.py
index be5f6ee7b8..ce7ea277e8 100644
--- a/configs/learning_gem5/part2/run_simple.py
+++ b/configs/learning_gem5/part2/run_simple.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
@@ -48,6 +47,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part2/simple_cache.py b/configs/learning_gem5/part2/simple_cache.py
index 4228956126..2ba138c879 100644
--- a/configs/learning_gem5/part2/simple_cache.py
+++ b/configs/learning_gem5/part2/simple_cache.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
@@ -103,6 +102,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part2/simple_memobj.py b/configs/learning_gem5/part2/simple_memobj.py
index 20f4362b81..c845241ced 100644
--- a/configs/learning_gem5/part2/simple_memobj.py
+++ b/configs/learning_gem5/part2/simple_memobj.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
@@ -101,6 +100,6 @@
 # instantiate all of the objects we've created above
 m5.instantiate()
 
-print("Beginning simulation!")
+print(f"Beginning simulation!")
 exit_event = m5.simulate()
-print("Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()))
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part3/msi_caches.py b/configs/learning_gem5/part3/msi_caches.py
index 13b2a11b1a..f628cca96d 100644
--- a/configs/learning_gem5/part3/msi_caches.py
+++ b/configs/learning_gem5/part3/msi_caches.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Power
 # All rights reserved.
 #
@@ -48,7 +47,7 @@ def __init__(self):
         if buildEnv["PROTOCOL"] != "MSI":
             fatal("This system assumes MSI from learning gem5!")
 
-        super(MyCacheSystem, self).__init__()
+        super().__init__()
 
     def setup(self, system, cpus, mem_ctrls):
         """Set up the Ruby cache subsystem. Note: This can't be done in the
@@ -110,7 +109,6 @@ def setup(self, system, cpus, mem_ctrls):
 
 
 class L1Cache(L1Cache_Controller):
-
     _version = 0
 
     @classmethod
@@ -122,7 +120,7 @@ def __init__(self, system, ruby_system, cpu):
         """CPUs are needed to grab the clock domain and system is needed for
         the cache block size.
         """
-        super(L1Cache, self).__init__()
+        super().__init__()
 
         self.version = self.versionCount()
         # This is the cache memory object that stores the cache data and tags
@@ -174,7 +172,6 @@ def connectQueues(self, ruby_system):
 
 
 class DirController(Directory_Controller):
-
     _version = 0
 
     @classmethod
@@ -186,7 +183,7 @@ def __init__(self, ruby_system, ranges, mem_ctrls):
         """ranges are the memory ranges assigned to this controller."""
         if len(mem_ctrls) > 1:
             panic("This cache system can only be connected to one mem ctrl")
-        super(DirController, self).__init__()
+        super().__init__()
         self.version = self.versionCount()
         self.addr_ranges = ranges
         self.ruby_system = ruby_system
@@ -218,7 +215,7 @@ class MyNetwork(SimpleNetwork):
     """A simple point-to-point network. This doesn't not use garnet."""
 
     def __init__(self, ruby_system):
-        super(MyNetwork, self).__init__()
+        super().__init__()
         self.netifs = []
         self.ruby_system = ruby_system
 
diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py b/configs/learning_gem5/part3/ruby_caches_MI_example.py
index 8c25a9b2d9..aec54b1068 100644
--- a/configs/learning_gem5/part3/ruby_caches_MI_example.py
+++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
@@ -50,7 +49,7 @@ def __init__(self):
         if buildEnv["PROTOCOL"] != "MI_example":
             fatal("This system assumes MI_example!")
 
-        super(MyCacheSystem, self).__init__()
+        super().__init__()
 
     def setup(self, system, cpus, mem_ctrls):
         """Set up the Ruby cache subsystem. Note: This can't be done in the
@@ -108,7 +107,6 @@ def setup(self, system, cpus, mem_ctrls):
 
 
 class L1Cache(L1Cache_Controller):
-
     _version = 0
 
     @classmethod
@@ -120,7 +118,7 @@ def __init__(self, system, ruby_system, cpu):
         """CPUs are needed to grab the clock domain and system is needed for
         the cache block size.
         """
-        super(L1Cache, self).__init__()
+        super().__init__()
 
         self.version = self.versionCount()
         # This is the cache memory object that stores the cache data and tags
@@ -163,7 +161,6 @@ def connectQueues(self, ruby_system):
 
 
 class DirController(Directory_Controller):
-
     _version = 0
 
     @classmethod
@@ -175,7 +172,7 @@ def __init__(self, ruby_system, ranges, mem_ctrls):
         """ranges are the memory ranges assigned to this controller."""
         if len(mem_ctrls) > 1:
             panic("This cache system can only be connected to one mem ctrl")
-        super(DirController, self).__init__()
+        super().__init__()
         self.version = self.versionCount()
         self.addr_ranges = ranges
         self.ruby_system = ruby_system
@@ -204,7 +201,7 @@ class MyNetwork(SimpleNetwork):
     """A simple point-to-point network. This doesn't not use garnet."""
 
     def __init__(self, ruby_system):
-        super(MyNetwork, self).__init__()
+        super().__init__()
         self.netifs = []
         self.ruby_system = ruby_system
 
diff --git a/configs/learning_gem5/part3/ruby_test.py b/configs/learning_gem5/part3/ruby_test.py
index e46f07bb0a..5b00e1169e 100644
--- a/configs/learning_gem5/part3/ruby_test.py
+++ b/configs/learning_gem5/part3/ruby_test.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
diff --git a/configs/learning_gem5/part3/simple_ruby.py b/configs/learning_gem5/part3/simple_ruby.py
index f3f84353e8..6e22beb4dd 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2015 Jason Power
 # All rights reserved.
 #
diff --git a/configs/learning_gem5/part3/test_caches.py b/configs/learning_gem5/part3/test_caches.py
index 7b0ce52dad..ebd646ffab 100644
--- a/configs/learning_gem5/part3/test_caches.py
+++ b/configs/learning_gem5/part3/test_caches.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Power
 # All rights reserved.
 #
@@ -48,7 +47,7 @@ def __init__(self):
         if buildEnv["PROTOCOL"] != "MSI":
             fatal("This system assumes MSI from learning gem5!")
 
-        super(TestCacheSystem, self).__init__()
+        super().__init__()
 
     def setup(self, system, tester, mem_ctrls):
         """Set up the Ruby cache subsystem. Note: This can't be done in the
diff --git a/configs/network/Network.py b/configs/network/Network.py
index a5334741c0..dbac88c246 100644
--- a/configs/network/Network.py
+++ b/configs/network/Network.py
@@ -121,7 +121,6 @@ def define_options(parser):
 
 
 def create_network(options, ruby):
-
     # Allow legacy users to use garnet through garnet2.0 option
     # until next gem5 release.
     if options.network == "garnet2.0":
@@ -162,7 +161,6 @@ def create_network(options, ruby):
 
 
 def init_network(options, network, InterfaceClass):
-
     if options.network == "garnet":
         network.num_rows = options.mesh_rows
         network.vcs_per_vnet = options.vcs_per_vnet
diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py
index 030b45cbb6..ec06fbad34 100644
--- a/configs/ruby/AMD_Base_Constructor.py
+++ b/configs/ruby/AMD_Base_Constructor.py
@@ -38,6 +38,7 @@
 
 from topologies.Cluster import Cluster
 
+
 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
 #
@@ -115,7 +116,6 @@ def construct(options, system, ruby_system):
     cpuCluster = None
     cpuCluster = Cluster(name="CPU Cluster", extBW=8, intBW=8)  # 16 GB/s
     for i in range((options.num_cpus + 1) // 2):
-
         cp_cntrl = CPCntrl()
         cp_cntrl.create(options, ruby_system, system)
 
diff --git a/configs/ruby/CHI.py b/configs/ruby/CHI.py
index 96537e558a..2487f696fd 100644
--- a/configs/ruby/CHI.py
+++ b/configs/ruby/CHI.py
@@ -65,7 +65,6 @@ def read_config_file(file):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "CHI":
         m5.panic("This script requires the CHI build")
 
diff --git a/configs/ruby/CHI_config.py b/configs/ruby/CHI_config.py
index 4f2580c373..3cccfd0676 100644
--- a/configs/ruby/CHI_config.py
+++ b/configs/ruby/CHI_config.py
@@ -116,7 +116,7 @@ class NoC_Params:
         router_list = None
 
     def __init__(self, ruby_system):
-        super(CHI_Node, self).__init__()
+        super().__init__()
         self._ruby_system = ruby_system
         self._network = ruby_system.network
 
@@ -201,7 +201,7 @@ class CHI_Cache_Controller(Cache_Controller):
     """
 
     def __init__(self, ruby_system):
-        super(CHI_Cache_Controller, self).__init__(
+        super().__init__(
             version=Versions.getVersion(Cache_Controller),
             ruby_system=ruby_system,
             mandatoryQueue=MessageBuffer(),
@@ -228,7 +228,7 @@ class CHI_L1Controller(CHI_Cache_Controller):
     """
 
     def __init__(self, ruby_system, sequencer, cache, prefetcher):
-        super(CHI_L1Controller, self).__init__(ruby_system)
+        super().__init__(ruby_system)
         self.sequencer = sequencer
         self.cache = cache
         self.use_prefetcher = False
@@ -244,6 +244,7 @@ def __init__(self, ruby_system, sequencer, cache, prefetcher):
         self.alloc_on_readunique = True
         self.alloc_on_readonce = True
         self.alloc_on_writeback = True
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = True
@@ -264,7 +265,7 @@ class CHI_L2Controller(CHI_Cache_Controller):
     """
 
     def __init__(self, ruby_system, cache, prefetcher):
-        super(CHI_L2Controller, self).__init__(ruby_system)
+        super().__init__(ruby_system)
         self.sequencer = NULL
         self.cache = cache
         self.use_prefetcher = False
@@ -280,6 +281,7 @@ def __init__(self, ruby_system, cache, prefetcher):
         self.alloc_on_readunique = True
         self.alloc_on_readonce = True
         self.alloc_on_writeback = True
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = True
@@ -299,7 +301,7 @@ class CHI_HNFController(CHI_Cache_Controller):
     """
 
     def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
-        super(CHI_HNFController, self).__init__(ruby_system)
+        super().__init__(ruby_system)
         self.sequencer = NULL
         self.cache = cache
         self.use_prefetcher = False
@@ -316,6 +318,7 @@ def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
         self.alloc_on_readunique = False
         self.alloc_on_readonce = True
         self.alloc_on_writeback = True
+        self.alloc_on_atomic = True
         self.dealloc_on_unique = True
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = False
@@ -337,7 +340,7 @@ class CHI_MNController(MiscNode_Controller):
     def __init__(
         self, ruby_system, addr_range, l1d_caches, early_nonsync_comp
     ):
-        super(CHI_MNController, self).__init__(
+        super().__init__(
             version=Versions.getVersion(MiscNode_Controller),
             ruby_system=ruby_system,
             mandatoryQueue=MessageBuffer(),
@@ -368,7 +371,7 @@ class CHI_DMAController(CHI_Cache_Controller):
     """
 
     def __init__(self, ruby_system, sequencer):
-        super(CHI_DMAController, self).__init__(ruby_system)
+        super().__init__(ruby_system)
         self.sequencer = sequencer
 
         class DummyCache(RubyCache):
@@ -392,6 +395,7 @@ class DummyCache(RubyCache):
         self.alloc_on_readunique = False
         self.alloc_on_readonce = False
         self.alloc_on_writeback = False
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = False
@@ -459,7 +463,7 @@ def __init__(
         l1Iprefetcher_type=None,
         l1Dprefetcher_type=None,
     ):
-        super(CHI_RNF, self).__init__(ruby_system)
+        super().__init__(ruby_system)
 
         self._block_size_bits = int(math.log(cache_line_size, 2))
 
@@ -602,7 +606,7 @@ def getAddrRanges(cls, hnf_idx):
     # The CHI controller can be a child of this object or another if
     # 'parent' if specified
     def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
-        super(CHI_HNF, self).__init__(ruby_system)
+        super().__init__(ruby_system)
 
         addr_ranges, intlvHighBit = self.getAddrRanges(hnf_idx)
         # All ranges should have the same interleaving
@@ -640,7 +644,7 @@ class NoC_Params(CHI_Node.NoC_Params):
     # The CHI controller can be a child of this object or another if
     # 'parent' if specified
     def __init__(self, ruby_system, l1d_caches, early_nonsync_comp=False):
-        super(CHI_MN, self).__init__(ruby_system)
+        super().__init__(ruby_system)
 
         # MiscNode has internal address range starting at 0
         addr_range = AddrRange(0, size="1kB")
@@ -671,7 +675,7 @@ class CHI_SNF_Base(CHI_Node):
     # The CHI controller can be a child of this object or another if
     # 'parent' if specified
     def __init__(self, ruby_system, parent):
-        super(CHI_SNF_Base, self).__init__(ruby_system)
+        super().__init__(ruby_system)
 
         self._cntrl = Memory_Controller(
             version=Versions.getVersion(Memory_Controller),
@@ -718,7 +722,7 @@ class CHI_SNF_BootMem(CHI_SNF_Base):
     """
 
     def __init__(self, ruby_system, parent, bootmem):
-        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
+        super().__init__(ruby_system, parent)
         self._cntrl.memory_out_port = bootmem.port
         self._cntrl.addr_ranges = self.getMemRange(bootmem)
 
@@ -729,7 +733,7 @@ class CHI_SNF_MainMem(CHI_SNF_Base):
     """
 
     def __init__(self, ruby_system, parent, mem_ctrl=None):
-        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
+        super().__init__(ruby_system, parent)
         if mem_ctrl:
             self._cntrl.memory_out_port = mem_ctrl.port
             self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
@@ -744,7 +748,7 @@ class CHI_RNI_Base(CHI_Node):
     # The CHI controller can be a child of this object or another if
     # 'parent' if specified
     def __init__(self, ruby_system, parent):
-        super(CHI_RNI_Base, self).__init__(ruby_system)
+        super().__init__(ruby_system)
 
         self._sequencer = RubySequencer(
             version=Versions.getSeqId(),
@@ -773,7 +777,7 @@ class CHI_RNI_DMA(CHI_RNI_Base):
     """
 
     def __init__(self, ruby_system, dma_port, parent):
-        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
+        super().__init__(ruby_system, parent)
         assert dma_port != None
         self._sequencer.in_ports = dma_port
 
@@ -784,5 +788,5 @@ class CHI_RNI_IO(CHI_RNI_Base):
     """
 
     def __init__(self, ruby_system, parent):
-        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
+        super().__init__(ruby_system, parent)
         ruby_system._io_port = self._sequencer
diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index ee8d570498..665f739b4b 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -154,6 +154,8 @@ def create(self, options, ruby_system, system):
             dataAccessLatency=options.TCP_latency,
         )
         self.L1cache.resourceStalls = options.no_resource_stalls
+        self.L1cache.dataArrayBanks = options.tcp_num_banks
+        self.L1cache.tagArrayBanks = options.tcp_num_banks
         self.L1cache.create(options)
         self.issue_latency = 1
         # TCP_Controller inherits this from RubyController
@@ -273,6 +275,8 @@ class TCC(RubyCache):
 
     def create(self, options):
         self.assoc = options.tcc_assoc
+        self.atomicLatency = options.atomic_alu_latency
+        self.atomicALUs = options.tcc_num_atomic_alus
         if hasattr(options, "bw_scalor") and options.bw_scalor > 0:
             s = options.num_compute_units
             tcc_size = s * 128
@@ -298,7 +302,10 @@ def create(self, options):
 class TCCCntrl(TCC_Controller, CntrlBase):
     def create(self, options, ruby_system, system):
         self.version = self.versionCount()
-        self.L2cache = TCC()
+        self.L2cache = TCC(
+            tagAccessLatency=options.tcc_tag_access_latency,
+            dataAccessLatency=options.tcc_data_access_latency,
+        )
         self.L2cache.create(options)
         self.L2cache.resourceStalls = options.no_tcc_resource_stalls
 
@@ -489,10 +496,45 @@ def define_options(parser):
         help="Size of the mandatory queue in the GPU scalar "
         "cache controller",
     )
+    parser.add_argument(
+        "--glc-atomic-latency", type=int, default=1, help="GLC Atomic Latency"
+    )
+    parser.add_argument(
+        "--atomic-alu-latency", type=int, default=0, help="Atomic ALU Latency"
+    )
+    parser.add_argument(
+        "--tcc-num-atomic-alus",
+        type=int,
+        default=64,
+        help="Number of atomic ALUs in the TCC",
+    )
+    parser.add_argument(
+        "--tcp-num-banks",
+        type=int,
+        default="16",
+        help="Num of banks in L1 cache",
+    )
+    parser.add_argument(
+        "--tcc-num-banks",
+        type=int,
+        default="16",
+        help="Num of banks in L2 cache",
+    )
+    parser.add_argument(
+        "--tcc-tag-access-latency",
+        type=int,
+        default="2",
+        help="Tag access latency in L2 cache",
+    )
+    parser.add_argument(
+        "--tcc-data-access-latency",
+        type=int,
+        default="8",
+        help="Data access latency in L2 cache",
+    )
 
 
 def construct_dirs(options, system, ruby_system, network):
-
     dir_cntrl_nodes = []
 
     # For an odd number of CPUs, still create the right number of controllers
@@ -524,6 +566,7 @@ def construct_dirs(options, system, ruby_system, network):
         dir_cntrl.create(options, dir_ranges, ruby_system, system)
         dir_cntrl.number_of_TBEs = options.num_tbes
         dir_cntrl.useL3OnWT = options.use_L3_on_WT
+        dir_cntrl.L2isWB = options.WB_L2
         # the number_of_TBEs is inclusive of TBEs below
 
         # Connect the Directory controller to the ruby network
@@ -560,7 +603,6 @@ def construct_dirs(options, system, ruby_system, network):
 
 
 def construct_gpudirs(options, system, ruby_system, network):
-
     dir_cntrl_nodes = []
     mem_ctrls = []
 
@@ -588,6 +630,7 @@ def construct_gpudirs(options, system, ruby_system, network):
         dir_cntrl.create(options, [addr_range], ruby_system, system)
         dir_cntrl.number_of_TBEs = options.num_tbes
         dir_cntrl.useL3OnWT = False
+        dir_cntrl.L2isWB = options.WB_L2
 
         # Connect the Directory controller to the ruby network
         dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
@@ -649,12 +692,10 @@ def construct_gpudirs(options, system, ruby_system, network):
 
 
 def construct_corepairs(options, system, ruby_system, network):
-
     cpu_sequencers = []
     cp_cntrl_nodes = []
 
     for i in range((options.num_cpus + 1) // 2):
-
         cp_cntrl = CPCntrl()
         cp_cntrl.create(options, ruby_system, system)
 
@@ -689,7 +730,6 @@ def construct_corepairs(options, system, ruby_system, network):
 
 
 def construct_tcps(options, system, ruby_system, network):
-
     tcp_sequencers = []
     tcp_cntrl_nodes = []
 
@@ -697,7 +737,6 @@ def construct_tcps(options, system, ruby_system, network):
     TCC_bits = int(math.log(options.num_tccs, 2))
 
     for i in range(options.num_compute_units):
-
         tcp_cntrl = TCPCntrl(
             TCC_select_num_bits=TCC_bits, issue_latency=1, number_of_TBEs=2560
         )
@@ -737,7 +776,6 @@ def construct_tcps(options, system, ruby_system, network):
 
 
 def construct_sqcs(options, system, ruby_system, network):
-
     sqc_sequencers = []
     sqc_cntrl_nodes = []
 
@@ -745,7 +783,6 @@ def construct_sqcs(options, system, ruby_system, network):
     TCC_bits = int(math.log(options.num_tccs, 2))
 
     for i in range(options.num_sqc):
-
         sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
         sqc_cntrl.create(options, ruby_system, system)
 
@@ -772,7 +809,6 @@ def construct_sqcs(options, system, ruby_system, network):
 
 
 def construct_scalars(options, system, ruby_system, network):
-
     scalar_sequencers = []
     scalar_cntrl_nodes = []
 
@@ -805,7 +841,6 @@ def construct_scalars(options, system, ruby_system, network):
 
 
 def construct_cmdprocs(options, system, ruby_system, network):
-
     cmdproc_sequencers = []
     cmdproc_cntrl_nodes = []
 
@@ -813,7 +848,6 @@ def construct_cmdprocs(options, system, ruby_system, network):
     TCC_bits = int(math.log(options.num_tccs, 2))
 
     for i in range(options.num_cp):
-
         tcp_ID = options.num_compute_units + i
         sqc_ID = options.num_sqc + i
 
@@ -866,15 +900,14 @@ def construct_cmdprocs(options, system, ruby_system, network):
 
 
 def construct_tccs(options, system, ruby_system, network):
-
     tcc_cntrl_nodes = []
 
     for i in range(options.num_tccs):
-
         tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
         tcc_cntrl.create(options, ruby_system, system)
         tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
         tcc_cntrl.l2_response_latency = options.TCC_latency
+        tcc_cntrl.glc_atomic_latency = options.glc_atomic_latency
         tcc_cntrl_nodes.append(tcc_cntrl)
         tcc_cntrl.WB = options.WB_L2
         tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
diff --git a/configs/ruby/Garnet_standalone.py b/configs/ruby/Garnet_standalone.py
index ba5216eb24..eb481bb4ad 100644
--- a/configs/ruby/Garnet_standalone.py
+++ b/configs/ruby/Garnet_standalone.py
@@ -31,6 +31,7 @@
 from m5.util import addToPath
 from .Ruby import create_topology, create_directories
 
+
 #
 # Declare caches used by the protocol
 #
diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py
index 70f9c82723..077c461b69 100644
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -35,6 +35,7 @@
 from .Ruby import send_evicts
 from common import FileSystemConfig
 
+
 #
 # Declare caches used by the protocol
 #
@@ -77,7 +78,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MESI_Three_Level":
         fatal(
             "This script requires the MESI_Three_Level protocol to be\
diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py
index 883db9800e..f2c2ecfd9f 100644
--- a/configs/ruby/MESI_Three_Level_HTM.py
+++ b/configs/ruby/MESI_Three_Level_HTM.py
@@ -35,6 +35,7 @@
 from .Ruby import send_evicts
 from common import FileSystemConfig
 
+
 #
 # Declare caches used by the protocol
 #
@@ -77,7 +78,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MESI_Three_Level_HTM":
         fatal(
             "This script requires the MESI_Three_Level protocol to be\
diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py
index 80a823bc52..7326a6cab8 100644
--- a/configs/ruby/MESI_Two_Level.py
+++ b/configs/ruby/MESI_Two_Level.py
@@ -32,6 +32,7 @@
 from .Ruby import create_topology, create_directories
 from .Ruby import send_evicts
 
+
 #
 # Declare caches used by the protocol
 #
@@ -50,7 +51,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MESI_Two_Level":
         fatal("This script requires the MESI_Two_Level protocol to be built.")
 
diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py
index 0ccfd75506..7adf5b8ebd 100644
--- a/configs/ruby/MI_example.py
+++ b/configs/ruby/MI_example.py
@@ -32,6 +32,7 @@
 from .Ruby import create_topology, create_directories
 from .Ruby import send_evicts
 
+
 #
 # Declare caches used by the protocol
 #
@@ -46,7 +47,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MI_example":
         panic("This script requires the MI_example protocol to be built.")
 
diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py
index 30c7678f08..2c7d743ede 100644
--- a/configs/ruby/MOESI_AMD_Base.py
+++ b/configs/ruby/MOESI_AMD_Base.py
@@ -327,7 +327,6 @@ def create_system(
     # For an odd number of CPUs, still create the right number of controllers
     cpuCluster = Cluster(extBW=512, intBW=512)  # 1 TB/s
     for i in range((options.num_cpus + 1) // 2):
-
         cp_cntrl = CPCntrl()
         cp_cntrl.create(options, ruby_system, system)
 
diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py
index ead03c1693..ecee4a464e 100644
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -44,6 +44,7 @@
 from .Ruby import create_topology, create_directories
 from .Ruby import send_evicts
 
+
 #
 # Declare caches used by the protocol
 #
@@ -64,7 +65,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MOESI_CMP_directory":
         panic(
             "This script requires the MOESI_CMP_directory protocol to be built."
diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py
index a610db5076..89852e07c3 100644
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -32,6 +32,7 @@
 from .Ruby import create_topology, create_directories
 from .Ruby import send_evicts
 
+
 #
 # Declare caches used by the protocol
 #
@@ -71,7 +72,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MOESI_CMP_token":
         panic("This script requires the MOESI_CMP_token protocol to be built.")
 
diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py
index 65ec11a1ad..6ed6a2d50f 100644
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -33,6 +33,7 @@
 from .Ruby import send_evicts
 from common import FileSystemConfig
 
+
 #
 # Declare caches used by the protocol
 #
@@ -70,7 +71,6 @@ def define_options(parser):
 def create_system(
     options, full_system, system, dma_ports, bootmem, ruby_system, cpus
 ):
-
     if buildEnv["PROTOCOL"] != "MOESI_hammer":
         panic("This script requires the MOESI_hammer protocol to be built.")
 
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index d3c2efbb3f..a7aeb6b16f 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -221,7 +221,6 @@ def create_system(
     bootmem=None,
     cpus=None,
 ):
-
     system.ruby = RubySystem()
     ruby = system.ruby
 
diff --git a/configs/splash2/cluster.py b/configs/splash2/cluster.py
index 4c09eee8f8..f7157d98c2 100644
--- a/configs/splash2/cluster.py
+++ b/configs/splash2/cluster.py
@@ -64,6 +64,7 @@
 
 args = parser.parse_args()
 
+
 # --------------------
 # Define Splash2 Benchmarks
 # ====================
diff --git a/configs/splash2/run.py b/configs/splash2/run.py
index 08c11e0f5a..4bc2cf653d 100644
--- a/configs/splash2/run.py
+++ b/configs/splash2/run.py
@@ -66,6 +66,7 @@
     print("Specify the number of cpus with -n")
     sys.exit(1)
 
+
 # --------------------
 # Define Splash2 Benchmarks
 # ====================
diff --git a/configs/topologies/BaseTopology.py b/configs/topologies/BaseTopology.py
index cdcca3f7eb..2e5132927c 100644
--- a/configs/topologies/BaseTopology.py
+++ b/configs/topologies/BaseTopology.py
@@ -27,7 +27,7 @@
 import m5
 
 
-class BaseTopology(object):
+class BaseTopology:
     description = "BaseTopology"
 
     def __init__(self):
diff --git a/configs/topologies/Crossbar.py b/configs/topologies/Crossbar.py
index e0d220a0fd..45929b18ed 100644
--- a/configs/topologies/Crossbar.py
+++ b/configs/topologies/Crossbar.py
@@ -34,7 +34,6 @@ class Crossbar(SimpleTopology):
     description = "Crossbar"
 
     def makeTopology(self, options, network, IntLink, ExtLink, Router):
-
         # default values for link latency and router latency.
         # Can be over-ridden on a per link/router basis
         link_latency = options.link_latency  # used by simple and garnet
diff --git a/configs/topologies/CustomMesh.py b/configs/topologies/CustomMesh.py
index c62b39a9c2..0f0d6765cf 100644
--- a/configs/topologies/CustomMesh.py
+++ b/configs/topologies/CustomMesh.py
@@ -67,7 +67,6 @@ def _makeMesh(
         cross_links,
         cross_link_latency,
     ):
-
         # East->West, West->East, North->South, South->North
         # XY routing weights
         link_weights = [1, 1, 2, 2]
@@ -171,7 +170,9 @@ def _makeMesh(
     def _createRNFRouter(self, mesh_router):
         # Create a zero-latency router bridging node controllers
         # and the mesh router
-        node_router = self._Router(router_id=len(self._routers), latency=0)
+        node_router = self._Router(
+            router_id=len(self._routers), latency=self.node_router_latency
+        )
         self._routers.append(node_router)
 
         # connect node_router <-> mesh router
@@ -270,6 +271,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
         self._ExtLink = ExtLink
         self._Router = Router
 
+        self.node_router_latency = 1 if options.network == "garnet" else 0
         if hasattr(options, "router_link_latency"):
             self._router_link_latency = options.router_link_latency
             self._node_link_latency = options.node_link_latency
diff --git a/configs/topologies/MeshDirCorners_XY.py b/configs/topologies/MeshDirCorners_XY.py
index 6faf340c5b..1f6eb4297b 100644
--- a/configs/topologies/MeshDirCorners_XY.py
+++ b/configs/topologies/MeshDirCorners_XY.py
@@ -91,7 +91,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
 
         # Connect each cache controller to the appropriate router
         ext_links = []
-        for (i, n) in enumerate(cache_nodes):
+        for i, n in enumerate(cache_nodes):
             cntrl_level, router_id = divmod(i, num_routers)
             assert cntrl_level < caches_per_router
             ext_links.append(
@@ -161,7 +161,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
         link_count += 1
 
         # Connect the dma nodes to router 0.  These should only be DMA nodes.
-        for (i, node) in enumerate(dma_nodes):
+        for i, node in enumerate(dma_nodes):
             assert node.type == "DMA_Controller"
             ext_links.append(
                 ExtLink(
diff --git a/configs/topologies/Mesh_XY.py b/configs/topologies/Mesh_XY.py
index 94cb770750..e5402d3d83 100644
--- a/configs/topologies/Mesh_XY.py
+++ b/configs/topologies/Mesh_XY.py
@@ -87,7 +87,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
 
         # Connect each node to the appropriate router
         ext_links = []
-        for (i, n) in enumerate(network_nodes):
+        for i, n in enumerate(network_nodes):
             cntrl_level, router_id = divmod(i, num_routers)
             assert cntrl_level < cntrls_per_router
             ext_links.append(
@@ -102,7 +102,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
 
         # Connect the remainding nodes to router 0.  These should only be
         # DMA nodes.
-        for (i, node) in enumerate(remainder_nodes):
+        for i, node in enumerate(remainder_nodes):
             assert node.type == "DMA_Controller"
             assert i < remainder
             ext_links.append(
diff --git a/configs/topologies/Mesh_westfirst.py b/configs/topologies/Mesh_westfirst.py
index 663c31e2cf..45702b759a 100644
--- a/configs/topologies/Mesh_westfirst.py
+++ b/configs/topologies/Mesh_westfirst.py
@@ -90,7 +90,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
 
         # Connect each node to the appropriate router
         ext_links = []
-        for (i, n) in enumerate(network_nodes):
+        for i, n in enumerate(network_nodes):
             cntrl_level, router_id = divmod(i, num_routers)
             assert cntrl_level < cntrls_per_router
             ext_links.append(
@@ -105,7 +105,7 @@ def makeTopology(self, options, network, IntLink, ExtLink, Router):
 
         # Connect the remainding nodes to router 0.  These should only be
         # DMA nodes.
-        for (i, node) in enumerate(remainder_nodes):
+        for i, node in enumerate(remainder_nodes):
             assert node.type == "DMA_Controller"
             assert i < remainder
             ext_links.append(
diff --git a/ext/drampower/src/CmdScheduler.h b/ext/drampower/src/CmdScheduler.h
index 58efd279b1..1497304f54 100644
--- a/ext/drampower/src/CmdScheduler.h
+++ b/ext/drampower/src/CmdScheduler.h
@@ -84,8 +84,7 @@ class cmdScheduler {
     std::string  name;
     physicalAddr PhysicalAddr;
     // sorting the commands according to their scheduling time.
-    struct commandItemSorter : public std::binary_function<commandItem&,
-                                                           commandItem&, bool>{
+    struct commandItemSorter {
       bool operator()(const commandItem& lhs,
                       const commandItem& rhs) const
       {
diff --git a/util/dockerfiles/ubuntu-18.04_all-dependencies/Dockerfile b/ext/dramsys/CMakeLists.txt
similarity index 69%
rename from util/dockerfiles/ubuntu-18.04_all-dependencies/Dockerfile
rename to ext/dramsys/CMakeLists.txt
index 629fc5d614..ada9369124 100644
--- a/util/dockerfiles/ubuntu-18.04_all-dependencies/Dockerfile
+++ b/ext/dramsys/CMakeLists.txt
@@ -1,5 +1,5 @@
-# Copyright (c) 2020 The Regents of the University of California
-# All Rights Reserved.
+# Copyright (c) 2023 Fraunhofer IESE
+# All rights reserved
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
@@ -24,16 +24,15 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:18.04
+cmake_minimum_required(VERSION 3.22.0)
+project(DRAMSys)
 
-RUN apt -y update && apt -y upgrade && \
-    apt -y install build-essential git m4 scons zlib1g zlib1g-dev \
-    libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
-    python3-dev python3 doxygen libboost-all-dev \
-    libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config \
-    python3-pip python3-venv
+set(BUILD_SHARED_LIBS OFF)
 
-RUN pip3 install black mypy pre-commit
+add_library(systemc INTERFACE)
 
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python2 1
+target_include_directories(systemc INTERFACE "${SCONS_SOURCE_DIR}/src/systemc/ext/systemc_home/include")
+
+add_library(SystemC::systemc ALIAS systemc)
+
+add_subdirectory(DRAMSys)
diff --git a/ext/dramsys/README b/ext/dramsys/README
index 477da52895..1af3903b12 100644
--- a/ext/dramsys/README
+++ b/ext/dramsys/README
@@ -1,10 +1,13 @@
-Follow these steps to get DRAMSys as part of gem5
+Follow these steps to build DRAMSys as part of gem5
 
 1. Go to ext/dramsys (this directory)
-2. Clone DRAMSys: 'git clone --recursive git@github.com:tukl-msd/DRAMSys.git DRAMSys'
-3. Change directory to DRAMSys: 'cd DRAMSys'
-4. Checkout the correct commit: 'git checkout -b gem5 09f6dcbb91351e6ee7cadfc7bc8b29d97625db8f'
+2. Clone DRAMSys: 'git clone https://github.com/tukl-msd/DRAMSys --branch v5.0 --depth 1 DRAMSys'
+
+The latest verified working version is v5.0, but later versions might work too.
+gem5 will automatically pick up DRAMSys as an external module when it is rebuilt.
 
 If you wish to run a simulation using the gem5 processor cores, make sure to enable the storage mode in DRAMSys.
 This is done by setting the value of the "StoreMode" key to "Store" in the base configuration file.
 Those configuration file can be found in 'DRAMSys/library/resources/configs/simulator'.
+
+Currently, DRAMSys is only supported in conjunction with a cache. Running DRAMSys in Release mode without caches will silently fail!
diff --git a/ext/dramsys/SConscript b/ext/dramsys/SConscript
index d6ea27e0d1..0cf163aede 100644
--- a/ext/dramsys/SConscript
+++ b/ext/dramsys/SConscript
@@ -25,72 +25,54 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import subprocess
 
-Import('env')
+Import("env")
 
-build_root = Dir('../..').abspath
-src_root = Dir('DRAMSys/DRAMSys/library').srcnode().abspath
+build_root = Dir("../..").abspath
+build_current = Dir(".").abspath
+src_root = Dir(".").srcnode().abspath
+scons_root = Dir("#").abspath
 
 # See if we got a cloned DRAMSys repo as a subdirectory and set the
 # HAVE_DRAMSys flag accordingly
-if not os.path.exists(Dir('.').srcnode().abspath + '/DRAMSys'):
-    env['HAVE_DRAMSYS'] = False
+if not os.path.exists(Dir(".").srcnode().abspath + "/DRAMSys"):
+    env["HAVE_DRAMSYS"] = False
     Return()
 
-env['HAVE_DRAMSYS'] = True
+env["HAVE_DRAMSYS"] = True
 
-dramsys_files = []
-dramsys_configuration_files = []
+subprocess.run(
+    [
+        "cmake",
+        f"-S{src_root}",
+        f"-B{build_current}",
+        "-DCMAKE_BUILD_TYPE=Release",
+        f"-DSCONS_SOURCE_DIR:STRING={scons_root}",
+        "-DDRAMSYS_BUILD_CLI=OFF"
+    ],
+    check=True
+)
 
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/controller"))
-for root, dirs, files in os.walk(f"{src_root}/src/controller", topdown=False):
-    for dir in dirs:
-        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+subprocess.run(
+    ["cmake", "--build", build_current],
+    check=True
+)
 
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/simulation"))
-for root, dirs, files in os.walk(f"{src_root}/src/simulation", topdown=False):
-    for dir in dirs:
-        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+env.Append(LIBS="DRAMSys_libdramsys")
+env.Append(LIBPATH=Dir("./DRAMSys/src/libdramsys").abspath)
 
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/configuration"))
-for root, dirs, files in os.walk(f"{src_root}/src/configuration", topdown=False):
-    for dir in dirs:
-        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+env.Append(LIBS="DRAMSys_Configuration")
+env.Append(LIBPATH=Dir("./DRAMSys/src/configuration").abspath)
 
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/error"))
-dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Bit.cpp"))
-dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/ECC.cpp"))
-dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Word.cpp"))
+env.Append(LIBS="sqlite3")
+env.Append(LIBPATH=Dir("./DRAMSys/lib/sqlite3").abspath)
 
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common"))
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration"))
-dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration/memspec"))
-dramsys_files.extend(Glob("%s/*.c" % f"{src_root}/src/common/third_party/sqlite-amalgamation"))
+env.Append(CPPPATH=src_root + "/DRAMSys/src/libdramsys")
+env.Append(CPPPATH=src_root + "/DRAMSys/src/configuration")
+env.Append(CPPPATH=src_root + "/DRAMSys/src/util")
+env.Append(CPPPATH=src_root + "/DRAMSys/lib/nlohmann_json/include")
 
-env.Prepend(CPPPATH=[
-    src_root + "/src",
-    src_root + "/src/common/configuration",
-    src_root + "/src/common/third_party/nlohmann/include",
-])
-
-env.Prepend(CPPDEFINES=[("DRAMSysResourceDirectory", '\\"' + os.getcwd() + '/resources' + '\\"')])
 env.Prepend(CPPDEFINES=[("SYSTEMC_VERSION", 20191203)])
-
-dramsys = env.Clone()
-
-if '-Werror' in dramsys['CCFLAGS']:
-    dramsys['CCFLAGS'].remove('-Werror')
-
-dramsys.Prepend(CPPPATH=[
-    src_root + "/src/common/third_party/sqlite-amalgamation",
-    build_root + "/systemc/ext"
-])
-
-dramsys.Prepend(CPPDEFINES=[("SQLITE_ENABLE_RTREE", "1")])
-
-dramsys_configuration = env.Clone()
-
-dramsys.Library('dramsys', dramsys_files)
-
-env.Append(LIBS=['dramsys', 'dl'])
-env.Append(LIBPATH=[Dir('.')])
+env.Prepend(CPPDEFINES=[("DRAMSYS_RESOURCE_DIR",
+                         '\\"' + os.getcwd() + '/DRAMSys/configs' + '\\"')])
diff --git a/ext/gdbremote/signals.hh b/ext/gdbremote/signals.hh
index 11835e6f5a..07c0064d39 100644
--- a/ext/gdbremote/signals.hh
+++ b/ext/gdbremote/signals.hh
@@ -168,12 +168,12 @@ namespace gem5{
     INFO = 142, //information request
     unknown = 143, //unknown signal
 
-    EXC_BAD_ACCESS = 145, //could not access memory
-    EXC_BAD_INSTRUCTION = 146, //illegal instruction/operand
-    EXC_ARITHMETIC = 147, //arithmetic exception
-    EXC_EMULATION = 148, //emulation instruction
-    EXC_SOFTWARE = 149, //software generated exception
-    EXC_BREAKPOINT = 150, //breakpoint
+    GEM5_EXC_BAD_ACCESS = 145, //could not access memory
+    GEM5_EXC_BAD_INSTRUCTION = 146, //illegal instruction/operand
+    GEM5_EXC_ARITHMETIC = 147, //arithmetic exception
+    GEM5_EXC_EMULATION = 148, //emulation instruction
+    GEM5_EXC_SOFTWARE = 149, //software generated exception
+    GEM5_EXC_BREAKPOINT = 150, //breakpoint
 
     LIBRT = 151, //librt internal signal
   };
diff --git a/ext/json/README b/ext/json/README
new file mode 100644
index 0000000000..7c4da3a69a
--- /dev/null
+++ b/ext/json/README
@@ -0,0 +1,15 @@
+Follow these steps to get nlohmann/json as part of gem5
+
+1. Download and build nlohmann/json
+    1.1 Go to ext/json (this directory)
+    1.2 Clone nlohmann/json: git clone https://github.com/nlohmann/json.git
+    1.3 cd json && mkdir build
+    1.4 cd build
+    1.5 cmake ..
+    1.6 make -j<num_proc>
+
+2. Compile gem5
+    2.0 Use the gem5-rowhammer version of gem5, which has the correct SConscript
+    2.1 cd gem5
+    2.2 Build gem5 as usual
+
diff --git a/ext/magic_enum/SConscript b/ext/magic_enum/SConscript
new file mode 100644
index 0000000000..9ba39bcaa9
--- /dev/null
+++ b/ext/magic_enum/SConscript
@@ -0,0 +1,37 @@
+# Copyright (c) 2022 Arteris, Inc. and its applicable licensors and affiliates.
+# All rights reserved.  This license is licensed under the Gem5 license.
+#
+# The license below extends only to copyright in the software and shall not be
+# construed as granting a license to any other intellectual property including
+# but not limited to intellectual property relating to a hardware
+# implementation of the functionality of the software licensed hereunder.  You
+# may use the software subject to the license terms below provided that you
+# ensure that this notice is replicated unmodified and in its entirety in all
+# distributions of the software, modified or unmodified, in source code or in
+# binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer; redistributions in binary
+# form must reproduce the above copyright notice, this list of conditions and
+# the following disclaimer in the documentation and/or other materials provided
+# with the distribution; neither the name of the copyright holders nor the
+# names of its contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+Import('env')
+
+env.Append(CPPDEFINES=['MAGIC_ENUM_RANGE_MAX=0x100'])
\ No newline at end of file
diff --git a/ext/magic_enum/magic_enum.hh b/ext/magic_enum/magic_enum.hh
new file mode 100644
index 0000000000..1763cc314e
--- /dev/null
+++ b/ext/magic_enum/magic_enum.hh
@@ -0,0 +1,1462 @@
+//  __  __             _        ______                          _____
+// |  \/  |           (_)      |  ____|                        / ____|_     _
+// | \  / | __ _  __ _ _  ___  | |__   _ __  _   _ _ __ ___   | |   _| |_ _| |_
+// | |\/| |/ _` |/ _` | |/ __| |  __| | '_ \| | | | '_ ` _ \  | |  |_   _|_   _|
+// | |  | | (_| | (_| | | (__  | |____| | | | |_| | | | | | | | |____|_|   |_|
+// |_|  |_|\__,_|\__, |_|\___| |______|_| |_|\__,_|_| |_| |_|  \_____|
+//                __/ | https://github.com/Neargye/magic_enum
+//               |___/  version 0.9.2
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2019 - 2023 Daniil Goncharov <neargye@gmail.com>.
+//
+// Permission is hereby  granted, free of charge, to any  person obtaining a copy
+// of this software and associated  documentation files (the "Software"), to deal
+// in the Software  without restriction, including without  limitation the rights
+// to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
+// copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
+// IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
+// FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
+// AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
+// LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef NEARGYE_MAGIC_ENUM_HPP
+#define NEARGYE_MAGIC_ENUM_HPP
+
+#define MAGIC_ENUM_VERSION_MAJOR 0
+#define MAGIC_ENUM_VERSION_MINOR 9
+#define MAGIC_ENUM_VERSION_PATCH 2
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+#if defined(MAGIC_ENUM_CONFIG_FILE)
+#include MAGIC_ENUM_CONFIG_FILE
+#endif
+
+#if !defined(MAGIC_ENUM_USING_ALIAS_OPTIONAL)
+#include <optional>
+#endif
+#if !defined(MAGIC_ENUM_USING_ALIAS_STRING)
+#include <string>
+#endif
+#if !defined(MAGIC_ENUM_USING_ALIAS_STRING_VIEW)
+#include <string_view>
+#endif
+
+#if defined(__clang__)
+#  pragma clang diagnostic push
+#  pragma clang diagnostic ignored "-Wunknown-warning-option"
+#  pragma clang diagnostic ignored "-Wenum-constexpr-conversion"
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wmaybe-uninitialized" // May be used uninitialized 'return {};'.
+#elif defined(_MSC_VER)
+#  pragma warning(push)
+#  pragma warning(disable : 26495) // Variable 'static_str<N>::chars_' is uninitialized.
+#  pragma warning(disable : 28020) // Arithmetic overflow: Using operator '-' on a 4 byte value and then casting the result to a 8 byte value.
+#  pragma warning(disable : 26451) // The expression '0<=_Param_(1)&&_Param_(1)<=1-1' is not true at this call.
+#  pragma warning(disable : 4514) // Unreferenced inline function has been removed.
+#endif
+
+// Checks magic_enum compiler compatibility.
+#if defined(__clang__) && __clang_major__ >= 5 || defined(__GNUC__) && __GNUC__ >= 9 || defined(_MSC_VER) && _MSC_VER >= 1910 || defined(__RESHARPER__)
+#  undef  MAGIC_ENUM_SUPPORTED
+#  define MAGIC_ENUM_SUPPORTED 1
+#endif
+
+// Checks magic_enum compiler aliases compatibility.
+#if defined(__clang__) && __clang_major__ >= 5 || defined(__GNUC__) && __GNUC__ >= 9 || defined(_MSC_VER) && _MSC_VER >= 1920
+#  undef  MAGIC_ENUM_SUPPORTED_ALIASES
+#  define MAGIC_ENUM_SUPPORTED_ALIASES 1
+#endif
+
+// Enum value must be greater than or equal to MAGIC_ENUM_RANGE_MIN. By default MAGIC_ENUM_RANGE_MIN = -128.
+// If another minimum is needed for all enum types by default, redefine the macro MAGIC_ENUM_RANGE_MIN.
+#if !defined(MAGIC_ENUM_RANGE_MIN)
+#  define MAGIC_ENUM_RANGE_MIN -128
+#endif
+
+// Enum value must be less than or equal to MAGIC_ENUM_RANGE_MAX. By default MAGIC_ENUM_RANGE_MAX = 0x100 (256).
+// If another maximum is needed for all enum types by default, redefine the macro MAGIC_ENUM_RANGE_MAX.
+#if !defined(MAGIC_ENUM_RANGE_MAX)
+#  define MAGIC_ENUM_RANGE_MAX 0x100
+#endif
+
+// Improve ReSharper C++ intellisense performance with builtins, avoiding unnecessary template instantiations.
+#if defined(__RESHARPER__)
+#  undef MAGIC_ENUM_GET_ENUM_NAME_BUILTIN
+#  undef MAGIC_ENUM_GET_TYPE_NAME_BUILTIN
+#  if __RESHARPER__ >= 20230100
+#    define MAGIC_ENUM_GET_ENUM_NAME_BUILTIN(V) __rscpp_enumerator_name(V)
+#    define MAGIC_ENUM_GET_TYPE_NAME_BUILTIN(T) __rscpp_type_name<T>()
+#  else
+#    define MAGIC_ENUM_GET_ENUM_NAME_BUILTIN(V) nullptr
+#    define MAGIC_ENUM_GET_TYPE_NAME_BUILTIN(T) nullptr
+#  endif
+#endif
+
+namespace magic_enum {
+
+// If another optional type is needed, define the macro MAGIC_ENUM_USING_ALIAS_OPTIONAL.
+#if defined(MAGIC_ENUM_USING_ALIAS_OPTIONAL)
+MAGIC_ENUM_USING_ALIAS_OPTIONAL
+#else
+using std::optional;
+#endif
+
+// If another string_view type is needed, define the macro MAGIC_ENUM_USING_ALIAS_STRING_VIEW.
+#if defined(MAGIC_ENUM_USING_ALIAS_STRING_VIEW)
+MAGIC_ENUM_USING_ALIAS_STRING_VIEW
+#else
+using std::string_view;
+#endif
+
+// If another string type is needed, define the macro MAGIC_ENUM_USING_ALIAS_STRING.
+#if defined(MAGIC_ENUM_USING_ALIAS_STRING)
+MAGIC_ENUM_USING_ALIAS_STRING
+#else
+using std::string;
+#endif
+
+using char_type = string_view::value_type; // Character type used for every name string produced by this header.
+static_assert(std::is_same_v<string_view::value_type, string::value_type>, "magic_enum::customize requires same string_view::value_type and string::value_type");
+static_assert([] { // Compile-time check; it only does work when char_type is wchar_t.
+  if constexpr (std::is_same_v<char_type, wchar_t>) {
+    constexpr const char     c[] =  "abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789|";
+    constexpr const wchar_t wc[] = L"abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789|";
+    static_assert(std::size(c) == std::size(wc), "magic_enum::customize identifier characters are multichars in wchar_t.");
+
+    for (std::size_t i = 0; i < std::size(c); ++i) { // Every identifier character must compare equal in its char and wchar_t form.
+      if (c[i] != wc[i]) {
+        return false;
+      }
+    }
+  }
+  return true;
+} (), "magic_enum::customize wchar_t is not compatible with ASCII.");
+
+namespace customize {
+
+// Enum value must be in range [MAGIC_ENUM_RANGE_MIN, MAGIC_ENUM_RANGE_MAX]. By default MAGIC_ENUM_RANGE_MIN = -128, MAGIC_ENUM_RANGE_MAX = 0x100 (256).
+// If another range is needed for all enum types by default, redefine the macros MAGIC_ENUM_RANGE_MIN and MAGIC_ENUM_RANGE_MAX.
+// If another range is needed for a specific enum type, add a specialization of enum_range for that enum type.
+template <typename E>
+struct enum_range { // Primary template: the reflection range for E, taken from the two range macros.
+  static_assert(std::is_enum_v<E>, "magic_enum::customize::enum_range requires enum type.");
+  static constexpr int min = MAGIC_ENUM_RANGE_MIN; // Lower bound; read by detail::range_min.
+  static constexpr int max = MAGIC_ENUM_RANGE_MAX; // Upper bound; read by detail::range_max.
+  static_assert(max > min, "magic_enum::customize::enum_range requires max > min.");
+};
+
+static_assert(MAGIC_ENUM_RANGE_MAX > MAGIC_ENUM_RANGE_MIN, "MAGIC_ENUM_RANGE_MAX must be greater than MAGIC_ENUM_RANGE_MIN.");
+static_assert((MAGIC_ENUM_RANGE_MAX - MAGIC_ENUM_RANGE_MIN) < (std::numeric_limits<std::uint16_t>::max)(), "MAGIC_ENUM_RANGE must be less than UINT16_MAX.");
+
+namespace detail {
+
+enum class customize_tag { // Selects how a customize_t result is interpreted by detail::type_name / enum_name / is_valid.
+  default_tag, // Use the name derived from the compiler's function-signature string.
+  invalid_tag, // No name: an empty static_str<0> is produced and is_valid() returns false.
+  custom_tag // Use the string stored in the customize_t's second member.
+};
+
+} // namespace magic_enum::customize::detail
+
+class customize_t : public std::pair<detail::customize_tag, string_view> { // (tag, name) pair returned by the customize::enum_name / enum_type_name hooks.
+ public:
+  constexpr customize_t(string_view srt) : std::pair<detail::customize_tag, string_view>{detail::customize_tag::custom_tag, srt} {} // Custom name; the tag becomes custom_tag.
+  constexpr customize_t(const char_type* srt) : customize_t{string_view{srt}} {} // Custom name from a C string; delegates to the string_view constructor.
+  constexpr customize_t(detail::customize_tag tag) : std::pair<detail::customize_tag, string_view>{tag, string_view{}} { // Tag only, with an empty name.
+    assert(tag != detail::customize_tag::custom_tag); // custom_tag carries a name, so it must use one of the string constructors.
+  }
+};
+
+// Ready-made customize_t holding default_tag: keep the compiler-derived name.
+inline constexpr auto default_tag = customize_t{detail::customize_tag::default_tag};
+// Ready-made customize_t holding invalid_tag: the value or type gets no name.
+inline constexpr auto invalid_tag = customize_t{detail::customize_tag::invalid_tag};
+
+// Customization hook: to give enum values custom names, add a specialization of enum_name for that enum type. The primary template returns default_tag.
+template <typename E>
+constexpr customize_t enum_name(E) noexcept {
+  return default_tag;
+}
+
+// Customization hook: to give an enum type a custom type name, add a specialization of enum_type_name for that enum type. The primary template returns default_tag.
+template <typename E>
+constexpr customize_t enum_type_name() noexcept {
+  return default_tag;
+}
+
+} // namespace magic_enum::customize
+
+namespace detail {
+
+template <typename T>
+struct supported // true_type if MAGIC_ENUM_SUPPORTED is defined and non-zero or MAGIC_ENUM_NO_CHECK_SUPPORT is defined; false_type otherwise.
+#if defined(MAGIC_ENUM_SUPPORTED) && MAGIC_ENUM_SUPPORTED || defined(MAGIC_ENUM_NO_CHECK_SUPPORT)
+    : std::true_type {};
+#else
+    : std::false_type {};
+#endif
+
+template <auto V, typename E = std::decay_t<decltype(V)>, std::enable_if_t<std::is_enum_v<E>, int> = 0>
+using enum_constant = std::integral_constant<E, V>; // Enum value V wrapped as an integral_constant; enabled only when V's decayed type is an enum.
+
+template <typename... T>
+inline constexpr bool always_false_v = false; // Always false but dependent on T; used in static_asserts in the last else of if-constexpr chains.
+
+template <typename T, typename = void>
+struct has_is_flags : std::false_type {}; // Primary: chosen when customize::enum_range<T>::is_flags is not a valid expression.
+
+template <typename T>
+struct has_is_flags<T, std::void_t<decltype(customize::enum_range<T>::is_flags)>> : std::bool_constant<std::is_same_v<bool, std::decay_t<decltype(customize::enum_range<T>::is_flags)>>> {}; // is_flags exists: true only if its decayed type is bool.
+
+template <typename T, typename = void>
+struct range_min : std::integral_constant<int, MAGIC_ENUM_RANGE_MIN> {}; // Fallback when customize::enum_range<T>::min is not available.
+
+template <typename T>
+struct range_min<T, std::void_t<decltype(customize::enum_range<T>::min)>> : std::integral_constant<decltype(customize::enum_range<T>::min), customize::enum_range<T>::min> {}; // Takes both type and value from customize::enum_range<T>::min.
+
+template <typename T, typename = void>
+struct range_max : std::integral_constant<int, MAGIC_ENUM_RANGE_MAX> {}; // Fallback when customize::enum_range<T>::max is not available.
+
+template <typename T>
+struct range_max<T, std::void_t<decltype(customize::enum_range<T>::max)>> : std::integral_constant<decltype(customize::enum_range<T>::max), customize::enum_range<T>::max> {}; // Takes both type and value from customize::enum_range<T>::max.
+
+struct str_view { // Minimal pointer + length view over a narrow (char) string.
+  const char* str_ = nullptr; // First character; nullptr in a default-constructed (empty) view.
+  std::size_t size_ = 0; // Number of characters in the view.
+};
+
+template <std::uint16_t N>
+class static_str { // Owns a null-terminated copy of an N-character name in a fixed-size array.
+ public:
+  constexpr explicit static_str(str_view str) noexcept : static_str{str.str_, std::make_integer_sequence<std::uint16_t, N>{}} { // Copies N characters from a str_view.
+    assert(str.size_ == N);
+  }
+
+  constexpr explicit static_str(string_view str) noexcept : static_str{str.data(), std::make_integer_sequence<std::uint16_t, N>{}} { // Copies N characters from a string_view.
+    assert(str.size() == N);
+  }
+
+  constexpr const char_type* data() const noexcept { return chars_; }
+
+  constexpr std::uint16_t size() const noexcept { return N; } // Length without the terminating '\0'.
+
+  constexpr operator string_view() const noexcept { return {data(), size()}; }
+
+ private:
+  template <std::uint16_t... I>
+  constexpr static_str(const char* str, std::integer_sequence<std::uint16_t, I...>) noexcept : chars_{static_cast<char_type>(str[I])..., static_cast<char_type>('\0')} {} // Expands str[I] for each index, cast to char_type, then appends '\0'.
+
+  template <std::uint16_t... I>
+  constexpr static_str(string_view str, std::integer_sequence<std::uint16_t, I...>) noexcept : chars_{str[I]..., static_cast<char_type>('\0')} {} // Same expansion for a string_view source; no cast is applied.
+
+  char_type chars_[static_cast<std::size_t>(N) + 1]; // N characters plus the trailing '\0'.
+};
+
+template <>
+class static_str<0> { // Empty-name specialization: stores nothing.
+ public:
+  constexpr explicit static_str() = default;
+
+  constexpr explicit static_str(str_view) noexcept {} // The argument is ignored.
+
+  constexpr explicit static_str(string_view) noexcept {} // The argument is ignored.
+
+  constexpr const char_type* data() const noexcept { return nullptr; }
+
+  constexpr std::uint16_t size() const noexcept { return 0; }
+
+  constexpr operator string_view() const noexcept { return {}; } // Converts to a default-constructed string_view.
+};
+
+template <typename Op = std::equal_to<>>
+class case_insensitive { // Character predicate: applies Op to both operands after lowering 'A'..'Z'.
+  static constexpr char_type to_lower(char_type c) noexcept { // Only 'A'..'Z' are mapped to 'a'..'z'; any other character is returned unchanged.
+    return (c >= static_cast<char_type>('A') && c <= static_cast<char_type>('Z')) ? static_cast<char_type>(c + (static_cast<char_type>('a') - static_cast<char_type>('A'))) : c;
+  }
+
+ public:
+  template <typename L, typename R>
+  constexpr auto operator()(L lhs,R rhs) const noexcept -> std::enable_if_t<std::is_same_v<std::decay_t<L>, char_type> && std::is_same_v<std::decay_t<R>, char_type>, bool> { // Participates in overload resolution only when both arguments decay to char_type.
+    return Op{}(to_lower(lhs), to_lower(rhs));
+  }
+};
+
+constexpr std::size_t find(string_view str, char_type c) noexcept { // Index of the first occurrence of c in str, or string_view::npos if absent.
+#if defined(__clang__) && __clang_major__ < 9 && defined(__GLIBCXX__) || defined(_MSC_VER) && _MSC_VER < 1920 && !defined(__clang__)
+// https://stackoverflow.com/questions/56484834/constexpr-stdstring-viewfind-last-of-doesnt-work-on-clang-8-with-libstdc
+// https://developercommunity.visualstudio.com/content/problem/360432/vs20178-regression-c-failed-in-test.html
+  constexpr bool workaround = true;
+#else
+  constexpr bool workaround = false;
+#endif
+
+  if constexpr (workaround) { // Hand-written linear scan for the toolchains selected by the #if above (see the linked reports).
+    for (std::size_t i = 0; i < str.size(); ++i) {
+      if (str[i] == c) {
+        return i;
+      }
+    }
+
+    return string_view::npos;
+  } else {
+    return str.find(c);
+  }
+}
+
+template <typename BinaryPredicate>
+constexpr bool is_default_predicate() noexcept { // true when the decayed predicate is std::equal_to<char_type> or the transparent std::equal_to<>.
+  return std::is_same_v<std::decay_t<BinaryPredicate>, std::equal_to<string_view::value_type>> ||
+         std::is_same_v<std::decay_t<BinaryPredicate>, std::equal_to<>>;
+}
+
+template <typename BinaryPredicate>
+constexpr bool is_nothrow_invocable() { // Feeds the noexcept specification of cmp_equal: default predicates count as non-throwing.
+  return is_default_predicate<BinaryPredicate>() ||
+         std::is_nothrow_invocable_r_v<bool, BinaryPredicate, char_type, char_type>;
+}
+
+template <typename BinaryPredicate>
+constexpr bool cmp_equal(string_view lhs, string_view rhs, [[maybe_unused]] BinaryPredicate&& p) noexcept(is_nothrow_invocable<BinaryPredicate>()) { // Compares two strings, element-wise through p unless p is a default predicate.
+#if defined(_MSC_VER) && _MSC_VER < 1920 && !defined(__clang__)
+  // https://developercommunity.visualstudio.com/content/problem/360432/vs20178-regression-c-failed-in-test.html
+  // https://developercommunity.visualstudio.com/content/problem/232218/c-constexpr-string-view.html
+  constexpr bool workaround = true;
+#else
+  constexpr bool workaround = false;
+#endif
+
+  if constexpr (!is_default_predicate<BinaryPredicate>() || workaround) { // Custom predicate, or old MSVC: compare by hand.
+    if (lhs.size() != rhs.size()) {
+      return false;
+    }
+
+    const auto size = lhs.size();
+    for (std::size_t i = 0; i < size; ++i) {
+      if (!p(lhs[i], rhs[i])) {
+        return false;
+      }
+    }
+
+    return true;
+  } else {
+    return lhs == rhs; // Default predicate: p is not called (hence [[maybe_unused]]).
+  }
+}
+
+template <typename L, typename R>
+constexpr bool cmp_less(L lhs, R rhs) noexcept { // lhs < rhs for integral types whose signedness may differ, without mixed-sign comparison.
+  static_assert(std::is_integral_v<L> && std::is_integral_v<R>, "magic_enum::detail::cmp_less requires integral type.");
+
+  if constexpr (std::is_signed_v<L> == std::is_signed_v<R>) {
+    // If same signedness (both signed or both unsigned).
+    return lhs < rhs;
+  } else if constexpr (std::is_same_v<L, bool>) { // bool special case
+      return static_cast<R>(lhs) < rhs;
+  } else if constexpr (std::is_same_v<R, bool>) { // bool special case
+      return lhs < static_cast<L>(rhs);
+  } else if constexpr (std::is_signed_v<R>) { // Here L is unsigned and R is signed.
+    // If 'right' is negative, then result is 'false', otherwise cast & compare.
+    return rhs > 0 && lhs < static_cast<std::make_unsigned_t<R>>(rhs);
+  } else { // Here L is signed and R is unsigned.
+    // If 'left' is negative, then result is 'true', otherwise cast & compare.
+    return lhs < 0 || static_cast<std::make_unsigned_t<L>>(lhs) < rhs;
+  }
+}
+
+template <typename I>
+constexpr I log2(I value) noexcept { // Number of right shifts needed to bring value down to 1 or below (0 for value <= 1).
+  static_assert(std::is_integral_v<I>, "magic_enum::detail::log2 requires integral type.");
+
+  if constexpr (std::is_same_v<I, bool>) { // bool special case
+    return assert(false), value; // Not meant to be called for bool: asserts, then returns the input.
+  } else {
+    auto ret = I{0};
+    for (; value > I{1}; value >>= I{1}, ++ret) {}
+
+    return ret;
+  }
+}
+
+#if defined(__cpp_lib_array_constexpr) && __cpp_lib_array_constexpr >= 201603L
+#  define MAGIC_ENUM_ARRAY_CONSTEXPR 1
+#else
+template <typename T, std::size_t N, std::size_t... I>
+constexpr std::array<std::remove_cv_t<T>, N> to_array(T (&a)[N], std::index_sequence<I...>) noexcept { // Copies a built-in array into std::array; only compiled when MAGIC_ENUM_ARRAY_CONSTEXPR is not set.
+  return {{a[I]...}};
+}
+#endif
+
+template <typename T>
+inline constexpr bool is_enum_v = std::is_enum_v<T> && std::is_same_v<T, std::decay_t<T>>; // Enum type that is already decayed (decay_t<T> is T itself).
+
+template <typename E>
+constexpr auto n() noexcept { // Returns a str_view over the spelling of type E cut out of the compiler's function-signature string.
+  static_assert(is_enum_v<E>, "magic_enum::detail::n requires enum type.");
+
+  if constexpr (supported<E>::value) {
+#if defined(MAGIC_ENUM_GET_TYPE_NAME_BUILTIN)
+    constexpr auto name_ptr = MAGIC_ENUM_GET_TYPE_NAME_BUILTIN(E);
+    constexpr auto name = name_ptr ? str_view{name_ptr, std::char_traits<char>::length(name_ptr)} : str_view{};
+#elif defined(__clang__)
+    auto name = str_view{__PRETTY_FUNCTION__ + 34, sizeof(__PRETTY_FUNCTION__) - 36}; // Fixed offsets; presumably the lengths of the text around the type name in this exact signature.
+#elif defined(__GNUC__)
+    auto name = str_view{__PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1};
+    if (name.str_[name.size_ - 1] == ']') { // Two signature layouts are handled, told apart by a trailing ']'.
+      name.size_ -= 50;
+      name.str_ += 49;
+    } else {
+      name.size_ -= 40;
+      name.str_ += 37;
+    }
+#elif defined(_MSC_VER)
+    auto name = str_view{__FUNCSIG__ + 40, sizeof(__FUNCSIG__) - 57};
+#else
+    auto name = str_view{}; // Unknown compiler: empty name.
+#endif
+    return name;
+  } else {
+    return str_view{}; // Unsupported compiler or Invalid customize.
+  }
+}
+
+template <typename E>
+constexpr auto type_name() noexcept { // Builds the static_str holding E's type name, honoring customize::enum_type_name<E>().
+  [[maybe_unused]] constexpr auto custom = customize::enum_type_name<E>();
+  static_assert(std::is_same_v<std::decay_t<decltype(custom)>, customize::customize_t>, "magic_enum::customize requires customize_t type.");
+  if constexpr (custom.first == customize::detail::customize_tag::custom_tag) { // User-supplied name; it must not be empty.
+    constexpr auto name = custom.second;
+    static_assert(!name.empty(), "magic_enum::customize requires not empty string.");
+    return static_str<name.size()>{name};
+  } else if constexpr (custom.first == customize::detail::customize_tag::invalid_tag) { // Explicitly no name.
+    return static_str<0>{};
+  } else if constexpr (custom.first == customize::detail::customize_tag::default_tag) { // Name taken from n<E>().
+    constexpr auto name = n<E>();
+    return static_str<name.size_>{name};
+  } else {
+    static_assert(always_false_v<E>, "magic_enum::customize invalid.");
+  }
+}
+
+template <typename E>
+inline constexpr auto type_name_v = type_name<E>(); // The static_str returned by type_name<E>(), stored as a constexpr variable.
+
+template <auto V>
+constexpr auto n() noexcept { // Returns a str_view over the unqualified enumerator name of V, or an empty view if none was found.
+  static_assert(is_enum_v<decltype(V)>, "magic_enum::detail::n requires enum type.");
+
+  if constexpr (supported<decltype(V)>::value) {
+#if defined(MAGIC_ENUM_GET_ENUM_NAME_BUILTIN)
+    constexpr auto name_ptr = MAGIC_ENUM_GET_ENUM_NAME_BUILTIN(V);
+    auto name = name_ptr ? str_view{name_ptr, std::char_traits<char>::length(name_ptr)} : str_view{};
+#elif defined(__clang__)
+    auto name = str_view{__PRETTY_FUNCTION__ + 34, sizeof(__PRETTY_FUNCTION__) - 36};
+    if (name.size_ > 22 && name.str_[0] == '(' && name.str_[1] == 'a' && name.str_[10] == ' ' && name.str_[22] == ':') { // Skips a 23-character prefix matching this pattern (presumably "(anonymous namespace)::" - confirm).
+      name.size_ -= 23;
+      name.str_ += 23;
+    }
+    if (name.str_[0] == '(' || name.str_[0] == '-' || (name.str_[0] >= '0' && name.str_[0] <= '9')) { // Text starts like a cast or a number rather than an identifier: treat as no name.
+      name = str_view{};
+    }
+#elif defined(__GNUC__)
+    auto name = str_view{__PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1};
+    if (name.str_[name.size_ - 1] == ']') { // Two signature layouts are handled, told apart by a trailing ']'.
+      name.size_ -= 55;
+      name.str_ += 54;
+    } else {
+      name.size_ -= 40;
+      name.str_ += 37;
+    }
+    if (name.str_[0] == '(') { // Text starts with '(' rather than an identifier: treat as no name.
+      name = str_view{};
+    }
+#elif defined(_MSC_VER)
+    str_view name;
+    if ((__FUNCSIG__[5] == '_' && __FUNCSIG__[35] != '(') || (__FUNCSIG__[5] == 'c' && __FUNCSIG__[41] != '(')) {
+      name = str_view{__FUNCSIG__ + 35, sizeof(__FUNCSIG__) - 52};
+    }
+#else
+    auto name = str_view{};
+#endif
+    std::size_t p = 0;
+    for (std::size_t i = name.size_; i > 0; --i) { // Walk backwards to the last ':' so that any scope qualification can be dropped.
+      if (name.str_[i] == ':') {
+        p = i + 1;
+        break;
+      }
+    }
+    if (p > 0) { // A ':' was found: keep only the part after it.
+      name.size_ -= p;
+      name.str_ += p;
+    }
+    return name;
+  } else {
+    return str_view{}; // Unsupported compiler or Invalid customize.
+  }
+}
+
+#if defined(_MSC_VER) && !defined(__clang__) && _MSC_VER < 1920
+#  define MAGIC_ENUM_VS_2017_WORKAROUND 1
+#endif
+
+#if defined(MAGIC_ENUM_VS_2017_WORKAROUND)
+template <typename E, E V>
+constexpr auto n() noexcept { // Variant of n<V>() compiled only under MAGIC_ENUM_VS_2017_WORKAROUND; takes the enum type explicitly.
+  static_assert(is_enum_v<E>, "magic_enum::detail::n requires enum type.");
+
+  str_view name = str_view{__FUNCSIG__, sizeof(__FUNCSIG__) - 17};
+  std::size_t p = 0;
+  for (std::size_t i = name.size_; i > 0; --i) { // Walk backwards to the last ',' or ':' and keep what follows it.
+    if (name.str_[i] == ',' || name.str_[i] == ':') {
+      p = i + 1;
+      break;
+    }
+  }
+  if (p > 0) {
+    name.size_ -= p;
+    name.str_ += p;
+  }
+  if (name.str_[0] == '(' || name.str_[0] == '-' || (name.str_[0] >= '0' && name.str_[0] <= '9')) { // Text starts like a cast or a number rather than an identifier: treat as no name.
+    name = str_view{};
+  }
+  return name;
+}
+#endif
+
+template <typename E, E V>
+constexpr auto enum_name() noexcept { // Builds the static_str holding the name of enumerator V, honoring customize::enum_name<E>(V).
+  [[maybe_unused]] constexpr auto custom = customize::enum_name<E>(V);
+  static_assert(std::is_same_v<std::decay_t<decltype(custom)>, customize::customize_t>, "magic_enum::customize requires customize_t type.");
+  if constexpr (custom.first == customize::detail::customize_tag::custom_tag) { // User-supplied name; it must not be empty.
+    constexpr auto name = custom.second;
+    static_assert(!name.empty(), "magic_enum::customize requires not empty string.");
+    return static_str<name.size()>{name};
+  } else if constexpr (custom.first == customize::detail::customize_tag::invalid_tag) { // Explicitly no name.
+    return static_str<0>{};
+  } else if constexpr (custom.first == customize::detail::customize_tag::default_tag) { // Name taken from the compiler-derived n<...>().
+#if defined(MAGIC_ENUM_VS_2017_WORKAROUND)
+    constexpr auto name = n<E, V>();
+#else
+    constexpr auto name = n<V>();
+#endif
+    return static_str<name.size_>{name};
+  } else {
+    static_assert(always_false_v<E>, "magic_enum::customize invalid.");
+  }
+}
+
+template <typename E, E V>
+inline constexpr auto enum_name_v = enum_name<E, V>(); // The static_str returned by enum_name<E, V>(), stored as a constexpr variable.
+
+template <typename E, auto V>
+constexpr bool is_valid() noexcept { // true when V, converted to E, has a non-empty (custom or compiler-derived) name.
+#if defined(__clang__) && __clang_major__ >= 16
+  // https://reviews.llvm.org/D130058, https://reviews.llvm.org/D131307
+  constexpr E v = __builtin_bit_cast(E, V);
+#else
+  constexpr E v = static_cast<E>(V);
+#endif
+  [[maybe_unused]] constexpr auto custom = customize::enum_name<E>(v);
+  static_assert(std::is_same_v<std::decay_t<decltype(custom)>, customize::customize_t>, "magic_enum::customize requires customize_t type.");
+  if constexpr (custom.first == customize::detail::customize_tag::custom_tag) {
+    constexpr auto name = custom.second;
+    static_assert(!name.empty(), "magic_enum::customize requires not empty string.");
+    return name.size() != 0;
+  } else if constexpr (custom.first == customize::detail::customize_tag::default_tag) { // Valid only if the compiler-derived name is non-empty.
+#if defined(MAGIC_ENUM_VS_2017_WORKAROUND)
+    return n<E, v>().size_ != 0;
+#else
+    return n<v>().size_ != 0;
+#endif
+  } else { // Any other tag (invalid_tag): the value is not reflected.
+    return false;
+  }
+}
+
+enum class enum_subtype { // How an enum's candidate values are enumerated (see ualue()).
+  common, // Candidates are consecutive values: index + offset.
+  flags // Candidates are single bits: 1 << (index + offset).
+};
+
+template <typename E, int O, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr U ualue(std::size_t i) noexcept { // Maps candidate index i (with offset O) to a value of the underlying type U.
+  if constexpr (std::is_same_v<U, bool>) { // bool special case
+    static_assert(O == 0, "magic_enum::detail::ualue requires valid offset.");
+
+    return static_cast<U>(i);
+  } else if constexpr (S == enum_subtype::flags) {
+    return static_cast<U>(U{1} << static_cast<U>(static_cast<int>(i) + O)); // Flags: the single bit at position i + O.
+  } else {
+    return static_cast<U>(static_cast<int>(i) + O); // Common: the value i + O.
+  }
+}
+
+template <typename E, int O, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr E value(std::size_t i) noexcept { // Same mapping as ualue(), with the result cast to the enum type E.
+  return static_cast<E>(ualue<E, O, S>(i));
+}
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr int reflected_min() noexcept { // Lowest candidate: bit position 0 for flags, else the larger of range_min<E> and U's minimum.
+  if constexpr (S == enum_subtype::flags) {
+    return 0;
+  } else {
+    constexpr auto lhs = range_min<E>::value;
+    constexpr auto rhs = (std::numeric_limits<U>::min)();
+
+    if constexpr (cmp_less(rhs, lhs)) { // U can go below the configured minimum: clamp to the configured minimum.
+      return lhs;
+    } else {
+      return rhs;
+    }
+  }
+}
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr int reflected_max() noexcept { // Highest candidate: last bit position of U for flags, else the smaller of range_max<E> and U's maximum.
+  if constexpr (S == enum_subtype::flags) {
+    return std::numeric_limits<U>::digits - 1;
+  } else {
+    constexpr auto lhs = range_max<E>::value;
+    constexpr auto rhs = (std::numeric_limits<U>::max)();
+
+    if constexpr (cmp_less(lhs, rhs)) { // The configured maximum is below U's maximum: use the configured maximum.
+      return lhs;
+    } else {
+      return rhs;
+    }
+  }
+}
+
+#define MAGIC_ENUM_FOR_EACH_256(T)                                                                                                                                                                 \
+  T(  0)T(  1)T(  2)T(  3)T(  4)T(  5)T(  6)T(  7)T(  8)T(  9)T( 10)T( 11)T( 12)T( 13)T( 14)T( 15)T( 16)T( 17)T( 18)T( 19)T( 20)T( 21)T( 22)T( 23)T( 24)T( 25)T( 26)T( 27)T( 28)T( 29)T( 30)T( 31) \
+  T( 32)T( 33)T( 34)T( 35)T( 36)T( 37)T( 38)T( 39)T( 40)T( 41)T( 42)T( 43)T( 44)T( 45)T( 46)T( 47)T( 48)T( 49)T( 50)T( 51)T( 52)T( 53)T( 54)T( 55)T( 56)T( 57)T( 58)T( 59)T( 60)T( 61)T( 62)T( 63) \
+  T( 64)T( 65)T( 66)T( 67)T( 68)T( 69)T( 70)T( 71)T( 72)T( 73)T( 74)T( 75)T( 76)T( 77)T( 78)T( 79)T( 80)T( 81)T( 82)T( 83)T( 84)T( 85)T( 86)T( 87)T( 88)T( 89)T( 90)T( 91)T( 92)T( 93)T( 94)T( 95) \
+  T( 96)T( 97)T( 98)T( 99)T(100)T(101)T(102)T(103)T(104)T(105)T(106)T(107)T(108)T(109)T(110)T(111)T(112)T(113)T(114)T(115)T(116)T(117)T(118)T(119)T(120)T(121)T(122)T(123)T(124)T(125)T(126)T(127) \
+  T(128)T(129)T(130)T(131)T(132)T(133)T(134)T(135)T(136)T(137)T(138)T(139)T(140)T(141)T(142)T(143)T(144)T(145)T(146)T(147)T(148)T(149)T(150)T(151)T(152)T(153)T(154)T(155)T(156)T(157)T(158)T(159) \
+  T(160)T(161)T(162)T(163)T(164)T(165)T(166)T(167)T(168)T(169)T(170)T(171)T(172)T(173)T(174)T(175)T(176)T(177)T(178)T(179)T(180)T(181)T(182)T(183)T(184)T(185)T(186)T(187)T(188)T(189)T(190)T(191) \
+  T(192)T(193)T(194)T(195)T(196)T(197)T(198)T(199)T(200)T(201)T(202)T(203)T(204)T(205)T(206)T(207)T(208)T(209)T(210)T(211)T(212)T(213)T(214)T(215)T(216)T(217)T(218)T(219)T(220)T(221)T(222)T(223) \
+  T(224)T(225)T(226)T(227)T(228)T(229)T(230)T(231)T(232)T(233)T(234)T(235)T(236)T(237)T(238)T(239)T(240)T(241)T(242)T(243)T(244)T(245)T(246)T(247)T(248)T(249)T(250)T(251)T(252)T(253)T(254)T(255)
+
+template <typename E, enum_subtype S, std::size_t Size, int Min, std::size_t I>
+constexpr void valid_count(bool* valid, std::size_t& count) noexcept { // Checks candidate indices I .. I+255 (those below Size), marking valid ones in valid[] and counting them.
+#define MAGIC_ENUM_V(O)                                     \
+  if constexpr ((I + O) < Size) {                           \
+    if constexpr (is_valid<E, ualue<E, Min, S>(I + O)>()) { \
+      valid[I + O] = true;                                  \
+      ++count;                                              \
+    }                                                       \
+  }
+
+  MAGIC_ENUM_FOR_EACH_256(MAGIC_ENUM_V);
+
+  if constexpr ((I + 256) < Size) { // More candidates remain: continue with the next batch of 256.
+    valid_count<E, S, Size, Min, I + 256>(valid, count);
+  }
+#undef MAGIC_ENUM_V
+}
+
+template <std::size_t N>
+struct valid_count_t { // Result of scanning N candidate indices.
+  std::size_t count = 0; // How many candidates were valid.
+  bool valid[N] = {}; // valid[i] is true when candidate index i is a valid enumerator.
+};
+
+template <typename E, enum_subtype S, std::size_t Size, int Min>
+constexpr auto valid_count() noexcept { // Scans all Size candidate indices, starting the batched overload at index 0.
+  valid_count_t<Size> vc;
+  valid_count<E, S, Size, Min, 0>(vc.valid, vc.count);
+  return vc;
+}
+
+template <typename E, enum_subtype S, std::size_t Size, int Min>
+constexpr auto values() noexcept { // Returns a std::array of the valid values of E among Size candidates starting at offset Min.
+  constexpr auto vc = valid_count<E, S, Size, Min>();
+
+  if constexpr (vc.count > 0) {
+#if defined(MAGIC_ENUM_ARRAY_CONSTEXPR)
+    std::array<E, vc.count> values = {};
+#else
+    E values[vc.count] = {};
+#endif
+    for (std::size_t i = 0, v = 0; v < vc.count; ++i) { // i walks the candidates, v the output slots; stops once every valid value is stored.
+      if (vc.valid[i]) {
+        values[v++] = value<E, Min, S>(i);
+      }
+    }
+#if defined(MAGIC_ENUM_ARRAY_CONSTEXPR)
+    return values;
+#else
+    return to_array(values, std::make_index_sequence<vc.count>{});
+#endif
+  } else {
+    return std::array<E, 0>{}; // No valid candidate at all.
+  }
+}
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr auto values() noexcept { // Entry point: derives the candidate range for E and forwards to the sized overload.
+  constexpr auto min = reflected_min<E, S>();
+  constexpr auto max = reflected_max<E, S>();
+  constexpr auto range_size = max - min + 1; // Both bounds are inclusive.
+  static_assert(range_size > 0, "magic_enum::enum_range requires valid size.");
+  static_assert(range_size < (std::numeric_limits<std::uint16_t>::max)(), "magic_enum::enum_range requires valid size.");
+
+  return values<E, S, range_size, min>();
+}
+
+template <typename E, typename U = std::underlying_type_t<E>>
+constexpr enum_subtype subtype(std::true_type) noexcept { // Overload for enum types: decides between flags and common reflection for E.
+  if constexpr (std::is_same_v<U, bool>) { // bool special case
+    return enum_subtype::common;
+  } else if constexpr (has_is_flags<E>::value) {
+    return customize::enum_range<E>::is_flags ? enum_subtype::flags : enum_subtype::common; // Explicit choice through enum_range<E>::is_flags.
+  } else {
+#if defined(MAGIC_ENUM_AUTO_IS_FLAGS)
+    constexpr auto flags_values = values<E, enum_subtype::flags>();
+    constexpr auto default_values = values<E, enum_subtype::common>();
+    if (flags_values.size() == 0 || default_values.size() > flags_values.size()) {
+      return enum_subtype::common;
+    }
+    for (std::size_t i = 0; i < default_values.size(); ++i) {
+      const auto v = static_cast<U>(default_values[i]);
+      if (v != 0 && (v & (v - 1)) != 0) { // Non-zero value with more than one bit set: not a single flag.
+        return enum_subtype::common;
+      }
+    }
+    return enum_subtype::flags;
+#else
+    return enum_subtype::common; // Without MAGIC_ENUM_AUTO_IS_FLAGS, flags are never auto-detected.
+#endif
+  }
+}
+
+template <typename T>
+constexpr enum_subtype subtype(std::false_type) noexcept { // Overload selected when T is not an enum.
+  // For non-enum type return default common subtype.
+  return enum_subtype::common;
+}
+
+template <typename E, typename D = std::decay_t<E>>
+inline constexpr auto subtype_v = subtype<D>(std::is_enum<D>{}); // Subtype of decayed E; tag-dispatches on whether D is an enum.
+
+template <typename E, enum_subtype S>
+inline constexpr auto values_v = values<E, S>(); // std::array of the reflected values of E for subtype S.
+
+template <typename E, enum_subtype S, typename D = std::decay_t<E>>
+using values_t = decltype((values_v<D, S>)); // Double parentheses: a reference type to the values_v array.
+
+template <typename E, enum_subtype S>
+inline constexpr auto count_v = values_v<E, S>.size(); // Number of reflected values.
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+inline constexpr auto min_v = (count_v<E, S> > 0) ? static_cast<U>(values_v<E, S>.front()) : U{0}; // Underlying value of the first reflected value, or 0 if there are none.
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+inline constexpr auto max_v = (count_v<E, S> > 0) ? static_cast<U>(values_v<E, S>.back()) : U{0}; // Underlying value of the last reflected value, or 0 if there are none.
+
+template <typename E, enum_subtype S, std::size_t... I>
+constexpr auto names(std::index_sequence<I...>) noexcept { // Array of name views, one per reflected value, in values_v order.
+  return std::array<string_view, sizeof...(I)>{{enum_name_v<E, values_v<E, S>[I]>...}};
+}
+
+template <typename E, enum_subtype S>
+inline constexpr auto names_v = names<E, S>(std::make_index_sequence<count_v<E, S>>{}); // names() evaluated over indices 0 .. count_v - 1.
+
+template <typename E, enum_subtype S, typename D = std::decay_t<E>>
+using names_t = decltype((names_v<D, S>)); // Double parentheses: a reference type to the names_v array.
+
+template <typename E, enum_subtype S, std::size_t... I>
+constexpr auto entries(std::index_sequence<I...>) noexcept { // Array of (value, name) pairs, one per reflected value, in values_v order.
+  return std::array<std::pair<E, string_view>, sizeof...(I)>{{{values_v<E, S>[I], enum_name_v<E, values_v<E, S>[I]>}...}};
+}
+
+template <typename E, enum_subtype S>
+inline constexpr auto entries_v = entries<E, S>(std::make_index_sequence<count_v<E, S>>{}); // entries() evaluated over indices 0 .. count_v - 1.
+
+template <typename E, enum_subtype S, typename D = std::decay_t<E>>
+using entries_t = decltype((entries_v<D, S>)); // Double parentheses: a reference type to the entries_v array.
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr bool is_sparse() noexcept { // true when the reflected values leave gaps between the smallest and the largest one.
+  if constexpr (count_v<E, S> == 0) {
+    return false;
+  } else if constexpr (std::is_same_v<U, bool>) { // bool special case
+    return false;
+  } else {
+    constexpr auto max = (S == enum_subtype::flags) ? log2(max_v<E, S>) : max_v<E, S>; // For flags, compare bit positions instead of values.
+    constexpr auto min = (S == enum_subtype::flags) ? log2(min_v<E, S>) : min_v<E, S>;
+    constexpr auto range_size = max - min + 1;
+
+    return range_size != count_v<E, S>; // A contiguous enum has exactly one value per slot in [min, max].
+  }
+}
+
+template <typename E, enum_subtype S = subtype_v<E>>
+inline constexpr bool is_sparse_v = is_sparse<E, S>();
+
+template <typename E, enum_subtype S, typename U = std::underlying_type_t<E>>
+constexpr U values_ors() noexcept {
+  static_assert(S == enum_subtype::flags, "magic_enum::detail::values_ors requires valid subtype.");
+
+  auto ors = U{0};
+  for (std::size_t i = 0; i < count_v<E, S>; ++i) {
+    ors |= static_cast<U>(values_v<E, S>[i]);
+  }
+
+  return ors;
+}
+
+template <bool, typename R>
+struct enable_if_enum {};
+
+template <typename R>
+struct enable_if_enum<true, R> {
+  using type = R;
+  static_assert(supported<R>::value, "magic_enum unsupported compiler (https://github.com/Neargye/magic_enum#compiler-compatibility).");
+};
+
+template <typename T, typename R, typename BinaryPredicate = std::equal_to<>, typename D = std::decay_t<T>>
+using enable_if_t = typename enable_if_enum<std::is_enum_v<D> && std::is_invocable_r_v<bool, BinaryPredicate, char_type, char_type>, R>::type;
+
+template <typename T, std::enable_if_t<std::is_enum_v<std::decay_t<T>>, int> = 0>
+using enum_concept = T;
+
+template <typename T, bool = std::is_enum_v<T>>
+struct is_scoped_enum : std::false_type {};
+
+template <typename T>
+struct is_scoped_enum<T, true> : std::bool_constant<!std::is_convertible_v<T, std::underlying_type_t<T>>> {};
+
+template <typename T, bool = std::is_enum_v<T>>
+struct is_unscoped_enum : std::false_type {};
+
+template <typename T>
+struct is_unscoped_enum<T, true> : std::bool_constant<std::is_convertible_v<T, std::underlying_type_t<T>>> {};
+
+template <typename T, bool = std::is_enum_v<std::decay_t<T>>>
+struct underlying_type {};
+
+template <typename T>
+struct underlying_type<T, true> : std::underlying_type<std::decay_t<T>> {};
+
+#if defined(MAGIC_ENUM_ENABLE_HASH)
+
+template <typename T>
+inline constexpr bool has_hash = true;
+
+template <typename Value, typename = void>
+struct constexpr_hash_t;
+
+template <typename Value>
+struct constexpr_hash_t<Value, std::enable_if_t<is_enum_v<Value>>> {
+  constexpr auto operator()(Value value) const noexcept {
+    using U = typename underlying_type<Value>::type;
+    if constexpr (std::is_same_v<U, bool>) { // bool special case
+      return static_cast<std::size_t>(value);
+    } else {
+      return static_cast<U>(value);
+    }
+  }
+  using secondary_hash = constexpr_hash_t;
+};
+
+template <typename Value>
+struct constexpr_hash_t<Value, std::enable_if_t<std::is_same_v<Value, string_view>>> {
+  static constexpr std::uint32_t crc_table[256] {
+    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, 0x706af48fL, 0xe963a535L, 0x9e6495a3L,
+    0x0edb8832L, 0x79dcb8a4L, 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, 0x90bf1d91L,
+    0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L,
+    0x136c9856L, 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, 0xfa0f3d63L, 0x8d080df5L,
+    0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L,
+    0x26d930acL, 0x51de003aL, 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, 0xb8bda50fL,
+    0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL,
+    0x76dc4190L, 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, 0x9fbfe4a5L, 0xe8b8d433L,
+    0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L,
+    0x65b0d9c6L, 0x12b7e950L, 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, 0xfbd44c65L,
+    0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL,
+    0x4369e96aL, 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, 0xaa0a4c5fL, 0xdd0d7cc9L,
+    0x5005713cL, 0x270241aaL, 0xbe0b1010L, 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL,
+    0xedb88320L, 0x9abfb3b6L, 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, 0x73dc1683L,
+    0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L,
+    0xf00f9344L, 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, 0x196c3671L, 0x6e6b06e7L,
+    0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL,
+    0xd80d2bdaL, 0xaf0a1b4cL, 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, 0x4669be79L,
+    0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL,
+    0xc5ba3bbeL, 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, 0x2cd99e8bL, 0x5bdeae1dL,
+    0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L,
+    0x86d3d2d4L, 0xf1d4e242L, 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, 0x18b74777L,
+    0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L,
+    0xa00ae278L, 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, 0x4969474dL, 0x3e6e77dbL,
+    0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, 0xcdd70693L, 0x54de5729L, 0x23d967bfL,
+    0xb3667a2eL, 0xc4614ab8L, 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, 0x2d02ef8dL
+  };
+  constexpr std::uint32_t operator()(string_view value) const noexcept {
+    auto crc = static_cast<std::uint32_t>(0xffffffffL);
+    for (const auto c : value) {
+      crc = (crc >> 8) ^ crc_table[(crc ^ static_cast<std::uint32_t>(c)) & 0xff];
+    }
+    return crc ^ 0xffffffffL;
+  }
+
+  struct secondary_hash {
+    constexpr std::uint32_t operator()(string_view value) const noexcept {
+      auto acc = static_cast<std::uint64_t>(2166136261ULL);
+      for (const auto c : value) {
+        acc = ((acc ^ static_cast<std::uint64_t>(c)) * static_cast<std::uint64_t>(16777619ULL)) & (std::numeric_limits<std::uint32_t>::max)();
+      }
+      return static_cast<std::uint32_t>(acc);
+    }
+  };
+};
+
+template <typename Hash>
+inline constexpr Hash hash_v{};
+
+template <auto* GlobValues, typename Hash>
+constexpr auto calculate_cases(std::size_t Page) noexcept { // Builds the 256 switch labels for the page of *GlobValues that starts at index Page.
+  constexpr std::array values = *GlobValues;
+  constexpr std::size_t size = values.size();
+
+  using switch_t = std::invoke_result_t<Hash, typename decltype(values)::value_type>;
+  static_assert(std::is_integral_v<switch_t> && !std::is_same_v<switch_t, bool>);
+  const std::size_t values_to = (std::min)(static_cast<std::size_t>(256), size - Page); // how many real values land on this page
+
+  std::array<switch_t, 256> result{};
+  auto fill = result.begin();
+  { // leading slots: the hashes of this page's real values
+    auto first = values.begin() + static_cast<std::ptrdiff_t>(Page);
+    auto last = values.begin() + static_cast<std::ptrdiff_t>(Page + values_to);
+    while (first != last) {
+      *fill++ = hash_v<Hash>(*first++);
+    }
+  }
+
+  // dead cases, try to avoid case collisions
+  for (switch_t last_value = result[values_to - 1]; fill != result.end() && last_value != (std::numeric_limits<switch_t>::max)(); *fill++ = ++last_value) { // pad by counting up from the last real hash
+  }
+
+  { // slots still empty after reaching the type's maximum: count up from the minimum, stepping over values equal to *it
+    auto it = result.begin();
+    auto last_value = (std::numeric_limits<switch_t>::min)();
+    for (; fill != result.end(); *fill++ = last_value++) {
+      while (last_value == *it) {
+        ++last_value, ++it;
+      }
+    }
+  }
+
+  return result;
+}
+
+template <typename R, typename F, typename... Args>
+constexpr R invoke_r(F&& f, Args&&... args) noexcept(std::is_nothrow_invocable_r_v<R, F, Args...>) {
+  if constexpr (std::is_void_v<R>) {
+    std::forward<F>(f)(std::forward<Args>(args)...);
+  } else {
+    return static_cast<R>(std::forward<F>(f)(std::forward<Args>(args)...));
+  }
+}
+
+enum class case_call_t {
+  index,
+  value
+};
+
+template <typename T = void>
+inline constexpr auto default_result_type_lambda = []() noexcept(std::is_nothrow_default_constructible_v<T>) { return T{}; };
+
+template <>
+inline constexpr auto default_result_type_lambda<void> = []() noexcept {};
+
+template <auto* Arr, typename Hash>
+constexpr bool has_duplicate() noexcept { // NOTE: despite the name, returns true when every element of *Arr hashes to a distinct value.
+  using value_t = std::decay_t<decltype((*Arr)[0])>;
+  using hash_value_t = std::invoke_result_t<Hash, value_t>;
+  std::array<hash_value_t, Arr->size()> hashes{};
+  std::size_t size = 0;
+  for (auto elem : *Arr) {
+    hashes[size] = hash_v<Hash>(elem);
+    for (auto i = size++; i > 0; --i) { // insertion sort: move the new hash down into the already sorted prefix
+      if (hashes[i] < hashes[i - 1]) {
+        auto tmp = hashes[i];
+        hashes[i] = hashes[i - 1];
+        hashes[i - 1] = tmp;
+      } else if (hashes[i] == hashes[i - 1]) {
+        return false; // two elements share a hash value
+      } else {
+        break; // the new hash has reached its sorted position
+      }
+    }
+  }
+  return true; // no two elements share a hash value
+}
+
+#define MAGIC_ENUM_CASE(val) /* One switch case for element (val + Page); slots past `size` fall through to default. */       \
+  case cases[val]:                                                                                                            \
+    if constexpr ((val) + Page < size) {                                                                                      \
+      if (!pred(values[val + Page], searched)) {                                                                              \
+        break;                                                                                                                \
+      }                                                                                                                       \
+      if constexpr (CallValue == case_call_t::index) {                                                                        \
+        if constexpr (std::is_invocable_r_v<result_t, Lambda, std::integral_constant<std::size_t, val + Page>>) {             \
+          return detail::invoke_r<result_t>(std::forward<Lambda>(lambda), std::integral_constant<std::size_t, val + Page>{}); \
+        } else if constexpr (std::is_invocable_v<Lambda, std::integral_constant<std::size_t, val + Page>>) {                  \
+          assert(false && "magic_enum::detail::constexpr_switch wrong result type.");                                         \
+        }                                                                                                                     \
+      } else if constexpr (CallValue == case_call_t::value) {                                                                 \
+        if constexpr (std::is_invocable_r_v<result_t, Lambda, enum_constant<values[val + Page]>>) {                           \
+          return detail::invoke_r<result_t>(std::forward<Lambda>(lambda), enum_constant<values[val + Page]>{});               \
+        } else if constexpr (std::is_invocable_v<Lambda, enum_constant<values[val + Page]>>) { /* wrong return type */        \
+          assert(false && "magic_enum::detail::constexpr_switch wrong result type.");                                         \
+        }                                                                                                                     \
+      }                                                                                                                       \
+      break;                                                                                                                  \
+    } else [[fallthrough]];
+
+template <auto* GlobValues,
+          case_call_t CallValue,
+          std::size_t Page = 0,
+          typename Hash = constexpr_hash_t<typename std::decay_t<decltype(*GlobValues)>::value_type>,
+          typename BinaryPredicate = std::equal_to<>,
+          typename Lambda,
+          typename ResultGetterType>
+constexpr decltype(auto) constexpr_switch( // Looks `searched` up in *GlobValues with a hash-keyed switch, 256 entries per page.
+    Lambda&& lambda, // invoked for the matching entry with its index or its value, as selected by CallValue
+    typename std::decay_t<decltype(*GlobValues)>::value_type searched,
+    ResultGetterType&& def, // called to produce the result when no entry matches
+    BinaryPredicate&& pred = {}) { // confirms that an entry whose hash matched really equals `searched`
+  using result_t = std::invoke_result_t<ResultGetterType>;
+  using hash_t = std::conditional_t<has_duplicate<GlobValues, Hash>(), Hash, typename Hash::secondary_hash>; // has_duplicate() is true when hashes are unique: keep Hash, else use its secondary hash
+  static_assert(has_duplicate<GlobValues, hash_t>(), "magic_enum::detail::constexpr_switch duplicated hash found, please report it: https://github.com/Neargye/magic_enum/issues.");
+  constexpr std::array values = *GlobValues;
+  constexpr std::size_t size = values.size();
+  constexpr std::array cases = calculate_cases<GlobValues, hash_t>(Page);
+
+  switch (hash_v<hash_t>(searched)) {
+    MAGIC_ENUM_FOR_EACH_256(MAGIC_ENUM_CASE)
+    default:
+      if constexpr (size > 256 + Page) { // entries remain beyond this page: continue on the next one, keeping the caller's predicate
+        return constexpr_switch<GlobValues, CallValue, Page + 256, Hash>(std::forward<Lambda>(lambda), searched, std::forward<ResultGetterType>(def), std::forward<BinaryPredicate>(pred));
+      }
+      break;
+  }
+  return def();
+}
+
+#undef MAGIC_ENUM_CASE
+
+#else
+template <typename T>
+inline constexpr bool has_hash = false;
+#endif
+
+template <typename E, enum_subtype S, typename F, std::size_t... I>
+constexpr auto for_each(F&& f, std::index_sequence<I...>) {
+  constexpr bool has_void_return = (std::is_void_v<std::invoke_result_t<F, enum_constant<values_v<E, S>[I]>>> || ...);
+  constexpr bool all_same_return = (std::is_same_v<std::invoke_result_t<F, enum_constant<values_v<E, S>[0]>>, std::invoke_result_t<F, enum_constant<values_v<E, S>[I]>>> && ...);
+
+  if constexpr (has_void_return) {
+    (f(enum_constant<values_v<E, S>[I]>{}), ...);
+  } else if constexpr (all_same_return) {
+    return std::array{f(enum_constant<values_v<E, S>[I]>{})...};
+  } else {
+    return std::tuple{f(enum_constant<values_v<E, S>[I]>{})...};
+  }
+}
+
+template <typename E, enum_subtype S, typename F,std::size_t... I>
+constexpr bool all_invocable(std::index_sequence<I...>) {
+  if constexpr (count_v<E, S> == 0) {
+    return false;
+  } else {
+    return (std::is_invocable_v<F, enum_constant<values_v<E, S>[I]>> && ...);
+  }
+}
+
+} // namespace magic_enum::detail
+
+// Checks whether magic_enum supports the current compiler.
+inline constexpr bool is_magic_enum_supported = detail::supported<void>::value;
+
+template <typename T>
+using Enum = detail::enum_concept<T>;
+
+// Checks whether T is an Unscoped enumeration type.
+// Provides the member constant value which is equal to true, if T is an [Unscoped enumeration](https://en.cppreference.com/w/cpp/language/enum#Unscoped_enumeration) type. Otherwise, value is equal to false.
+template <typename T>
+struct is_unscoped_enum : detail::is_unscoped_enum<T> {};
+
+template <typename T>
+inline constexpr bool is_unscoped_enum_v = is_unscoped_enum<T>::value;
+
+// Checks whether T is a Scoped enumeration type.
+// Provides the member constant value which is equal to true, if T is a [Scoped enumeration](https://en.cppreference.com/w/cpp/language/enum#Scoped_enumerations) type. Otherwise, value is equal to false.
+template <typename T>
+struct is_scoped_enum : detail::is_scoped_enum<T> {};
+
+template <typename T>
+inline constexpr bool is_scoped_enum_v = is_scoped_enum<T>::value;
+
+// If T is a complete enumeration type, provides a member typedef type that names the underlying type of T.
+// Otherwise, if T is not an enumeration type, there is no member type. Otherwise (T is an incomplete enumeration type), the program is ill-formed.
+template <typename T>
+struct underlying_type : detail::underlying_type<T> {};
+
+template <typename T>
+using underlying_type_t = typename underlying_type<T>::type;
+
+template <auto V>
+using enum_constant = detail::enum_constant<V>;
+
+// Returns type name of enum.
+template <typename E>
+[[nodiscard]] constexpr auto enum_type_name() noexcept -> detail::enable_if_t<E, string_view> {
+  constexpr string_view name = detail::type_name_v<std::decay_t<E>>;
+  static_assert(!name.empty(), "magic_enum::enum_type_name enum type does not have a name.");
+
+  return name;
+}
+
+// Returns number of enum values.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_count() noexcept -> detail::enable_if_t<E, std::size_t> {
+  return detail::count_v<std::decay_t<E>, S>;
+}
+
+// Returns enum value at specified index.
+// No bounds checking is performed: the behavior is undefined if index >= number of enum values.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_value(std::size_t index) noexcept -> detail::enable_if_t<E, std::decay_t<E>> {
+  using D = std::decay_t<E>;
+
+  if constexpr (detail::is_sparse_v<D, S>) {
+    return assert((index < detail::count_v<D, S>)), detail::values_v<D, S>[index];
+  } else {
+    constexpr auto min = (S == detail::enum_subtype::flags) ? detail::log2(detail::min_v<D, S>) : detail::min_v<D, S>;
+
+    return assert((index < detail::count_v<D, S>)), detail::value<D, min, S>(index);
+  }
+}
+
+// Returns enum value at specified index.
+template <typename E, std::size_t I, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_value() noexcept -> detail::enable_if_t<E, std::decay_t<E>> {
+  using D = std::decay_t<E>;
+  static_assert(I < detail::count_v<D, S>, "magic_enum::enum_value out of range.");
+
+  return enum_value<D, S>(I);
+}
+
+// Returns std::array with enum values, sorted by enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_values() noexcept -> detail::enable_if_t<E, detail::values_t<E, S>> {
+  return detail::values_v<std::decay_t<E>, S>;
+}
+
+// Returns integer value from enum value.
+template <typename E>
+[[nodiscard]] constexpr auto enum_integer(E value) noexcept -> detail::enable_if_t<E, underlying_type_t<E>> {
+  return static_cast<underlying_type_t<E>>(value);
+}
+
+// Returns underlying value from enum value.
+template <typename E>
+[[nodiscard]] constexpr auto enum_underlying(E value) noexcept -> detail::enable_if_t<E, underlying_type_t<E>> {
+  return static_cast<underlying_type_t<E>>(value);
+}
+
+// Obtains index in enum values from enum value.
+// Returns optional with index.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_index(E value) noexcept -> detail::enable_if_t<E, optional<std::size_t>> {
+  using D = std::decay_t<E>;
+  using U = underlying_type_t<D>;
+
+  if constexpr (detail::count_v<D, S> == 0) {
+    static_cast<void>(value);
+    return {}; // Empty enum.
+  } else if constexpr (detail::is_sparse_v<D, S> || (S == detail::enum_subtype::flags)) {
+#if defined(MAGIC_ENUM_ENABLE_HASH)
+    return detail::constexpr_switch<&detail::values_v<D, S>, detail::case_call_t::index>(
+        [](std::size_t i) { return optional<std::size_t>{i}; },
+        value,
+        detail::default_result_type_lambda<optional<std::size_t>>);
+#else
+    for (std::size_t i = 0; i < detail::count_v<D, S>; ++i) {
+      if (enum_value<D, S>(i) == value) {
+        return i;
+      }
+    }
+    return {}; // Invalid value or out of range.
+#endif
+  } else {
+    const auto v = static_cast<U>(value);
+    if (v >= detail::min_v<D, S> && v <= detail::max_v<D, S>) {
+      return static_cast<std::size_t>(v - detail::min_v<D, S>);
+    }
+    return {}; // Invalid value or out of range.
+  }
+}
+
+// Obtains index in enum values from enum value.
+// Returns optional with index.
+template <detail::enum_subtype S, typename E>
+[[nodiscard]] constexpr auto enum_index(E value) noexcept -> detail::enable_if_t<E, optional<std::size_t>> {
+  using D = std::decay_t<E>;
+
+  return enum_index<D, S>(value);
+}
+
+// Obtains index in enum values from static storage enum variable.
+template <auto V, detail::enum_subtype S = detail::subtype_v<std::decay_t<decltype(V)>>>
+[[nodiscard]] constexpr auto enum_index() noexcept -> detail::enable_if_t<decltype(V), std::size_t> {
+  constexpr auto index = enum_index<std::decay_t<decltype(V)>, S>(V);
+  static_assert(index, "magic_enum::enum_index enum value does not have a index.");
+
+  return *index;
+}
+
+// Returns name from static storage enum variable.
+// This version is much lighter on the compile times and is not restricted to the enum_range limitation.
+template <auto V>
+[[nodiscard]] constexpr auto enum_name() noexcept -> detail::enable_if_t<decltype(V), string_view> {
+  constexpr string_view name = detail::enum_name_v<std::decay_t<decltype(V)>, V>;
+  static_assert(!name.empty(), "magic_enum::enum_name enum value does not have a name.");
+
+  return name;
+}
+
+// Returns name from enum value.
+// If enum value does not have name or value out of range, returns empty string.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_name(E value) noexcept -> detail::enable_if_t<E, string_view> {
+  using D = std::decay_t<E>;
+
+  if (const auto i = enum_index<D, S>(value)) {
+    return detail::names_v<D, S>[*i];
+  }
+  return {};
+}
+
+// Returns name from enum value, with the enum subtype S given explicitly as the first template argument.
+// If enum value does not have name or value out of range, returns empty string.
+template <detail::enum_subtype S, typename E>
+[[nodiscard]] constexpr auto enum_name(E value) noexcept -> detail::enable_if_t<E, string_view> {
+  using D = std::decay_t<E>;
+
+  return enum_name<D, S>(value); // delegates to the noexcept primary overload
+}
+
+// Returns std::array with names, sorted by enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_names() noexcept -> detail::enable_if_t<E, detail::names_t<E, S>> {
+  return detail::names_v<std::decay_t<E>, S>;
+}
+
+// Returns std::array with pairs (value, name), sorted by enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_entries() noexcept -> detail::enable_if_t<E, detail::entries_t<E, S>> {
+  return detail::entries_v<std::decay_t<E>, S>;
+}
+
+// Allows you to write magic_enum::enum_cast<foo>("bar", magic_enum::case_insensitive);
+inline constexpr auto case_insensitive = detail::case_insensitive<>{};
+
+// Obtains enum value from integer value.
+// Returns optional with enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_cast(underlying_type_t<E> value) noexcept -> detail::enable_if_t<E, optional<std::decay_t<E>>> {
+  using D = std::decay_t<E>;
+
+  if constexpr (detail::count_v<D, S> == 0) {
+    static_cast<void>(value);
+    return {}; // Empty enum.
+  } else {
+    if constexpr (detail::is_sparse_v<D, S> || (S == detail::enum_subtype::flags)) {
+#if defined(MAGIC_ENUM_ENABLE_HASH)
+      return detail::constexpr_switch<&detail::values_v<D, S>, detail::case_call_t::value>(
+          [](D v) { return optional<D>{v}; },
+          static_cast<D>(value),
+          detail::default_result_type_lambda<optional<D>>);
+#else
+      for (std::size_t i = 0; i < detail::count_v<D, S>; ++i) {
+        if (value == static_cast<underlying_type_t<D>>(enum_value<D, S>(i))) {
+          return static_cast<D>(value);
+        }
+      }
+      return {}; // Invalid value or out of range.
+#endif
+    } else {
+      if (value >= detail::min_v<D, S> && value <= detail::max_v<D, S>) {
+        return static_cast<D>(value);
+      }
+      return {}; // Invalid value or out of range.
+    }
+  }
+}
+
+// Obtains enum value from name.
+// Returns optional with enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>, typename BinaryPredicate = std::equal_to<>>
+[[nodiscard]] constexpr auto enum_cast(string_view value, [[maybe_unused]] BinaryPredicate p = {}) noexcept(detail::is_nothrow_invocable<BinaryPredicate>()) -> detail::enable_if_t<E, optional<std::decay_t<E>>, BinaryPredicate> {
+  using D = std::decay_t<E>;
+
+  if constexpr (detail::count_v<D, S> == 0) {
+    static_cast<void>(value);
+    return {}; // Empty enum.
+  } else {
+    if constexpr (detail::is_default_predicate<BinaryPredicate>() && detail::has_hash<D>) {
+#if defined(MAGIC_ENUM_ENABLE_HASH)
+      return detail::constexpr_switch<&detail::names_v<D, S>, detail::case_call_t::index>(
+          [](std::size_t i) { return optional<D>{detail::values_v<D, S>[i]}; },
+          value,
+          detail::default_result_type_lambda<optional<D>>,
+          [&p](string_view lhs, string_view rhs) { return detail::cmp_equal(lhs, rhs, p); });
+#else
+      static_assert(detail::always_false_v<E>, "magic_enum::enum_cast invalid.");
+#endif
+    } else {
+      for (std::size_t i = 0; i < detail::count_v<D, S>; ++i) {
+        if (detail::cmp_equal(value, detail::names_v<D, S>[i], p)) {
+          return enum_value<D, S>(i);
+        }
+      }
+      return {}; // Invalid value or out of range.
+    }
+  }
+}
+
+// Checks whether enum contains an enumerator with such enum value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_contains(E value) noexcept -> detail::enable_if_t<E, bool> {
+  using D = std::decay_t<E>;
+  using U = underlying_type_t<D>;
+
+  return static_cast<bool>(enum_cast<D, S>(static_cast<U>(value)));
+}
+
+// Checks whether enum contains an enumerator with such enum value.
+template <detail::enum_subtype S, typename E>
+[[nodiscard]] constexpr auto enum_contains(E value) noexcept -> detail::enable_if_t<E, bool> {
+  using D = std::decay_t<E>;
+  using U = underlying_type_t<D>;
+
+  return static_cast<bool>(enum_cast<D, S>(static_cast<U>(value)));
+}
+
+// Checks whether enum contains value with such integer value.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>>
+[[nodiscard]] constexpr auto enum_contains(underlying_type_t<E> value) noexcept -> detail::enable_if_t<E, bool> {
+  using D = std::decay_t<E>;
+
+  return static_cast<bool>(enum_cast<D, S>(value));
+}
+
+// Checks whether enum contains enumerator with such name.
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>, typename BinaryPredicate = std::equal_to<>>
+[[nodiscard]] constexpr auto enum_contains(string_view value, BinaryPredicate p = {}) noexcept(detail::is_nothrow_invocable<BinaryPredicate>()) -> detail::enable_if_t<E, bool, BinaryPredicate> {
+  using D = std::decay_t<E>;
+
+  return static_cast<bool>(enum_cast<D, S>(value, std::move(p)));
+}
+
+template <typename E, detail::enum_subtype S = detail::subtype_v<E>, typename F, detail::enable_if_t<E, int> = 0>
+constexpr auto enum_for_each(F&& f) {
+  using D = std::decay_t<E>;
+  static_assert(std::is_enum_v<D>, "magic_enum::enum_for_each requires enum type.");
+  constexpr auto sep = std::make_index_sequence<detail::count_v<D, S>>{};
+
+  if constexpr (detail::all_invocable<D, S, F>(sep)) {
+    return detail::for_each<D, S>(std::forward<F>(f), sep);
+  } else {
+    static_assert(detail::always_false_v<D>, "magic_enum::enum_for_each requires invocable of all enum value.");
+  }
+}
+
+template <bool AsFlags = true>
+inline constexpr auto as_flags = AsFlags ? detail::enum_subtype::flags : detail::enum_subtype::common;
+
+template <bool AsCommon = true>
+inline constexpr auto as_common = AsCommon ? detail::enum_subtype::common : detail::enum_subtype::flags; // as_common<> selects the common subtype, as_common<false> selects flags
+
+namespace bitwise_operators {
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E operator~(E rhs) noexcept {
+  return static_cast<E>(~static_cast<underlying_type_t<E>>(rhs));
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E operator|(E lhs, E rhs) noexcept {
+  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) | static_cast<underlying_type_t<E>>(rhs));
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E operator&(E lhs, E rhs) noexcept {
+  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) & static_cast<underlying_type_t<E>>(rhs));
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E operator^(E lhs, E rhs) noexcept {
+  return static_cast<E>(static_cast<underlying_type_t<E>>(lhs) ^ static_cast<underlying_type_t<E>>(rhs));
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E& operator|=(E& lhs, E rhs) noexcept {
+  return lhs = (lhs | rhs);
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E& operator&=(E& lhs, E rhs) noexcept {
+  return lhs = (lhs & rhs);
+}
+
+template <typename E, detail::enable_if_t<E, int> = 0>
+constexpr E& operator^=(E& lhs, E rhs) noexcept {
+  return lhs = (lhs ^ rhs);
+}
+
+} // namespace magic_enum::bitwise_operators
+
+} // namespace magic_enum
+
+#if defined(__clang__)
+#  pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#  pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#  pragma warning(pop)
+#endif
+
+#undef MAGIC_ENUM_GET_ENUM_NAME_BUILTIN
+#undef MAGIC_ENUM_GET_TYPE_NAME_BUILTIN
+#undef MAGIC_ENUM_VS_2017_WORKAROUND
+#undef MAGIC_ENUM_ARRAY_CONSTEXPR
+#undef MAGIC_ENUM_FOR_EACH_256
+
+#endif // NEARGYE_MAGIC_ENUM_HPP
diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md
index 91f92eb7ff..ba61996b32 100644
--- a/ext/sst/INSTALL.md
+++ b/ext/sst/INSTALL.md
@@ -1,8 +1,8 @@
 # Installing SST
 
-The links to download SST source code are available here
-[http://sst-simulator.org/SSTPages/SSTMainDownloads/].
-This guide is using the most recent SST version (11.0.0) as of September 2021.
+The links to download SST source code are available at
+<http://sst-simulator.org/SSTPages/SSTMainDownloads/>.
+This guide is using the most recent SST version (13.0.0) as of September 2023.
 The following guide assumes `$SST_CORE_HOME` as the location where SST will be
 installed.
 
@@ -11,14 +11,14 @@ installed.
 ### Downloading the SST-Core Source Code
 
 ```sh
-wget https://github.com/sstsimulator/sst-core/releases/download/v11.1.0_Final/sstcore-11.1.0.tar.gz
-tar xf sstcore-11.1.0.tar.gz
+wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz
+tar xzf sstcore-13.0.0.tar.gz
 ```
 
 ### Installing SST-Core
 
 ```sh
-cd sstcore-11.1.0
+cd sstcore-13.0.0
 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
             --disable-mpi # optional, used when MPI is not available.
 make all -j$(nproc)
@@ -36,14 +36,14 @@ export PATH=$SST_CORE_HOME/bin:$PATH
 ### Downloading the SST-Elements Source Code
 
 ```sh
-wget https://github.com/sstsimulator/sst-elements/releases/download/v11.1.0_Final/sstelements-11.1.0.tar.gz
-tar xf sstelements-11.1.0.tar.gz
+wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz
+tar xzf sstelements-13.0.0.tar.gz
 ```
 
 ### Installing SST-Elements
 
 ```sh
-cd sst-elements-library-11.1.0
+cd sst-elements-library-13.0.0
 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
             --with-sst-core=$SST_CORE_HOME
 make all -j$(nproc)
@@ -58,24 +58,36 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >>
 
 ### Building gem5 library
 
-At the root of gem5 folder,
-
+At the root of the gem5 folder, you need to compile gem5 as a library. This
+varies depending on which OS you are using. If you're using Linux, then
+execute the following:
 ```sh
 scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources
 ```
+If you're using macOS, then execute the following:
+```sh
+scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources
+```
 
-**Note:** `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
-`--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory.
+**Note:**
+* `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
+* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory.
+* The Mac version was tested on a MacBook Air with an M2 processor.
 
 ### Compiling the SST integration
 
-At the root of gem5 folder,
-
+Go to the SST directory in the gem5 repo.
 ```sh
 cd ext/sst
-make
 ```
-
+Depending on your OS, you need to copy the correct `Makefile.xxx` file to
+`Makefile`.
+```sh
+cp Makefile.xxx Makefile    # replace xxx with linux or mac
+make -j4
+```
+The Makefile is hardcoded to RISC-V. If you wish to compile for ARM, edit the
+Makefile or pass `ARCH=ARM` to `make` while compiling.
 ### Running an example simulation
 
 See `README.md`
diff --git a/ext/sst/Makefile.linux b/ext/sst/Makefile.linux
new file mode 100644
index 0000000000..f44ecd46d9
--- /dev/null
+++ b/ext/sst/Makefile.linux
@@ -0,0 +1,21 @@
+SST_VERSION=SST-13.0.0 # Name of the .pc file in lib/pkgconfig where SST is installed
+GEM5_LIB=gem5_opt
+ARCH=RISCV
+OFLAG=3
+
+LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH}
+CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext
+CPPFLAGS+=-MMD -MP
+SRC=$(wildcard *.cc)
+
+.PHONY: clean all
+all: libgem5.so
+
+# Link all objects ($^): $? lists only prerequisites newer than the target.
+libgem5.so: $(SRC:%.cc=%.o)
+	${CXX} ${CPPFLAGS} ${LDFLAGS} $^ -o $@ -l${GEM5_LIB}
+
+-include $(SRC:%.cc=%.d)
+
+clean:
+	${RM} *.[do] libgem5.so
diff --git a/ext/sst/Makefile.mac b/ext/sst/Makefile.mac
new file mode 100644
index 0000000000..4a67570a44
--- /dev/null
+++ b/ext/sst/Makefile.mac
@@ -0,0 +1,21 @@
+SST_VERSION=SST-13.0.0 # Name of the .pc file in lib/pkgconfig where SST is installed
+GEM5_LIB=gem5_opt
+ARCH=RISCV
+OFLAG=3
+
+LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH}
+CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext
+CPPFLAGS+=-MMD -MP
+SRC=$(wildcard *.cc)
+
+.PHONY: clean all
+all: libgem5.dylib
+
+# Link all objects ($^): $? lists only prerequisites newer than the target.
+libgem5.dylib: $(SRC:%.cc=%.o)
+	${CXX} ${CPPFLAGS} ${LDFLAGS} $^ -o $@ -l${GEM5_LIB}
+
+-include $(SRC:%.cc=%.d)
+
+clean:
+	${RM} *.[do] libgem5.dylib
diff --git a/ext/sst/README.md b/ext/sst/README.md
index 1f37cb4c44..fb998b5e18 100644
--- a/ext/sst/README.md
+++ b/ext/sst/README.md
@@ -78,7 +78,7 @@ the `bbl-busybox-boot-exit` resource, which contains an m5 binary, and
 `m5 exit` will be called upon the booting process reaching the early userspace.
 More information about building a bootloader containing a Linux Kernel and a
 customized workload is available at
-[https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/stable/src/riscv-boot-exit-nodisk/].
+<https://github.com/gem5/gem5-resources/tree/stable/src/riscv-boot-exit-nodisk>.
 
 ## Running an example simulation (Arm)
 
diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc
index 7af0eed7b7..3ea6127ecd 100644
--- a/ext/sst/gem5.cc
+++ b/ext/sst/gem5.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -70,7 +70,6 @@
 
 #include <sst/core/sst_config.h>
 #include <sst/core/componentInfo.h>
-#include <sst/core/interfaces/simpleMem.h>
 #include <sst/elements/memHierarchy/memEvent.h>
 #include <sst/elements/memHierarchy/memTypes.h>
 #include <sst/elements/memHierarchy/util.h>
@@ -169,16 +168,29 @@ gem5Component::gem5Component(SST::ComponentId_t id, SST::Params& params):
     registerAsPrimaryComponent();
     primaryComponentDoNotEndSim();
 
-    systemPort = \
-        loadUserSubComponent<SSTResponderSubComponent>("system_port",0);
-    cachePort = \
-        loadUserSubComponent<SSTResponderSubComponent>("cache_port", 0);
-
-    systemPort->setTimeConverter(timeConverter);
-    systemPort->setOutputStream(&(output));
-    cachePort->setTimeConverter(timeConverter);
-    cachePort->setOutputStream(&(output));
-
+    // We need to add another parameter when invoking gem5 scripts from SST to
+    // keep track of all the OutgoingBridges. This allows adding or removing
+    // OutgoingBridges from gem5 configs without the need to recompile the
+    // ext/sst source every time.
+    std::string ports = params.find<std::string>("ports", "");
+    if (ports.empty()) {
+        output.fatal(
+            CALL_INFO, -1, "Component %s must have a 'ports' parameter.\n",
+            getName().c_str()
+        );
+    }
+    // Split the port names using the util method defined.
+    splitPortNames(ports);
+    for (int i = 0 ; i < sstPortCount ; i++) {
+        std::cout << sstPortNames[i] << std::endl;
+        sstPorts.push_back(
+            loadUserSubComponent<SSTResponderSubComponent>(sstPortNames[i], 0)
+        );
+        // If the name defined in the `ports` is incorrect, then the program
+        // will crash when calling `setTimeConverter`.
+        sstPorts[i]->setTimeConverter(timeConverter);
+        sstPorts[i]->setOutputStream(&(output));
+    }
 }
 
 gem5Component::~gem5Component()
@@ -192,13 +204,7 @@ gem5Component::init(unsigned phase)
 
     if (phase == 0) {
         initPython(args.size(), &args[0]);
-
-        const std::vector<std::string> m5_instantiate_commands = {
-            "import m5",
-            "m5.instantiate()"
-        };
-        execPythonCommands(m5_instantiate_commands);
-
+        // m5.instantiate() was moved to the gem5 script.
         // calling SimObject.startup()
         const std::vector<std::string> simobject_setup_commands = {
             "import atexit",
@@ -216,8 +222,9 @@ gem5Component::init(unsigned phase)
 
         // find the corresponding SimObject for each SSTResponderSubComponent
         gem5::Root* gem5_root = gem5::Root::root();
-        systemPort->findCorrespondingSimObject(gem5_root);
-        cachePort->findCorrespondingSimObject(gem5_root);
+        for (auto &port : sstPorts) {
+            port->findCorrespondingSimObject(gem5_root);
+        }
 
         // initialize the gem5 event queue
         if (!(threadInitialized)) {
@@ -230,17 +237,18 @@ gem5Component::init(unsigned phase)
         }
 
     }
-
-    systemPort->init(phase);
-    cachePort->init(phase);
+    for (auto &port : sstPorts) {
+        port->init(phase);
+    }
 }
 
 void
 gem5Component::setup()
 {
     output.verbose(CALL_INFO, 1, 0, "Component is being setup.\n");
-    systemPort->setup();
-    cachePort->setup();
+    for (auto &port : sstPorts) {
+        port->setup();
+    }
 }
 
 void
@@ -427,3 +435,16 @@ gem5Component::splitCommandArgs(std::string &cmd, std::vector<char*> &args)
     for (auto part: parsed_args)
         args.push_back(strdup(part.c_str()));
 }
+
+void
+gem5Component::splitPortNames(std::string port_names)
+{
+    // Append each token of `ports` as-is; strdup() here leaked every copy.
+    std::vector<std::string> parsed_args = tokenizeString(
+        port_names, {'\\', ' ', '\'', '\"'});
+    sstPortCount = 0;
+    for (const auto &part : parsed_args) {
+        sstPortNames.push_back(part);
+        sstPortCount++;
+    }
+}
diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh
index 447c68c3b2..f9f00beabd 100644
--- a/ext/sst/gem5.hh
+++ b/ext/sst/gem5.hh
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -79,9 +79,7 @@
 #include <sst/core/sst_config.h>
 #include <sst/core/component.h>
 
-#include <sst/core/simulation.h>
 #include <sst/core/interfaces/stringEvent.h>
-#include <sst/core/interfaces/simpleMem.h>
 
 #include <sim/simulate.hh>
 
@@ -108,15 +106,20 @@ class gem5Component: public SST::Component
 
   private:
     SST::Output output;
-    SSTResponderSubComponent* systemPort;
-    SSTResponderSubComponent* cachePort;
     uint64_t clocksProcessed;
     SST::TimeConverter* timeConverter;
     gem5::GlobalSimLoopExitEvent *simulateLimitEvent;
     std::vector<char*> args;
 
+    // We need a list of incoming port names so that we don't need to recompile
+    // every time we add a new OutgoingBridge from python.
+    std::vector<SSTResponderSubComponent*> sstPorts;
+    std::vector<std::string> sstPortNames;
+    int sstPortCount;
+
     void initPython(int argc, char **argv);
     void splitCommandArgs(std::string &cmd, std::vector<char*> &args);
+    void splitPortNames(std::string port_names);
 
     bool threadInitialized;
 
@@ -139,6 +142,7 @@ class gem5Component: public SST::Component
     )
 
     SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
+        // These are the generally expected ports.
         {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"},
         {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"}
     )
diff --git a/ext/sst/sst/arm_example.py b/ext/sst/sst/arm_example.py
index cdee3ca40a..4bc111cb86 100644
--- a/ext/sst/sst/arm_example.py
+++ b/ext/sst/sst/arm_example.py
@@ -10,7 +10,7 @@
 # unmodified and in its entirety in all distributions of the software,
 # modified or unmodified, in source code or in binary form.
 #
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -46,9 +46,10 @@
 
 kernel = "vmlinux_exit.arm64"
 cpu_clock_rate = "3GHz"
-# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory
-# currently, we do not subtract 0x80000000 from the request's address to get the "real" address
-# so, the mem_size would always be 2GiB larger than the desired memory size
+# gem5 will send requests to physical addresses of range [0x80000000, inf) to
+# memory. Currently, we do not subtract 0x80000000 from the request's address
+# to get the "real" address, so the mem_size would always be 2GiB larger than
+# the desired memory size.
 memory_size_gem5 = "4GiB"
 memory_size_sst = "16GiB"
 addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue()
@@ -69,9 +70,22 @@
     --cpu-clock-rate {cpu_clock_rate} \
     --memory-size {memory_size_gem5}"
 
+# We keep track of all the memory ports that we have.
+sst_ports = {
+    "system_port" : "system.system_outgoing_bridge",
+    "cache_port" : "system.memory_outgoing_bridge"
+}
+
+# We need a list of ports.
+port_list = []
+for port in sst_ports:
+    port_list.append(port)
+
 cpu_params = {
     "frequency": cpu_clock_rate,
     "cmd": gem5_command,
+    "ports" : " ".join(port_list),
+    "debug_flags" : ""
 }
 
 gem5_node = sst.Component("gem5_node", "gem5.gem5Component")
@@ -79,16 +93,16 @@
 
 cache_bus = sst.Component("cache_bus", "memHierarchy.Bus")
 cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
-
-system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) # for initialization
+# for initialization
+system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0)
 system_port.addParams({
-    "response_receiver_name": "system.system_outgoing_bridge",
+    "response_receiver_name": sst_ports["system_port"],
     "mem_size": memory_size_sst
 })
-
-cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) # SST -> gem5
+# SST -> gem5
+cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0)
 cache_port.addParams({
-    "response_receiver_name": "system.memory_outgoing_bridge",
+    "response_receiver_name": sst_ports["cache_port"],
     "mem_size": memory_size_sst
 })
 
@@ -98,11 +112,12 @@
 
 # Memory
 memctrl = sst.Component("memory", "memHierarchy.MemController")
+# `addr_range_end` should be changed according to memory_size_sst
 memctrl.addParams({
     "debug" : "0",
     "clock" : "1GHz",
     "request_width" : "64",
-    "addr_range_end" : addr_range_end, # should be changed accordingly to memory_size_sst
+    "addr_range_end" : addr_range_end,
 })
 memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
 memory.addParams({
diff --git a/ext/sst/sst/example.py b/ext/sst/sst/example.py
index 76cf8ad24e..1c35bc3f83 100644
--- a/ext/sst/sst/example.py
+++ b/ext/sst/sst/example.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,9 +34,10 @@
 
 bbl = "riscv-boot-exit-nodisk"
 cpu_clock_rate = "3GHz"
-# gem5 will send requests to physical addresses of range [0x80000000, inf) to memory
-# currently, we do not subtract 0x80000000 from the request's address to get the "real" address
-# so, the mem_size would always be 2GiB larger than the desired memory size
+# gem5 will send requests to physical addresses of range [0x80000000, inf) to
+# memory. Currently, we do not subtract 0x80000000 from the request's address
+# to get the "real" address, so the mem_size would always be 2GiB larger than
+# the desired memory size.
 memory_size_gem5 = "4GiB"
 memory_size_sst = "6GiB"
 addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue()
@@ -52,10 +53,24 @@
     "L1" : "1",
 }
 
+# We keep track of all the memory ports that we have.
+sst_ports = {
+    "system_port" : "system.system_outgoing_bridge",
+    "cache_port" : "system.memory_outgoing_bridge"
+}
+
+# We need a list of ports.
+port_list = []
+for port in sst_ports:
+    port_list.append(port)
+
 cpu_params = {
     "frequency": cpu_clock_rate,
-    "cmd": " ../../configs/example/sst/riscv_fs.py --cpu-clock-rate {} --memory-size {}".format(cpu_clock_rate, memory_size_gem5),
-    "debug_flags": ""
+    "cmd": " ../../configs/example/sst/riscv_fs.py"
+            + f" --cpu-clock-rate {cpu_clock_rate}"
+            + f" --memory-size {memory_size_gem5}",
+    "debug_flags": "",
+    "ports" : " ".join(port_list)
 }
 
 gem5_node = sst.Component("gem5_node", "gem5.gem5Component")
@@ -64,11 +79,14 @@
 cache_bus = sst.Component("cache_bus", "memHierarchy.Bus")
 cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
 
-system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) # for initialization
-system_port.addParams({ "response_receiver_name": "system.system_outgoing_bridge"}) # tell the SubComponent the name of the corresponding SimObject
+# for initialization
+system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0)
+# tell the SubComponent the name of the corresponding SimObject
+system_port.addParams({ "response_receiver_name": sst_ports["system_port"]})
 
-cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) # SST -> gem5
-cache_port.addParams({ "response_receiver_name": "system.memory_outgoing_bridge"})
+# SST -> gem5
+cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0)
+cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]})
 
 # L1 cache
 l1_cache = sst.Component("l1_cache", "memHierarchy.Cache")
@@ -76,11 +94,12 @@
 
 # Memory
 memctrl = sst.Component("memory", "memHierarchy.MemController")
+# `addr_range_end` should be changed according to memory_size_sst
 memctrl.addParams({
     "debug" : "0",
     "clock" : "1GHz",
     "request_width" : "64",
-    "addr_range_end" : addr_range_end, # should be changed accordingly to memory_size_sst
+    "addr_range_end" : addr_range_end,
 })
 memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
 memory.addParams({
diff --git a/ext/sst/sst_responder.hh b/ext/sst/sst_responder.hh
index a89d311064..5f483be845 100644
--- a/ext/sst/sst_responder.hh
+++ b/ext/sst/sst_responder.hh
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -35,9 +35,8 @@
 #include <sst/core/sst_config.h>
 #include <sst/core/component.h>
 
-#include <sst/core/simulation.h>
 #include <sst/core/interfaces/stringEvent.h>
-#include <sst/core/interfaces/simpleMem.h>
+#include <sst/core/interfaces/stdMem.h>
 
 #include <sst/core/eli/elementinfo.h>
 #include <sst/core/link.h>
diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc
index 366f99aecf..8cd2c04628 100644
--- a/ext/sst/sst_responder_subcomponent.cc
+++ b/ext/sst/sst_responder_subcomponent.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -64,13 +64,12 @@ SSTResponderSubComponent::setTimeConverter(SST::TimeConverter* tc)
     // SHARE_PORTS means the interface can use our port as if it were its own
     // INSERT_STATS means the interface will inherit our statistic
     //   configuration (e.g., if ours are enabled, the interface’s will be too)
-    memoryInterface = \
-        loadAnonymousSubComponent<SST::Interfaces::SimpleMem>(
-            "memHierarchy.memInterface", "memory", 0,
-            SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS,
-            interface_params, timeConverter,
-            new SST::Interfaces::SimpleMem::Handler<SSTResponderSubComponent>(
-                this, &SSTResponderSubComponent::portEventHandler)
+    memoryInterface = loadAnonymousSubComponent<SST::Interfaces::StandardMem>(
+        "memHierarchy.standardInterface", "memory", 0,
+        SST::ComponentInfo::SHARE_PORTS | SST::ComponentInfo::INSERT_STATS,
+        interface_params, timeConverter,
+        new SST::Interfaces::StandardMem::Handler<SSTResponderSubComponent>(
+            this, &SSTResponderSubComponent::portEventHandler)
     );
     assert(memoryInterface != NULL);
 }
@@ -91,9 +90,9 @@ SSTResponderSubComponent::setResponseReceiver(
 
 bool
 SSTResponderSubComponent::handleTimingReq(
-    SST::Interfaces::SimpleMem::Request* request)
+    SST::Interfaces::StandardMem::Request* request)
 {
-    memoryInterface->sendRequest(request);
+    memoryInterface->send(request);
     return true;
 }
 
@@ -104,12 +103,10 @@ SSTResponderSubComponent::init(unsigned phase)
         for (auto p: responseReceiver->getInitData()) {
             gem5::Addr addr = p.first;
             std::vector<uint8_t> data = p.second;
-            SST::Interfaces::SimpleMem::Request* request = \
-                new SST::Interfaces::SimpleMem::Request(
-                    SST::Interfaces::SimpleMem::Request::Command::Write, addr,
-                    data.size(), data
-                );
-            memoryInterface->sendInitData(request);
+            SST::Interfaces::StandardMem::Request* request = \
+                new SST::Interfaces::StandardMem::Write(
+                    addr, data.size(), data);
+            memoryInterface->sendUntimedData(request);
         }
     }
     memoryInterface->init(phase);
@@ -132,20 +129,24 @@ SSTResponderSubComponent::findCorrespondingSimObject(gem5::Root* gem5_root)
 
 void
 SSTResponderSubComponent::handleSwapReqResponse(
-    SST::Interfaces::SimpleMem::Request* request)
+    SST::Interfaces::StandardMem::Request* request)
 {
     // get the data, then,
     //     1. send a response to gem5 with the original data
     //     2. send a write to memory with atomic op applied
 
-    SST::Interfaces::SimpleMem::Request::id_t request_id = request->id;
+    SST::Interfaces::StandardMem::Request::id_t request_id = request->getID();
     TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id);
     assert(it != sstRequestIdToPacketMap.end());
-    std::vector<uint8_t> data = request->data;
+    std::vector<uint8_t> data = \
+        dynamic_cast<SST::Interfaces::StandardMem::ReadResp*>(request)->data;
 
     // step 1
     gem5::PacketPtr pkt = it->second;
-    pkt->setData(request->data.data());
+    pkt->setData(
+        dynamic_cast<SST::Interfaces::StandardMem::ReadResp*>(
+            request)->data.data()
+    );
     pkt->makeAtomicResponse();
     pkt->headerDelay = pkt->payloadDelay = 0;
     if (blocked() || !responseReceiver->sendTimingResp(pkt))
@@ -153,27 +154,29 @@ SSTResponderSubComponent::handleSwapReqResponse(
 
     // step 2
     (*(pkt->getAtomicOp()))(data.data()); // apply the atomic op
-    SST::Interfaces::SimpleMem::Request::Command cmd = \
-         SST::Interfaces::SimpleMem::Request::Command::Write;
-    SST::Interfaces::SimpleMem::Addr addr = request->addr;
+    // This is a Write. Need to use the Write visitor class. But the original
+    // request is a read response. Therefore, we need to find the address and
+    // the data size and then call Write.
+    SST::Interfaces::StandardMem::Addr addr = \
+        dynamic_cast<SST::Interfaces::StandardMem::ReadResp*>(request)->pAddr;
     auto data_size = data.size();
-    SST::Interfaces::SimpleMem::Request* write_request = \
-        new SST::Interfaces::SimpleMem::Request(
-            cmd, addr, data_size, data
-        );
-    write_request->setMemFlags(
-        SST::Interfaces::SimpleMem::Request::Flags::F_LOCKED);
-    memoryInterface->sendRequest(write_request);
+    // Create the Write request here.
+    SST::Interfaces::StandardMem::Request* write_request = \
+        new SST::Interfaces::StandardMem::Write(addr, data_size, data);
+    // F_LOCKED flag in SimpleMem was changed to ReadLock and WriteUnlock
+    // visitor classes. This has to be addressed in the future. The boot test
+    // works without using ReadLock and WriteUnlock classes.
+    memoryInterface->send(write_request);
 
     delete request;
 }
 
 void
 SSTResponderSubComponent::portEventHandler(
-    SST::Interfaces::SimpleMem::Request* request)
+    SST::Interfaces::StandardMem::Request* request)
 {
     // Expect to handle an SST response
-    SST::Interfaces::SimpleMem::Request::id_t request_id = request->id;
+    SST::Interfaces::StandardMem::Request::id_t request_id = request->getID();
 
     TPacketMap::iterator it = sstRequestIdToPacketMap.find(request_id);
 
@@ -193,19 +196,27 @@ SSTResponderSubComponent::portEventHandler(
 
         Translator::inplaceSSTRequestToGem5PacketPtr(pkt, request);
 
-        if (blocked() || !(responseReceiver->sendTimingResp(pkt)))
+        if (blocked() || !(responseReceiver->sendTimingResp(pkt))) {
             responseQueue.push(pkt);
-    } else { // we can handle unexpected invalidates, but nothing else.
-        SST::Interfaces::SimpleMem::Request::Command cmd = request->cmd;
-        if (cmd == SST::Interfaces::SimpleMem::Request::Command::WriteResp)
+        }
+    } else {
+        // we can handle unexpected invalidates, but nothing else.
+        if (SST::Interfaces::StandardMem::Read* test =
+                dynamic_cast<SST::Interfaces::StandardMem::Read*>(request)) {
             return;
-        assert(cmd == SST::Interfaces::SimpleMem::Request::Command::Inv);
-
-        // make Req/Pkt for Snoop/no response needed
+        }
+        else if (SST::Interfaces::StandardMem::WriteResp* test =
+                dynamic_cast<SST::Interfaces::StandardMem::WriteResp*>(
+                request)) {
+            return;
+        }
+        // for Snoop/no response needed
         // presently no consideration for masterId, packet type, flags...
         gem5::RequestPtr req = std::make_shared<gem5::Request>(
-            request->addr, request->size, 0, 0
-        );
+            dynamic_cast<SST::Interfaces::StandardMem::FlushAddr*>(
+                request)->pAddr,
+            dynamic_cast<SST::Interfaces::StandardMem::FlushAddr*>(
+                request)->size, 0, 0);
 
         gem5::PacketPtr pkt = new gem5::Packet(
             req, gem5::MemCmd::InvalidateReq);
diff --git a/ext/sst/sst_responder_subcomponent.hh b/ext/sst/sst_responder_subcomponent.hh
index 51bc4f9318..ed9f09d6b8 100644
--- a/ext/sst/sst_responder_subcomponent.hh
+++ b/ext/sst/sst_responder_subcomponent.hh
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -36,10 +36,8 @@
 
 #include <sst/core/sst_config.h>
 #include <sst/core/component.h>
-
-#include <sst/core/simulation.h>
 #include <sst/core/interfaces/stringEvent.h>
-#include <sst/core/interfaces/simpleMem.h>
+#include <sst/core/interfaces/stdMem.h>
 
 #include <sst/core/eli/elementinfo.h>
 #include <sst/core/link.h>
@@ -59,12 +57,12 @@ class SSTResponderSubComponent: public SST::SubComponent
     gem5::OutgoingRequestBridge* responseReceiver;
     gem5::SSTResponderInterface* sstResponder;
 
-    SST::Interfaces::SimpleMem* memoryInterface;
+    SST::Interfaces::StandardMem* memoryInterface;
     SST::TimeConverter* timeConverter;
     SST::Output* output;
     std::queue<gem5::PacketPtr> responseQueue;
 
-    std::vector<SST::Interfaces::SimpleMem::Request*> initRequests;
+    std::vector<SST::Interfaces::StandardMem::Request*> initRequests;
 
     std::string gem5SimObjectName;
     std::string memSize;
@@ -78,7 +76,7 @@ class SSTResponderSubComponent: public SST::SubComponent
     void setOutputStream(SST::Output* output_);
 
     void setResponseReceiver(gem5::OutgoingRequestBridge* gem5_bridge);
-    void portEventHandler(SST::Interfaces::SimpleMem::Request* request);
+    void portEventHandler(SST::Interfaces::StandardMem::Request* request);
 
     bool blocked();
     void setup();
@@ -86,18 +84,18 @@ class SSTResponderSubComponent: public SST::SubComponent
     // return true if the SimObject could be found
     bool findCorrespondingSimObject(gem5::Root* gem5_root);
 
-    bool handleTimingReq(SST::Interfaces::SimpleMem::Request* request);
+    bool handleTimingReq(SST::Interfaces::StandardMem::Request* request);
     void handleRecvRespRetry();
     void handleRecvFunctional(gem5::PacketPtr pkt);
-    void handleSwapReqResponse(SST::Interfaces::SimpleMem::Request* request);
+    void handleSwapReqResponse(SST::Interfaces::StandardMem::Request* request);
 
     TPacketMap sstRequestIdToPacketMap;
 
   public: // register the component to SST
     SST_ELI_REGISTER_SUBCOMPONENT_API(SSTResponderSubComponent);
-    SST_ELI_REGISTER_SUBCOMPONENT_DERIVED(
+    SST_ELI_REGISTER_SUBCOMPONENT(
         SSTResponderSubComponent,
-        "gem5", // SST will look for libgem5.so
+        "gem5", // SST will look for libgem5.so or libgem5.dylib
         "gem5Bridge",
         SST_ELI_ELEMENT_VERSION(1, 0, 0),
         "Initialize gem5 and link SST's ports to gem5's ports",
@@ -106,7 +104,7 @@ class SSTResponderSubComponent: public SST::SubComponent
 
     SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
         {"memory", "Interface to the memory subsystem", \
-         "SST::Interfaces::SimpleMem"}
+         "SST::Interfaces::StandardMem"}
     )
 
     SST_ELI_DOCUMENT_PORTS(
diff --git a/ext/sst/translator.hh b/ext/sst/translator.hh
index 2d8c8b782a..bf6a168d9a 100644
--- a/ext/sst/translator.hh
+++ b/ext/sst/translator.hh
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -27,87 +27,143 @@
 #ifndef __TRANSLATOR_H__
 #define __TRANSLATOR_H__
 
-#include <sst/core/simulation.h>
+#include <sst/core/interfaces/stdMem.h>
 #include <sst/core/interfaces/stringEvent.h>
-#include <sst/core/interfaces/simpleMem.h>
 #include <sst/elements/memHierarchy/memEvent.h>
 #include <sst/elements/memHierarchy/memTypes.h>
 #include <sst/elements/memHierarchy/util.h>
 
-typedef std::unordered_map<SST::Interfaces::SimpleMem::Request::id_t,
+typedef std::unordered_map<SST::Interfaces::StandardMem::Request::id_t,
                            gem5::PacketPtr> TPacketMap;
 
 namespace Translator
 {
 
-inline SST::Interfaces::SimpleMem::Request*
+inline SST::Interfaces::StandardMem::Request*
 gem5RequestToSSTRequest(gem5::PacketPtr pkt,
                         TPacketMap& sst_request_id_to_packet_map)
 {
-    SST::Interfaces::SimpleMem::Request::Command cmd;
+    // Listing all the different SST Memory commands.
+    enum sst_standard_mem_commands
+    {
+        Read,
+        ReadResp,
+        Write,
+        WriteResp,
+        FlushAddr,
+        FlushResp,
+        ReadLock,
+        WriteUnlock,
+        LoadLink,
+        StoreConditional,
+        MoveData,
+        CustomReq,
+        CustomResp,
+        InvNotify
+
+    };
+    // SST's standard memory class has visitor classes for all the different
+    // types of memory commands. Request class now does not have a command
+    // variable. Instead for different types of request, we now need to
+    // dynamically cast the class object. I'm using an extra variable to map
+    // the type of command for SST.
+    int sst_command_type = -1;
+    // StandardMem only has one cache flush class with an option to flush or
+    // flush and invalidate an address. By default, this is set to true so that
+    // it corresponds to gem5::MemCmd::InvalidateReq
+    bool flush_addr_flag = true;
     switch ((gem5::MemCmd::Command)pkt->cmd.toInt()) {
         case gem5::MemCmd::HardPFReq:
         case gem5::MemCmd::SoftPFReq:
         case gem5::MemCmd::SoftPFExReq:
         case gem5::MemCmd::LoadLockedReq:
         case gem5::MemCmd::ReadExReq:
+        case gem5::MemCmd::ReadCleanReq:
+        case gem5::MemCmd::ReadSharedReq:
         case gem5::MemCmd::ReadReq:
         case gem5::MemCmd::SwapReq:
-            cmd = SST::Interfaces::SimpleMem::Request::Command::Read;
+            sst_command_type = Read;
             break;
         case gem5::MemCmd::StoreCondReq:
+        case gem5::MemCmd::WritebackDirty:
+        case gem5::MemCmd::WritebackClean:
         case gem5::MemCmd::WriteReq:
-            cmd = SST::Interfaces::SimpleMem::Request::Command::Write;
+            sst_command_type = Write;
             break;
         case gem5::MemCmd::CleanInvalidReq:
         case gem5::MemCmd::InvalidateReq:
-            cmd = SST::Interfaces::SimpleMem::Request::Command::FlushLineInv;
+            sst_command_type = FlushAddr;
             break;
         case gem5::MemCmd::CleanSharedReq:
-            cmd = SST::Interfaces::SimpleMem::Request::Command::FlushLine;
+            sst_command_type = FlushAddr;
+            flush_addr_flag = false;
             break;
         default:
             panic("Unable to convert gem5 packet: %s\n", pkt->cmd.toString());
     }
 
-    SST::Interfaces::SimpleMem::Addr addr = pkt->getAddr();
-
-    uint8_t* data_ptr = pkt->getPtr<uint8_t>();
+    SST::Interfaces::StandardMem::Addr addr = pkt->getAddr();
     auto data_size = pkt->getSize();
-    std::vector<uint8_t> data = std::vector<uint8_t>(
-        data_ptr, data_ptr + data_size
-    );
+    std::vector<uint8_t> data;
+    // Need to make sure that the command type is a Write before retrieving the
+    // data through data_ptr.
+    if (sst_command_type == Write) {
+        uint8_t* data_ptr = pkt->getPtr<uint8_t>();
+        data = std::vector<uint8_t>(data_ptr, data_ptr + data_size);
 
-    SST::Interfaces::SimpleMem::Request* request = \
-        new SST::Interfaces::SimpleMem::Request(
-            cmd, addr, data_size, data
-        );
+    }
+    // Now convert to an SST StandardMem request.
+    SST::Interfaces::StandardMem::Request* request = nullptr;
+    // find the corresponding memory command type.
+    switch(sst_command_type) {
+        case Read:
+            request = new SST::Interfaces::StandardMem::Read(addr, data_size);
+            break;
+        case Write:
+            request =
+                new SST::Interfaces::StandardMem::Write(addr, data_size, data);
+            break;
+        case FlushAddr: {
+            // StandardMem::FlushAddr takes an argument called `depth` which
+            // defines the number of cache levels to invalidate. Ideally this
+            // has to be input from the SST config, however in this
+            // implementation I'm hardcoding this value to 2.
+            int cache_depth = 2;
+            request =
+                new SST::Interfaces::StandardMem::FlushAddr(
+                    addr, data_size, flush_addr_flag, cache_depth);
+            break;
+        }
+        default:
+            panic("Unable to translate command %d to Request class!",
+                sst_command_type);
+    }
 
     if ((gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::LoadLockedReq
         || (gem5::MemCmd::Command)pkt->cmd.toInt() == gem5::MemCmd::SwapReq
         || pkt->req->isLockedRMW()) {
-        request->setMemFlags(
-            SST::Interfaces::SimpleMem::Request::Flags::F_LOCKED);
-    } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() == \
+        // F_LOCKED is deprecated. Therefore I'm skipping this flag for the
+        // StandardMem request.
+    } else if ((gem5::MemCmd::Command)pkt->cmd.toInt() ==
               gem5::MemCmd::StoreCondReq) {
-        request->setMemFlags(
-            SST::Interfaces::SimpleMem::Request::Flags::F_LLSC);
+        // F_LLSC is deprecated. Therefore I'm skipping this flag for the
+        // StandardMem request.
     }
 
     if (pkt->req->isUncacheable()) {
-        request->setFlags(
-            SST::Interfaces::SimpleMem::Request::Flags::F_NONCACHEABLE);
+        request->setFlag(
+            SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE);
     }
 
     if (pkt->needsResponse())
-        sst_request_id_to_packet_map[request->id] = pkt;
+        sst_request_id_to_packet_map[request->getID()] = pkt;
 
     return request;
 }
 
 inline void
 inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt,
-                                 SST::Interfaces::SimpleMem::Request* request)
+                                SST::Interfaces::StandardMem::Request* request)
 {
     pkt->makeResponse();
 
@@ -116,8 +172,18 @@ inplaceSSTRequestToGem5PacketPtr(gem5::PacketPtr pkt,
         // SC interprets ExtraData == 1 as the store was successful
         pkt->req->setExtraData(1);
     }
-
-    pkt->setData(request->data.data());
+    // If there is data in the request, send it back. Only ReadResp requests
+    // have data associated with them. Other packets do not need to be cast.
+    if (!pkt->isWrite()) {
+        // Need to verify whether the packet is a ReadResp, otherwise the
+        // program will try to incorrectly cast the request object.
+        if (SST::Interfaces::StandardMem::ReadResp* test =
+            dynamic_cast<SST::Interfaces::StandardMem::ReadResp*>(request)) {
+            pkt->setData(dynamic_cast<SST::Interfaces::StandardMem::ReadResp*>(
+                request)->data.data()
+            );
+        }
+    }
 
     // Clear out bus delay notifications
     pkt->headerDelay = pkt->payloadDelay = 0;
diff --git a/ext/testlib/__init__.py b/ext/testlib/__init__.py
index 898205d9ab..2fad890b65 100644
--- a/ext/testlib/__init__.py
+++ b/ext/testlib/__init__.py
@@ -36,9 +36,9 @@
 from .configuration import *
 from .main import main
 
-#TODO Remove this awkward bootstrap
-#FIXME
+# TODO Remove this awkward bootstrap
+# FIXME
 from gem5 import *
 
-#TODO Remove this as an export, users should getcwd from os
+# TODO Remove this as an export, users should getcwd from os
 from os import getcwd
diff --git a/ext/testlib/configuration.py b/ext/testlib/configuration.py
index fd47e3b33a..a635b6d3a9 100644
--- a/ext/testlib/configuration.py
+++ b/ext/testlib/configuration.py
@@ -38,7 +38,7 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 Global configuration module which exposes two types of configuration
 variables:
 
@@ -76,7 +76,7 @@
     common string names used across the test framework.
     :code:`_defaults.build_dir = None` Once this module has been imported
     constants should not be modified and their base attributes are frozen.
-'''
+"""
 import abc
 import argparse
 import copy
@@ -87,29 +87,35 @@
 
 from testlib.helper import absdirpath, AttrDict, FrozenAttrDict
 
+
 class UninitialzedAttributeException(Exception):
-    '''
+    """
     Signals that an attribute in the config file was not initialized.
-    '''
+    """
+
     pass
 
+
 class UninitializedConfigException(Exception):
-    '''
+    """
     Signals that the config was not initialized before trying to access an
     attribute.
-    '''
+    """
+
     pass
 
-class TagRegex(object):
+
+class TagRegex:
     def __init__(self, include, regex):
         self.include = include
         self.regex = re.compile(regex)
 
     def __str__(self):
-        type_ = 'Include' if self.include else 'Remove'
-        return '%10s: %s' % (type_, self.regex.pattern)
+        type_ = "Include" if self.include else "Remove"
+        return "%10s: %s" % (type_, self.regex.pattern)
+
 
-class _Config(object):
+class _Config:
     _initialized = False
 
     __shared_dict = {}
@@ -131,14 +137,14 @@ def _init(self, parser):
         self._initialized = True
 
     def _add_post_processor(self, attr, post_processor):
-        '''
+        """
         :param attr: Attribute to pass to and recieve from the
         :func:`post_processor`.
 
         :param post_processor: A callback functions called in a chain to
             perform additional setup for a config argument. Should return a
             tuple containing the new value for the config attr.
-        '''
+        """
         if attr not in self._post_processors:
             self._post_processors[attr] = []
         self._post_processors[attr].append(post_processor)
@@ -153,7 +159,7 @@ def _parse_commandline_args(self, parser):
 
         for attr in dir(args):
             # Ignore non-argument attributes.
-            if not attr.startswith('_'):
+            if not attr.startswith("_"):
                 self._config_file_args[attr] = getattr(args, attr)
         self._config.update(self._config_file_args)
 
@@ -166,100 +172,104 @@ def _run_post_processors(self):
                 newval = newval[0]
             self._set(attr, newval)
 
-
     def _lookup_val(self, attr):
-        '''
+        """
         Get the attribute from the config or fallback to defaults.
 
         :returns: If the value is not stored return None. Otherwise a tuple
             containing the value.
-        '''
+        """
         if attr in self._config:
             return (self._config[attr],)
         elif hasattr(self._defaults, attr):
             return (getattr(self._defaults, attr),)
 
     def __getattr__(self, attr):
-        if attr in dir(super(_Config, self)):
-            return getattr(super(_Config, self), attr)
+        if attr in dir(super()):
+            return getattr(super(), attr)
         elif not self._initialized:
             raise UninitializedConfigException(
-                'Cannot directly access elements from the config before it is'
-                ' initialized')
+                "Cannot directly access elements from the config before it is"
+                " initialized"
+            )
         else:
             val = self._lookup_val(attr)
             if val is not None:
                 return val[0]
             else:
                 raise UninitialzedAttributeException(
-                    '%s was not initialzed in the config.' % attr)
+                    "%s was not initialzed in the config." % attr
+                )
 
     def get_tags(self):
-        d = {typ: set(self.__getattr__(typ))
-            for typ in self.constants.supported_tags}
+        d = {
+            typ: set(self.__getattr__(typ))
+            for typ in self.constants.supported_tags
+        }
         if any(map(lambda vals: bool(vals), d.values())):
             return d
         else:
             return {}
 
+
 def define_defaults(defaults):
-    '''
+    """
     Defaults are provided by the config if the attribute is not found in the
     config or commandline. For instance, if we are using the list command
     fixtures might not be able to count on the build_dir being provided since
     we aren't going to build anything.
-    '''
-    defaults.base_dir = os.path.abspath(os.path.join(absdirpath(__file__),
-                                                      os.pardir,
-                                                      os.pardir))
-    defaults.result_path = os.path.join(os.getcwd(), 'testing-results')
-    defaults.resource_url = 'http://dist.gem5.org/dist/develop'
-    defaults.resource_path = os.path.abspath(os.path.join(defaults.base_dir,
-                                            'tests',
-                                            'gem5',
-                                            'resources'))
+    """
+    defaults.base_dir = os.path.abspath(
+        os.path.join(absdirpath(__file__), os.pardir, os.pardir)
+    )
+    defaults.result_path = os.path.join(os.getcwd(), "testing-results")
+    defaults.resource_url = "http://dist.gem5.org/dist/develop"
+    defaults.resource_path = os.path.abspath(
+        os.path.join(defaults.base_dir, "tests", "gem5", "resources")
+    )
+
 
 def define_constants(constants):
-    '''
+    """
     'constants' are values not directly exposed by the config, but are attached
     to the object for centralized access. These should be used for setting
     common string names used across the test framework. A simple typo in
     a string can take a lot of debugging to uncover the issue, attribute errors
     are easier to notice and most autocompletion systems detect them.
-    '''
-    constants.system_out_name = 'system-out'
-    constants.system_err_name = 'system-err'
-
-    constants.isa_tag_type = 'isa'
-    constants.x86_tag = 'X86'
-    constants.gcn3_x86_tag = 'GCN3_X86'
-    constants.vega_x86_tag = 'VEGA_X86'
-    constants.sparc_tag = 'SPARC'
-    constants.riscv_tag = 'RISCV'
-    constants.arm_tag = 'ARM'
-    constants.mips_tag = 'MIPS'
-    constants.power_tag = 'POWER'
-    constants.null_tag = 'NULL'
-    constants.all_compiled_tag = 'ALL'
-
-    constants.variant_tag_type = 'variant'
-    constants.opt_tag = 'opt'
-    constants.debug_tag = 'debug'
-    constants.fast_tag = 'fast'
-
-    constants.length_tag_type = 'length'
-    constants.quick_tag = 'quick'
-    constants.long_tag = 'long'
-    constants.very_long_tag = 'very-long'
-
-    constants.host_isa_tag_type = 'host'
-    constants.host_x86_64_tag = 'x86_64'
-    constants.host_arm_tag = 'aarch64'
-
-    constants.kvm_tag = 'kvm'
+    """
+    constants.system_out_name = "system-out"
+    constants.system_err_name = "system-err"
+
+    constants.isa_tag_type = "isa"
+    constants.x86_tag = "X86"
+    constants.gcn3_x86_tag = "GCN3_X86"
+    constants.vega_x86_tag = "VEGA_X86"
+    constants.sparc_tag = "SPARC"
+    constants.riscv_tag = "RISCV"
+    constants.arm_tag = "ARM"
+    constants.mips_tag = "MIPS"
+    constants.power_tag = "POWER"
+    constants.null_tag = "NULL"
+    constants.all_compiled_tag = "ALL"
+
+    constants.variant_tag_type = "variant"
+    constants.opt_tag = "opt"
+    constants.debug_tag = "debug"
+    constants.fast_tag = "fast"
+
+    constants.length_tag_type = "length"
+    constants.quick_tag = "quick"
+    constants.long_tag = "long"
+    constants.very_long_tag = "very-long"
+
+    constants.host_isa_tag_type = "host"
+    constants.host_x86_64_tag = "x86_64"
+    constants.host_arm_tag = "aarch64"
+
+    constants.kvm_tag = "kvm"
 
     constants.supported_tags = {
-        constants.isa_tag_type : (
+        constants.isa_tag_type: (
             constants.x86_tag,
             constants.gcn3_x86_tag,
             constants.vega_x86_tag,
@@ -270,7 +280,7 @@ def define_constants(constants):
             constants.power_tag,
             constants.null_tag,
             constants.all_compiled_tag,
-            ),
+        ),
         constants.variant_tag_type: (
             constants.opt_tag,
             constants.debug_tag,
@@ -290,41 +300,43 @@ def define_constants(constants):
     # Binding target ISA with host ISA. This is useful for the
     # case where host ISA and target ISA need to coincide
     constants.target_host = {
-        constants.arm_tag   : (constants.host_arm_tag,),
-        constants.x86_tag   : (constants.host_x86_64_tag,),
-        constants.gcn3_x86_tag : (constants.host_x86_64_tag,),
-        constants.vega_x86_tag : (constants.host_x86_64_tag,),
-        constants.sparc_tag : (constants.host_x86_64_tag,),
-        constants.riscv_tag : (constants.host_x86_64_tag,),
-        constants.mips_tag  : (constants.host_x86_64_tag,),
-        constants.power_tag : (constants.host_x86_64_tag,),
-        constants.null_tag  : (None,),
+        constants.arm_tag: (constants.host_arm_tag,),
+        constants.x86_tag: (constants.host_x86_64_tag,),
+        constants.gcn3_x86_tag: (constants.host_x86_64_tag,),
+        constants.vega_x86_tag: (constants.host_x86_64_tag,),
+        constants.sparc_tag: (constants.host_x86_64_tag,),
+        constants.riscv_tag: (constants.host_x86_64_tag,),
+        constants.mips_tag: (constants.host_x86_64_tag,),
+        constants.power_tag: (constants.host_x86_64_tag,),
+        constants.null_tag: (None,),
         constants.all_compiled_tag: (None,),
     }
 
-    constants.supported_isas = constants.supported_tags['isa']
-    constants.supported_variants = constants.supported_tags['variant']
-    constants.supported_lengths = constants.supported_tags['length']
-    constants.supported_hosts = constants.supported_tags['host']
-
-    constants.tempdir_fixture_name = 'tempdir'
-    constants.gem5_simulation_stderr = 'simerr'
-    constants.gem5_simulation_stdout = 'simout'
-    constants.gem5_simulation_stats = 'stats.txt'
-    constants.gem5_simulation_config_ini = 'config.ini'
-    constants.gem5_simulation_config_json = 'config.json'
-    constants.gem5_returncode_fixture_name = 'gem5-returncode'
-    constants.gem5_binary_fixture_name = 'gem5'
-    constants.xml_filename = 'results.xml'
-    constants.pickle_filename = 'results.pickle'
+    constants.supported_isas = constants.supported_tags["isa"]
+    constants.supported_variants = constants.supported_tags["variant"]
+    constants.supported_lengths = constants.supported_tags["length"]
+    constants.supported_hosts = constants.supported_tags["host"]
+
+    constants.tempdir_fixture_name = "tempdir"
+    constants.gem5_simulation_stderr = "simerr.txt"
+    constants.gem5_simulation_stdout = "simout.txt"
+    constants.gem5_simulation_stats = "stats.txt"
+    constants.gem5_simulation_config_ini = "config.ini"
+    constants.gem5_simulation_config_json = "config.json"
+    constants.gem5_returncode_fixture_name = "gem5-returncode"
+    constants.gem5_binary_fixture_name = "gem5"
+    constants.xml_filename = "results.xml"
+    constants.pickle_filename = "results.pickle"
     constants.pickle_protocol = highest_pickle_protocol
 
     # The root directory which all test names will be based off of.
-    constants.testing_base = absdirpath(os.path.join(absdirpath(__file__),
-                                                     os.pardir))
+    constants.testing_base = absdirpath(
+        os.path.join(absdirpath(__file__), os.pardir)
+    )
+
 
 def define_post_processors(config):
-    '''
+    """
     post_processors are used to do final configuration of variables. This is
     useful if there is a dynamically set default, or some function that needs
     to be applied after parsing in order to set a configration value.
@@ -333,17 +345,17 @@ def define_post_processors(config):
     containing the already set config value or ``None`` if the config value
     has not been set to anything. They must return the modified value in the
     same format.
-    '''
+    """
 
     def set_default_build_dir(build_dir):
-        '''
+        """
         Post-processor to set the default build_dir based on the base_dir.
 
         .. seealso :func:`~_Config._add_post_processor`
-        '''
+        """
         if not build_dir or build_dir[0] is None:
-            base_dir = config._lookup_val('base_dir')[0]
-            build_dir = (os.path.join(base_dir, 'build'),)
+            base_dir = config._lookup_val("base_dir")[0]
+            build_dir = (os.path.join(base_dir, "build"),)
         return build_dir
 
     def fix_verbosity_hack(verbose):
@@ -381,6 +393,7 @@ def default_host(host):
         if not host[0]:
             try:
                 import platform
+
                 host_machine = platform.machine()
                 if host_machine not in constants.supported_hosts:
                     raise ValueError("Invalid host machine")
@@ -398,87 +411,98 @@ def compile_tag_regex(positional_tags):
             positional_tags = positional_tags[0]
 
             for flag, regex in positional_tags:
-                if flag == 'exclude_tags':
+                if flag == "exclude_tags":
                     tag_regex = TagRegex(False, regex)
-                elif flag  == 'include_tags':
+                elif flag == "include_tags":
                     tag_regex = TagRegex(True, regex)
                 else:
-                    raise ValueError('Unsupported flag.')
+                    raise ValueError("Unsupported flag.")
                 new_positional_tags_list.append(tag_regex)
 
             return (new_positional_tags_list,)
 
-    config._add_post_processor('build_dir', set_default_build_dir)
-    config._add_post_processor('verbose', fix_verbosity_hack)
-    config._add_post_processor('isa', default_isa)
-    config._add_post_processor('variant', default_variant)
-    config._add_post_processor('length', default_length)
-    config._add_post_processor('host', default_host)
-    config._add_post_processor('threads', threads_as_int)
-    config._add_post_processor('test_threads', test_threads_as_int)
-    config._add_post_processor(StorePositionalTagsAction.position_kword,
-                               compile_tag_regex)
-class Argument(object):
-    '''
+    config._add_post_processor("build_dir", set_default_build_dir)
+    config._add_post_processor("verbose", fix_verbosity_hack)
+    config._add_post_processor("isa", default_isa)
+    config._add_post_processor("variant", default_variant)
+    config._add_post_processor("length", default_length)
+    config._add_post_processor("host", default_host)
+    config._add_post_processor("threads", threads_as_int)
+    config._add_post_processor("test_threads", test_threads_as_int)
+    config._add_post_processor(
+        StorePositionalTagsAction.position_kword, compile_tag_regex
+    )
+
+
+class Argument:
+    """
     Class represents a cli argument/flag for a argparse parser.
 
     :attr name: The long name of this object that will be stored in the arg
         output by the final parser.
-    '''
+    """
+
     def __init__(self, *flags, **kwargs):
         self.flags = flags
         self.kwargs = kwargs
 
         if len(flags) == 0:
             raise ValueError("Need at least one argument.")
-        elif 'dest' in kwargs:
-            self.name = kwargs['dest']
-        elif len(flags) > 1 or flags[0].startswith('-'):
+        elif "dest" in kwargs:
+            self.name = kwargs["dest"]
+        elif len(flags) > 1 or flags[0].startswith("-"):
             for flag in flags:
-                if not flag.startswith('-'):
-                    raise ValueError("invalid option string %s: must start"
-                    "with a character '-'" % flag)
+                if not flag.startswith("-"):
+                    raise ValueError(
+                        "invalid option string %s: must start"
+                        "with a character '-'" % flag
+                    )
 
-                if flag.startswith('--'):
-                    if not hasattr(self, 'name'):
-                        self.name = flag.lstrip('-')
+                if flag.startswith("--"):
+                    if not hasattr(self, "name"):
+                        self.name = flag.lstrip("-")
 
-        if not hasattr(self, 'name'):
-            self.name = flags[0].lstrip('-')
-        self.name = self.name.replace('-', '_')
+        if not hasattr(self, "name"):
+            self.name = flags[0].lstrip("-")
+        self.name = self.name.replace("-", "_")
 
     def add_to(self, parser):
-        '''Add this argument to the given parser.'''
+        """Add this argument to the given parser."""
         parser.add_argument(*self.flags, **self.kwargs)
 
     def copy(self):
-        '''Copy this argument so you might modify any of its kwargs.'''
+        """Copy this argument so you might modify any of its kwargs."""
         return copy.deepcopy(self)
 
 
 class _StickyInt:
-    '''
+    """
     A class that is used to cheat the verbosity count incrementer by
     pretending to be an int. This makes the int stay on the heap and eat other
     real numbers when they are added to it.
 
     We use this so we can allow the verbose flag to be provided before or after
     the subcommand. This likely has no utility outside of this use case.
-    '''
+    """
+
     def __init__(self, val=0):
         self.val = val
         self.type = int
+
     def __add__(self, other):
         self.val += other
         return self
 
+
 common_args = NotImplemented
 
+
 class StorePositionAction(argparse.Action):
-    '''Base class for classes wishing to create namespaces where
+    """Base class for classes wishing to create namespaces where
     arguments are stored in the order provided via the command line.
-    '''
-    position_kword = 'positional'
+    """
+
+    position_kword = "positional"
 
     def __call__(self, parser, namespace, values, option_string=None):
         if not self.position_kword in namespace:
@@ -487,120 +511,134 @@ def __call__(self, parser, namespace, values, option_string=None):
         previous.append((self.dest, values))
         setattr(namespace, self.position_kword, previous)
 
+
 class StorePositionalTagsAction(StorePositionAction):
-    position_kword = 'tag_filters'
+    position_kword = "tag_filters"
+
 
 def define_common_args(config):
-    '''
+    """
     Common args are arguments which are likely to be simular between different
     subcommands, so they are available to all by placing their definitions
     here.
-    '''
+    """
     global common_args
 
-    parse_comma_separated_string = lambda st: st.split(',')
+    parse_comma_separated_string = lambda st: st.split(",")
 
     # A list of common arguments/flags used across cli parsers.
     common_args = [
         Argument(
-            'directories',
-            nargs='*',
+            "directories",
+            nargs="*",
             default=[os.getcwd()],
-            help='Space separated list of directories to start searching '
-                 'for tests in'),
+            help="Space separated list of directories to start searching "
+            "for tests in",
+        ),
         Argument(
-            '--exclude-tags',
+            "--exclude-tags",
             action=StorePositionalTagsAction,
-            help='A tag comparison used to select tests.'),
+            help="A tag comparison used to select tests.",
+        ),
         Argument(
-            '--include-tags',
+            "--include-tags",
             action=StorePositionalTagsAction,
-            help='A tag comparison used to select tests.'),
+            help="A tag comparison used to select tests.",
+        ),
         Argument(
-            '--isa',
-            action='extend',
+            "--isa",
+            action="extend",
             default=[],
             type=parse_comma_separated_string,
             help="Only tests that are valid with one of these ISAs. "
-                 "Comma separated."),
+            "Comma separated.",
+        ),
         Argument(
-            '--variant',
-            action='extend',
+            "--variant",
+            action="extend",
             default=[],
             type=parse_comma_separated_string,
             help="Only tests that are valid with one of these binary variants"
-                 "(e.g., opt, debug). Comma separated."),
+            "(e.g., opt, debug). Comma separated.",
+        ),
         Argument(
-            '--length',
-            action='extend',
+            "--length",
+            action="extend",
             default=[],
             type=parse_comma_separated_string,
-            help="Only tests that are one of these lengths. Comma separated."),
+            help="Only tests that are one of these lengths. Comma separated.",
+        ),
         Argument(
-            '--host',
-            action='append',
+            "--host",
+            action="append",
             default=[],
-            help="Only tests that are meant to runnable on the selected host"),
+            help="Only tests that are meant to runnable on the selected host",
+        ),
         Argument(
-            '--uid',
-            action='store',
+            "--uid",
+            action="store",
             default=None,
-            help='UID of a specific test item to run.'),
+            help="UID of a specific test item to run.",
+        ),
         Argument(
-            '--build-dir',
-            action='store',
-            help='Build directory for SCons'),
+            "--build-dir", action="store", help="Build directory for SCons"
+        ),
         Argument(
-            '--base-dir',
-            action='store',
+            "--base-dir",
+            action="store",
             default=config._defaults.base_dir,
-            help='Directory to change to in order to exec scons.'),
+            help="Directory to change to in order to exec scons.",
+        ),
         Argument(
-            '-j', '--threads',
-            action='store',
+            "-j",
+            "--threads",
+            action="store",
             default=1,
-            help='Number of threads to run SCons with.'),
+            help="Number of threads to run SCons with.",
+        ),
         Argument(
-            '-t', '--test-threads',
-            action='store',
+            "-t",
+            "--test-threads",
+            action="store",
             default=1,
-            help='Number of threads to spawn to run concurrent tests with.'),
+            help="Number of threads to spawn to run concurrent tests with.",
+        ),
         Argument(
-            '-v',
-            action='count',
-            dest='verbose',
+            "-v",
+            action="count",
+            dest="verbose",
             default=_StickyInt(),
-            help='Increase verbosity'),
+            help="Increase verbosity",
+        ),
         Argument(
-            '--config-path',
-            action='store',
+            "--config-path",
+            action="store",
             default=os.getcwd(),
-            help='Path to read a testing.ini config in'
+            help="Path to read a testing.ini config in",
         ),
         Argument(
-            '--skip-build',
-            action='store_true',
+            "--skip-build",
+            action="store_true",
             default=False,
-            help='Skip the building component of SCons targets.'
+            help="Skip the building component of SCons targets.",
         ),
         Argument(
-            '--result-path',
-            action='store',
-            help='The path to store results in.'
+            "--result-path",
+            action="store",
+            help="The path to store results in.",
         ),
         Argument(
-            '--bin-path',
-            action='store',
+            "--bin-path",
+            action="store",
             default=config._defaults.resource_path,
-            help='Path where resources are stored (downloaded if not present)'
+            help="Path where resources are stored (downloaded if not present)",
         ),
         Argument(
-            '--resource-url',
-            action='store',
+            "--resource-url",
+            action="store",
             default=config._defaults.resource_url,
-            help='The URL where the resources reside.'
+            help="The URL where the resources reside.",
         ),
-
     ]
 
     # NOTE: There is a limitation which arises due to this format. If you have
@@ -610,9 +648,10 @@ def define_common_args(config):
     # e.g. if you have a -v argument which increments verbosity level and
     # a separate --verbose flag which 'store's verbosity level. the final
     # one in the list will be saved.
-    common_args = AttrDict({arg.name:arg for arg in common_args})
+    common_args = AttrDict({arg.name: arg for arg in common_args})
+
 
-class ArgParser(object, metaclass=abc.ABCMeta):
+class ArgParser(metaclass=abc.ABCMeta):
     class ExtendAction(argparse.Action):
         def __call__(self, parser, namespace, values, option_string=None):
             items = getattr(namespace, self.dest, [])
@@ -622,10 +661,10 @@ def __call__(self, parser, namespace, values, option_string=None):
     def __init__(self, parser):
         # Copy public methods of the parser.
         for attr in dir(parser):
-            if not attr.startswith('_'):
+            if not attr.startswith("_"):
                 setattr(self, attr, getattr(parser, attr))
         self.parser = parser
-        self.parser.register('action', 'extend', ArgParser.ExtendAction)
+        self.parser.register("action", "extend", ArgParser.ExtendAction)
         self.add_argument = self.parser.add_argument
 
         # Argument will be added to all parsers and subparsers.
@@ -633,27 +672,26 @@ def __init__(self, parser):
 
 
 class CommandParser(ArgParser):
-    '''
+    """
     Main parser which parses command strings and uses those to direct to
     a subparser.
-    '''
+    """
+
     def __init__(self):
         parser = argparse.ArgumentParser()
-        super(CommandParser, self).__init__(parser)
-        self.subparser = self.add_subparsers(dest='command')
+        super().__init__(parser)
+        self.subparser = self.add_subparsers(dest="command")
 
 
 class RunParser(ArgParser):
-    '''
+    """
     Parser for the \'run\' command.
-    '''
+    """
+
     def __init__(self, subparser):
-        parser = subparser.add_parser(
-            'run',
-            help='''Run Tests.'''
-        )
+        parser = subparser.add_parser("run", help="""Run Tests.""")
 
-        super(RunParser, self).__init__(parser)
+        super().__init__(parser)
 
         common_args.uid.add_to(parser)
         common_args.skip_build.add_to(parser)
@@ -672,46 +710,58 @@ def __init__(self, subparser):
 
 
 class ListParser(ArgParser):
-    '''
+    """
     Parser for the \'list\' command.
-    '''
+    """
+
     def __init__(self, subparser):
         parser = subparser.add_parser(
-            'list',
-            help='''List and query test metadata.'''
+            "list", help="""List and query test metadata."""
         )
-        super(ListParser, self).__init__(parser)
+        super().__init__(parser)
 
         Argument(
-            '--suites',
-            action='store_true',
+            "--suites",
+            action="store_true",
+            default=False,
+            help="List all test suites.",
+        ).add_to(parser)
+        Argument(
+            "--tests",
+            action="store_true",
             default=False,
-            help='List all test suites.'
+            help="List all test cases.",
         ).add_to(parser)
         Argument(
-            '--tests',
-            action='store_true',
+            "--fixtures",
+            action="store_true",
             default=False,
-            help='List all test cases.'
+            help="List all fixtures.",
         ).add_to(parser)
         Argument(
-            '--fixtures',
-            action='store_true',
+            "--all-tags",
+            action="store_true",
             default=False,
-            help='List all fixtures.'
+            help="List all tags.",
         ).add_to(parser)
         Argument(
-            '--all-tags',
-            action='store_true',
+            "--build-targets",
+            action="store_true",
             default=False,
-            help='List all tags.'
+            help="List all the gem5 build targets.",
         ).add_to(parser)
         Argument(
-            '-q',
-            dest='quiet',
-            action='store_true',
+            "-q",
+            dest="quiet",
+            action="store_true",
             default=False,
-            help='Quiet output (machine readable).'
+            help="Quiet output (machine readable).",
+        ).add_to(parser)
+        Argument(
+            "--uid",
+            action="store",
+            default=None,
+            help="UID of a specific test item to list.",
         ).add_to(parser)
 
         common_args.directories.add_to(parser)
@@ -726,11 +776,8 @@ def __init__(self, subparser):
 
 class RerunParser(ArgParser):
     def __init__(self, subparser):
-        parser = subparser.add_parser(
-            'rerun',
-            help='''Rerun failed tests.'''
-        )
-        super(RerunParser, self).__init__(parser)
+        parser = subparser.add_parser("rerun", help="""Rerun failed tests.""")
+        super().__init__(parser)
 
         common_args.skip_build.add_to(parser)
         common_args.directories.add_to(parser)
@@ -744,6 +791,7 @@ def __init__(self, subparser):
         common_args.length.add_to(parser)
         common_args.host.add_to(parser)
 
+
 config = _Config()
 define_constants(config.constants)
 
@@ -752,14 +800,16 @@ def __init__(self, subparser):
 config.constants = FrozenAttrDict(config.constants.__dict__)
 constants = config.constants
 
-'''
+"""
 This config object is the singleton config object available throughout the
 framework.
-'''
+"""
+
+
 def initialize_config():
-    '''
+    """
     Parse the commandline arguments and setup the config varibles.
-    '''
+    """
     global config
 
     # Setup constants and defaults
diff --git a/ext/testlib/fixture.py b/ext/testlib/fixture.py
index bcd22d9220..16fc39c29f 100644
--- a/ext/testlib/fixture.py
+++ b/ext/testlib/fixture.py
@@ -27,17 +27,22 @@
 # Authors: Sean Wilson
 
 import testlib.helper as helper
+from testlib.configuration import constants
+
+from typing import Optional
+
 
 class SkipException(Exception):
     def __init__(self, fixture, testitem):
-        self.msg = 'Fixture "%s" raised SkipException for "%s".' % (
-               fixture.name, testitem.name
+        self.msg = 'Fixture "{}" raised SkipException for "{}".'.format(
+            fixture.name,
+            testitem.name,
         )
-        super(SkipException, self).__init__(self.msg)
+        super().__init__(self.msg)
 
 
-class Fixture(object):
-    '''
+class Fixture:
+    """
     Base Class for a test Fixture.
 
     Fixtures are items which possibly require setup and/or tearing down after
@@ -50,11 +55,12 @@ class Fixture(object):
 
     .. note:: In order for Fixtures to be enumerated by the test system this
         class' :code:`__new__` method must be called.
-    '''
+    """
+
     collector = helper.InstanceCollector()
 
     def __new__(klass, *args, **kwargs):
-        obj = super(Fixture, klass).__new__(klass)
+        obj = super().__new__(klass)
         Fixture.collector.collect(obj)
         return obj
 
@@ -76,6 +82,21 @@ def post_test_procedure(self, testitem):
     def teardown(self, testitem):
         pass
 
+    def get_get_build_info(self) -> Optional[dict]:
+        # If this is a gem5 build it will return the target gem5 build path
+        # and any additional build information. E.g.:
+        #
+        # /path/to/gem5/build/NULL/gem5.opt--default=NULL PROTOCOL=MI_example
+        #
+        # In this example this may be passed to scons to build gem5 in
+        # accordance with the test's build requirements.
+        #
+        # If this fixture is not a build of gem5, None is returned.
+        return None
+
+    def __str__(self):
+        return f"{self.name} fixture"
+
     def set_global(self):
         self._is_global = True
 
diff --git a/ext/testlib/handlers.py b/ext/testlib/handlers.py
index fa7aea9c89..6a6f654355 100644
--- a/ext/testlib/handlers.py
+++ b/ext/testlib/handlers.py
@@ -26,11 +26,11 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 Handlers for the testlib Log.
 
 
-'''
+"""
 import multiprocessing
 import os
 import sys
@@ -48,15 +48,16 @@
 from testlib.configuration import constants
 
 
-class _TestStreamManager(object):
+class _TestStreamManager:
     def __init__(self):
         self._writers = {}
 
     def open_writer(self, test_result):
         if test_result in self._writers:
-            raise ValueError('Cannot have multiple writters on a single test.')
-        self._writers[test_result] = _TestStreams(test_result.stdout,
-                test_result.stderr)
+            raise ValueError("Cannot have multiple writters on a single test.")
+        self._writers[test_result] = _TestStreams(
+            test_result.stdout, test_result.stderr
+        )
 
     def get_writer(self, test_result):
         if test_result not in self._writers:
@@ -73,89 +74,94 @@ def close(self):
             writer.close()
         self._writers.clear()
 
-class _TestStreams(object):
+
+class _TestStreams:
     def __init__(self, stdout, stderr):
         helper.mkdir_p(os.path.dirname(stdout))
         helper.mkdir_p(os.path.dirname(stderr))
-        self.stdout = open(stdout, 'w')
-        self.stderr = open(stderr, 'w')
+        self.stdout = open(stdout, "w")
+        self.stderr = open(stderr, "w")
 
     def close(self):
         self.stdout.close()
         self.stderr.close()
 
-class ResultHandler(object):
-    '''
+
+class ResultHandler:
+    """
     Log handler which listens for test results and output saving data as
     it is reported.
 
     When the handler is closed it writes out test results in the python pickle
     format.
-    '''
+    """
+
     def __init__(self, schedule, directory):
-        '''
+        """
         :param schedule: The entire schedule as a :class:`LoadedLibrary`
             object.
 
         :param directory: Directory to save test stdout/stderr and aggregate
             results to.
-        '''
+        """
         self.directory = directory
-        self.internal_results = result.InternalLibraryResults(schedule,
-                directory)
+        self.internal_results = result.InternalLibraryResults(
+            schedule, directory
+        )
         self.test_stream_manager = _TestStreamManager()
         self._closed = False
 
         self.mapping = {
             log.LibraryStatus.type_id: self.handle_library_status,
-
             log.SuiteResult.type_id: self.handle_suite_result,
             log.TestResult.type_id: self.handle_test_result,
-
             log.TestStderr.type_id: self.handle_stderr,
             log.TestStdout.type_id: self.handle_stdout,
         }
 
     def handle(self, record):
         if not self._closed:
-            self.mapping.get(record.type_id, lambda _:None)(record)
+            self.mapping.get(record.type_id, lambda _: None)(record)
 
     def handle_library_status(self, record):
-        if record['status'] in (state.Status.Complete, state.Status.Avoided):
+        if record["status"] in (state.Status.Complete, state.Status.Avoided):
             self.test_stream_manager.close()
 
     def handle_suite_result(self, record):
         suite_result = self.internal_results.get_suite_result(
-                    record['metadata'].uid)
-        suite_result.result = record['result']
+            record["metadata"].uid
+        )
+        suite_result.result = record["result"]
 
     def handle_test_result(self, record):
         test_result = self._get_test_result(record)
-        test_result.result = record['result']
+        test_result.result = record["result"]
 
     def handle_stderr(self, record):
         self.test_stream_manager.get_writer(
             self._get_test_result(record)
-        ).stderr.write(record['buffer'])
+        ).stderr.write(record["buffer"])
 
     def handle_stdout(self, record):
         self.test_stream_manager.get_writer(
             self._get_test_result(record)
-        ).stdout.write(record['buffer'])
+        ).stdout.write(record["buffer"])
 
     def _get_test_result(self, test_record):
         return self.internal_results.get_test_result(
-                    test_record['metadata'].uid,
-                    test_record['metadata'].suite_uid)
+            test_record["metadata"].uid, test_record["metadata"].suite_uid
+        )
 
     def _save(self):
-        #FIXME Hardcoded path name
+        # FIXME Hardcoded path name
         result.InternalSavedResults.save(
             self.internal_results,
-            os.path.join(self.directory, constants.pickle_filename))
+            os.path.join(self.directory, constants.pickle_filename),
+        )
         result.JUnitSavedResults.save(
             self.internal_results,
-            os.path.join(self.directory, constants.xml_filename))
+            os.path.join(self.directory, constants.xml_filename),
+        )
 
     def close(self):
         if self._closed:
@@ -164,11 +170,11 @@ def close(self):
         self._save()
 
     def unsuccessful(self):
-        '''
+        """
         Performs an or reduce on all of the results.
         Returns true if at least one test is unsuccessful, false when all tests
         pass
-        '''
+        """
         for suite_result in self.internal_results:
             if suite_result.unsuccessful:
                 return True
@@ -176,20 +182,21 @@ def unsuccessful(self):
         return False
 
 
-#TODO Change from a handler to an internal post processor so it can be used
+# TODO Change from a handler to an internal post processor so it can be used
 # to reprint results
-class SummaryHandler(object):
-    '''
+class SummaryHandler:
+    """
     A log handler which listens to the log for test results
     and reports the aggregate results when closed.
-    '''
+    """
+
     color = terminal.get_termcap()
     reset = color.Normal
     colormap = {
-            state.Result.Errored: color.Red,
-            state.Result.Failed: color.Red,
-            state.Result.Passed: color.Green,
-            state.Result.Skipped: color.Cyan,
+        state.Result.Errored: color.Red,
+        state.Result.Failed: color.Red,
+        state.Result.Passed: color.Green,
+        state.Result.Skipped: color.Cyan,
     }
 
     def __init__(self):
@@ -201,24 +208,28 @@ def __init__(self):
         self.results = []
 
     def handle_library_status(self, record):
-        if record['status'] == state.Status.Building:
+        if record["status"] == state.Status.Building:
             self._timer.restart()
 
     def handle_testresult(self, record):
-        result = record['result'].value
-        if result in (state.Result.Skipped, state.Result.Failed,
-                state.Result.Passed, state.Result.Errored):
+        result = record["result"].value
+        if result in (
+            state.Result.Skipped,
+            state.Result.Failed,
+            state.Result.Passed,
+            state.Result.Errored,
+        ):
             self.results.append(result)
 
     def handle(self, record):
-        self.mapping.get(record.type_id, lambda _:None)(record)
+        self.mapping.get(record.type_id, lambda _: None)(record)
 
     def close(self):
         print(self._display_summary())
 
     def _display_summary(self):
         most_severe_outcome = None
-        outcome_fmt = ' {count} {outcome}'
+        outcome_fmt = " {count} {outcome}"
         strings = []
 
         outcome_count = [0] * len(state.Result.enums)
@@ -228,24 +239,31 @@ def _display_summary(self):
         # Iterate over enums so they are in order of severity
         for outcome in state.Result.enums:
             outcome = getattr(state.Result, outcome)
-            count  = outcome_count[outcome]
+            count = outcome_count[outcome]
             if count:
-                strings.append(outcome_fmt.format(count=count,
-                        outcome=state.Result.enums[outcome]))
+                strings.append(
+                    outcome_fmt.format(
+                        count=count, outcome=state.Result.enums[outcome]
+                    )
+                )
                 most_severe_outcome = outcome
-        string = ','.join(strings)
+        string = ",".join(strings)
         if most_severe_outcome is None:
-            string = ' No testing done'
+            string = " No testing done"
             most_severe_outcome = state.Result.Passed
         else:
-            string = ' Results:' + string + ' in {:.2} seconds '.format(
-                    self._timer.active_time())
-        string += ' '
+            string = (
+                " Results:"
+                + string
+                + f" in {self._timer.active_time():.2} seconds "
+            )
+        string += " "
         return terminal.insert_separator(
-                string,
-                color=self.colormap[most_severe_outcome] + self.color.Bold)
+            string, color=self.colormap[most_severe_outcome] + self.color.Bold
+        )
+
 
-class TerminalHandler(object):
+class TerminalHandler:
     color = terminal.get_termcap()
     verbosity_mapping = {
         log.LogLevel.Warn: color.Yellow,
@@ -268,75 +286,85 @@ def __init__(self, verbosity=log.LogLevel.Info, machine_only=False):
         }
 
     def _display_outcome(self, name, outcome, reason=None):
-        print(self.color.Bold
-                 + SummaryHandler.colormap[outcome]
-                 + name
-                 + ' '
-                 + state.Result.enums[outcome]
-                 + SummaryHandler.reset)
+        print(
+            self.color.Bold
+            + SummaryHandler.colormap[outcome]
+            + name
+            + " "
+            + state.Result.enums[outcome]
+            + SummaryHandler.reset
+        )
 
         if reason is not None:
-            log.test_log.info('')
-            log.test_log.info('Reason:')
+            log.test_log.info("")
+            log.test_log.info("Reason:")
             log.test_log.info(reason)
-            log.test_log.info(terminal.separator('-'))
+            log.test_log.info(terminal.separator("-"))
 
     def handle_teststatus(self, record):
-        if record['status'] == state.Status.Running:
-            log.test_log.debug('Starting Test Case: %s' %\
-                    record['metadata'].name)
+        if record["status"] == state.Status.Running:
+            log.test_log.debug(
+                "Starting Test Case: %s" % record["metadata"].name
+            )
 
     def handle_testresult(self, record):
         self._display_outcome(
-            'Test: %s'  % record['metadata'].name,
-            record['result'].value)
+            "Test: %s" % record["metadata"].name, record["result"].value
+        )
 
     def handle_suitestatus(self, record):
-        if record['status'] == state.Status.Running:
-              log.test_log.debug('Starting Test Suite: %s ' %\
-                    record['metadata'].name)
+        if record["status"] == state.Status.Running:
+            log.test_log.debug(
+                "Starting Test Suite: %s " % record["metadata"].name
+            )
 
     def handle_stderr(self, record):
         if self.stream:
-            print(record.data['buffer'], file=sys.stderr, end='')
+            print(record.data["buffer"], file=sys.stderr, end="")
 
     def handle_stdout(self, record):
         if self.stream:
-            print(record.data['buffer'], file=sys.stdout, end='')
+            print(record.data["buffer"], file=sys.stdout, end="")
 
     def handle_testmessage(self, record):
         if self.stream:
-            print(self._colorize(record['message'], record['level']))
+            print(self._colorize(record["message"], record["level"]))
 
     def handle_librarymessage(self, record):
-        if not self.machine_only or record.data.get('machine_readable', False):
-            print(self._colorize(record['message'], record['level'],
-                    record['bold']))
+        if not self.machine_only or record.data.get("machine_readable", False):
+            print(
+                self._colorize(
+                    record["message"], record["level"], record["bold"]
+                )
+            )
 
     def _colorize(self, message, level, bold=False):
-        return '%s%s%s%s' % (
-                self.color.Bold if bold else '',
-                self.verbosity_mapping.get(level, ''),
-                message,
-                self.default)
+        return "{}{}{}{}".format(
+            self.color.Bold if bold else "",
+            self.verbosity_mapping.get(level, ""),
+            message,
+            self.default,
+        )
 
     def handle(self, record):
-        if record.data.get('level', self.verbosity) > self.verbosity:
+        if record.data.get("level", self.verbosity) > self.verbosity:
             return
-        self.mapping.get(record.type_id, lambda _:None)(record)
+        self.mapping.get(record.type_id, lambda _: None)(record)
 
     def close(self):
         pass
 
-class MultiprocessingHandlerWrapper(object):
-    '''
+
+class MultiprocessingHandlerWrapper:
+    """
     A handler class which forwards log records to subhandlers, enabling
     logging across multiprocessing python processes.
 
     The 'parent' side of the handler should execute either
     :func:`async_process` or :func:`process` to forward
     log records to subhandlers.
-    '''
+    """
+
     def __init__(self, *subhandlers):
         # Create thread to spin handing recipt of messages
         # Create queue to push onto
@@ -350,7 +378,7 @@ def __init__(self, *subhandlers):
 
     def add_handler(self, handler):
         self._handler_lock.acquire()
-        self._subhandlers = (handler, ) + self._subhandlers
+        self._subhandlers = (handler,) + self._subhandlers
         self._handler_lock.release()
 
     def _with_handlers(self, callback):
@@ -405,7 +433,7 @@ def handle(self, record):
         self.queue.put(record)
 
     def _close(self):
-        if hasattr(self, 'thread'):
+        if hasattr(self, "thread"):
             self.thread.join()
         _wrap(self._drain)
         self._with_handlers(lambda handler: _wrap(handler.close))
@@ -415,9 +443,9 @@ def _close(self):
         # This sleep adds some time for the sender threads on this process to
         # finish pickling the object and complete shutdown after the queue is
         # closed.
-        time.sleep(.2)
+        time.sleep(0.2)
         self.queue.close()
-        time.sleep(.2)
+        time.sleep(0.2)
 
     def close(self):
         if not self._shutdown.is_set():
diff --git a/ext/testlib/helper.py b/ext/testlib/helper.py
index ea102f262b..0fd0cf539e 100644
--- a/ext/testlib/helper.py
+++ b/ext/testlib/helper.py
@@ -38,9 +38,9 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 Helper classes for writing tests with this test library.
-'''
+"""
 from collections import namedtuple
 from collections.abc import MutableSet
 
@@ -55,7 +55,8 @@
 import threading
 import time
 
-class TimedWaitPID(object):
+
+class TimedWaitPID:
     """Utility to monkey-patch os.waitpid() with os.wait4().
 
     This allows process usage time to be obtained directly from the OS
@@ -69,9 +70,10 @@ class TimedWaitPID(object):
     it is read.
 
     """
-    TimeRecord = namedtuple( "_TimeRecord", "user_time system_time" )
 
-    class Wrapper(object):
+    TimeRecord = namedtuple("_TimeRecord", "user_time system_time")
+
+    class Wrapper:
         def __init__(self):
             self._time_for_pid = {}
             self._access_lock = threading.Lock()
@@ -79,11 +81,8 @@ def __init__(self):
         def __call__(self, pid, options):
             pid, status, resource_usage = os.wait4(pid, options)
             with self._access_lock:
-                self._time_for_pid[pid] = (
-                    TimedWaitPID.TimeRecord(
-                        resource_usage.ru_utime,
-                        resource_usage.ru_stime
-                    )
+                self._time_for_pid[pid] = TimedWaitPID.TimeRecord(
+                    resource_usage.ru_utime, resource_usage.ru_stime
                 )
             return (pid, status)
 
@@ -94,7 +93,7 @@ def has_time_for_pid(self, pid):
         def get_time_for_pid(self, pid):
             with self._access_lock:
                 if pid not in self._time_for_pid:
-                    raise Exception("No resource usage for pid {}".format(pid))
+                    raise Exception(f"No resource usage for pid {pid}")
                 time_for_pid = self._time_for_pid[pid]
                 del self._time_for_pid[pid]
                 return time_for_pid
@@ -108,14 +107,14 @@ def install():
         with TimedWaitPID._wrapper_lock:
             if TimedWaitPID._wrapper is None:
                 TimedWaitPID._wrapper = TimedWaitPID.Wrapper()
-            if TimedWaitPID._original_os_waitpid is None :
+            if TimedWaitPID._original_os_waitpid is None:
                 TimedWaitPID._original_os_waitpid = os.waitpid
                 os.waitpid = TimedWaitPID._wrapper
 
     @staticmethod
     def restore():
         with TimedWaitPID._wrapper_lock:
-            if TimedWaitPID._original_os_waitpid is not None :
+            if TimedWaitPID._original_os_waitpid is not None:
                 os.waitpid = TimedWaitPID._original_os_waitpid
                 TimedWaitPID._original_os_waitpid = None
 
@@ -129,12 +128,14 @@ def get_time_for_pid(pid):
         with TimedWaitPID._wrapper_lock:
             return TimedWaitPID._wrapper.get_time_for_pid(pid)
 
+
 # Patch os.waitpid()
 TimedWaitPID.install()
 
-#TODO Tear out duplicate logic from the sandbox IOManager
+
+# TODO Tear out duplicate logic from the sandbox IOManager
 def log_call(logger, command, time, *popenargs, **kwargs):
-    '''
+    """
     Calls the given process and automatically logs the command and output.
 
     If stdout or stderr are provided output will also be piped into those
@@ -145,7 +146,7 @@ def log_call(logger, command, time, *popenargs, **kwargs):
 
     :params stderr: Iterable of items to write to as we read from the
         subprocess.
-    '''
+    """
     if isinstance(command, str):
         cmdstr = command
     else:
@@ -159,33 +160,35 @@ def log_call(logger, command, time, *popenargs, **kwargs):
             raise e
 
     logger_callback = logger.trace
-    logger.trace('Logging call to command: %s' % cmdstr)
+    logger.trace("Logging call to command: %s" % cmdstr)
 
-    stdout_redirect = kwargs.get('stdout', tuple())
-    stderr_redirect = kwargs.get('stderr', tuple())
+    stdout_redirect = kwargs.get("stdout", tuple())
+    stderr_redirect = kwargs.get("stderr", tuple())
 
-    if hasattr(stdout_redirect, 'write'):
+    if hasattr(stdout_redirect, "write"):
         stdout_redirect = (stdout_redirect,)
-    if hasattr(stderr_redirect, 'write'):
+    if hasattr(stderr_redirect, "write"):
         stderr_redirect = (stderr_redirect,)
 
-    kwargs['stdout'] = subprocess.PIPE
-    kwargs['stderr'] = subprocess.PIPE
+    kwargs["stdout"] = subprocess.PIPE
+    kwargs["stderr"] = subprocess.PIPE
     p = subprocess.Popen(command, *popenargs, **kwargs)
 
     def log_output(log_callback, pipe, redirects=tuple()):
         # Read iteractively, don't allow input to fill the pipe.
-        for line in iter(pipe.readline, b''):
+        for line in iter(pipe.readline, b""):
             line = line.decode("utf-8")
             for r in redirects:
                 r.write(line)
             log_callback(line.rstrip())
 
-    stdout_thread = threading.Thread(target=log_output,
-                           args=(logger_callback, p.stdout, stdout_redirect))
+    stdout_thread = threading.Thread(
+        target=log_output, args=(logger_callback, p.stdout, stdout_redirect)
+    )
     stdout_thread.setDaemon(True)
-    stderr_thread = threading.Thread(target=log_output,
-                           args=(logger_callback, p.stderr, stderr_redirect))
+    stderr_thread = threading.Thread(
+        target=log_output, args=(logger_callback, p.stderr, stderr_redirect)
+    )
     stderr_thread.setDaemon(True)
 
     stdout_thread.start()
@@ -197,25 +200,26 @@ def log_output(log_callback, pipe, redirects=tuple()):
 
     if time is not None and TimedWaitPID.has_time_for_pid(p.pid):
         resource_usage = TimedWaitPID.get_time_for_pid(p.pid)
-        time['user_time'] = resource_usage.user_time
-        time['system_time'] = resource_usage.system_time
+        time["user_time"] = resource_usage.user_time
+        time["system_time"] = resource_usage.system_time
 
     # Return the return exit code of the process.
     if retval != 0:
         raise subprocess.CalledProcessError(retval, cmdstr)
 
+
 # lru_cache stuff (Introduced in python 3.2+)
 # Renamed and modified to cacheresult
 class _HashedSeq(list):
-    '''
+    """
     This class guarantees that hash() will be called no more than once per
     element. This is important because the cacheresult() will hash the key
     multiple times on a cache miss.
 
     .. note:: From cpython 3.7
-    '''
+    """
 
-    __slots__ = 'hashvalue'
+    __slots__ = "hashvalue"
 
     def __init__(self, tup, hash=hash):
         self[:] = tup
@@ -224,11 +228,18 @@ def __init__(self, tup, hash=hash):
     def __hash__(self):
         return self.hashvalue
 
-def _make_key(args, kwds, typed,
-             kwd_mark = (object(),),
-             fasttypes = {int, str, frozenset, type(None)},
-             tuple=tuple, type=type, len=len):
-    '''
+
+def _make_key(
+    args,
+    kwds,
+    typed,
+    kwd_mark=(object(),),
+    fasttypes={int, str, frozenset, type(None)},
+    tuple=tuple,
+    type=type,
+    len=len,
+):
+    """
     Make a cache key from optionally typed positional and keyword arguments.
     The key is constructed in a way that is flat as possible rather than as
     a nested structure that would take more memory.  If there is only a single
@@ -237,7 +248,7 @@ def _make_key(args, kwds, typed,
     lookup speed.
 
     .. note:: From cpython 3.7
-    '''
+    """
     key = args
     if kwds:
         key += kwd_mark
@@ -253,15 +264,16 @@ def _make_key(args, kwds, typed,
 
 
 def cacheresult(function, typed=False):
-    '''
+    """
     :param typed: If typed is True, arguments of different types will be
         cached separately. I.e. f(3.0) and f(3) will be treated as distinct
         calls with distinct results.
 
     .. note:: From cpython 3.7
-    '''
-    sentinel = object()          # unique object used to signal cache misses
+    """
+    sentinel = object()  # unique object used to signal cache misses
     cache = {}
+
     def wrapper(*args, **kwds):
         # Simple caching without ordering or size limit
         key = _make_key(args, kwds, typed)
@@ -271,19 +283,21 @@ def wrapper(*args, **kwds):
         result = function(*args, **kwds)
         cache[key] = result
         return result
+
     return wrapper
 
+
 class OrderedSet(MutableSet):
-    '''
+    """
     Maintain ordering of insertion in items to the set with quick iteration.
 
     http://code.activestate.com/recipes/576694/
-    '''
+    """
 
     def __init__(self, iterable=None):
         self.end = end = []
-        end += [None, end, end]         # sentinel node for doubly linked list
-        self.map = {}                   # key --> [key, prev, next]
+        end += [None, end, end]  # sentinel node for doubly linked list
+        self.map = {}  # key --> [key, prev, next]
         if iterable is not None:
             self |= iterable
 
@@ -325,35 +339,38 @@ def __reversed__(self):
 
     def pop(self, last=True):
         if not self:
-            raise KeyError('set is empty')
+            raise KeyError("set is empty")
         key = self.end[1][0] if last else self.end[2][0]
         self.discard(key)
         return key
 
     def __repr__(self):
         if not self:
-            return '%s()' % (self.__class__.__name__,)
-        return '%s(%r)' % (self.__class__.__name__, list(self))
+            return f"{self.__class__.__name__}()"
+        return f"{self.__class__.__name__}({list(self)!r})"
 
     def __eq__(self, other):
         if isinstance(other, OrderedSet):
             return len(self) == len(other) and list(self) == list(other)
         return set(self) == set(other)
 
+
 def absdirpath(path):
-    '''
+    """
     Return the directory component of the absolute path of the given path.
-    '''
+    """
     return os.path.dirname(os.path.abspath(path))
 
+
 joinpath = os.path.join
 
+
 def mkdir_p(path):
-    '''
+    """
     Same thing as mkdir -p
 
     https://stackoverflow.com/a/600612
-    '''
+    """
     try:
         os.makedirs(path)
     except OSError as exc:  # Python >2.5
@@ -364,12 +381,14 @@ def mkdir_p(path):
 
 
 class FrozenSetException(Exception):
-    '''Signals one tried to set a value in a 'frozen' object.'''
+    """Signals one tried to set a value in a 'frozen' object."""
+
     pass
 
 
-class AttrDict(object):
-    '''Object which exposes its own internal dictionary through attributes.'''
+class AttrDict:
+    """Object which exposes its own internal dictionary through attributes."""
+
     def __init__(self, dict_={}):
         self.update(dict_)
 
@@ -377,7 +396,7 @@ def __getattr__(self, attr):
         dict_ = self.__dict__
         if attr in dict_:
             return dict_[attr]
-        raise AttributeError('Could not find %s attribute' % attr)
+        raise AttributeError("Could not find %s attribute" % attr)
 
     def __setattr__(self, attr, val):
         self.__dict__[attr] = val
@@ -393,29 +412,33 @@ def update(self, items):
 
 
 class FrozenAttrDict(AttrDict):
-    '''An AttrDict whose attributes cannot be modified directly.'''
+    """An AttrDict whose attributes cannot be modified directly."""
+
     __initialized = False
+
     def __init__(self, dict_={}):
-        super(FrozenAttrDict, self).__init__(dict_)
+        super().__init__(dict_)
         self.__initialized = True
 
     def __setattr__(self, attr, val):
         if self.__initialized:
             raise FrozenSetException(
-                        'Cannot modify an attribute in a FozenAttrDict')
+                "Cannot modify an attribute in a FozenAttrDict"
+            )
         else:
-            super(FrozenAttrDict, self).__setattr__(attr, val)
+            super().__setattr__(attr, val)
 
     def update(self, items):
         if self.__initialized:
             raise FrozenSetException(
-                        'Cannot modify an attribute in a FozenAttrDict')
+                "Cannot modify an attribute in a FozenAttrDict"
+            )
         else:
-            super(FrozenAttrDict, self).update(items)
+            super().update(items)
 
 
-class InstanceCollector(object):
-    '''
+class InstanceCollector:
+    """
     A class used to simplify collecting of Classes.
 
     >> instance_list = collector.create()
@@ -423,7 +446,8 @@ class InstanceCollector(object):
     >> # instance_list contains all instances created since
     >> # collector.create was called
     >> collector.remove(instance_list)
-    '''
+    """
+
     def __init__(self):
         self.collectors = []
 
@@ -441,16 +465,17 @@ def collect(self, instance):
 
 
 def append_dictlist(dict_, key, value):
-    '''
+    """
     Append the `value` to a list associated with `key` in `dict_`.
     If `key` doesn't exist, create a new list in the `dict_` with value in it.
-    '''
+    """
     list_ = dict_.get(key, [])
     list_.append(value)
     dict_[key] = list_
 
+
 def _filter_file(fname, filters):
-    with open(fname, "r") as file_:
+    with open(fname) as file_:
         for line in file_:
             for regex in filters:
                 if re.match(regex, line):
@@ -460,19 +485,19 @@ def _filter_file(fname, filters):
 
 
 def _copy_file_keep_perms(source, target):
-    '''Copy a file keeping the original permisions of the target.'''
+    """Copy a file keeping the original permissions of the target."""
     st = os.stat(target)
     shutil.copy2(source, target)
     os.chown(target, st[stat.ST_UID], st[stat.ST_GID])
 
 
 def _filter_file_inplace(fname, dir, filters):
-    '''
+    """
     Filter the given file writing filtered lines out to a temporary file, then
     copy that tempfile back into the original file.
-    '''
+    """
     (_, tfname) = tempfile.mkstemp(dir=dir, text=True)
-    with open(tfname, 'w') as tempfile_:
+    with open(tfname, "w") as tempfile_:
         for line in _filter_file(fname, filters):
             tempfile_.write(line)
 
@@ -481,39 +506,45 @@ def _filter_file_inplace(fname, dir, filters):
 
 
 def diff_out_file(ref_file, out_file, logger, ignore_regexes=tuple()):
-    '''Diff two files returning the diff as a string.'''
+    """Diff two files returning the diff as a string."""
 
     if not os.path.exists(ref_file):
-        raise OSError("%s doesn't exist in reference directory"\
-                                     % ref_file)
+        raise OSError("%s doesn't exist in reference directory" % ref_file)
     if not os.path.exists(out_file):
         raise OSError("%s doesn't exist in output directory" % out_file)
 
     _filter_file_inplace(out_file, os.path.dirname(out_file), ignore_regexes)
     _filter_file_inplace(ref_file, os.path.dirname(out_file), ignore_regexes)
 
-    #try :
+    # try :
     (_, tfname) = tempfile.mkstemp(dir=os.path.dirname(out_file), text=True)
-    with open(tfname, 'r+') as tempfile_:
+    with open(tfname, "r+") as tempfile_:
         try:
-            log_call(logger, ['diff', out_file, ref_file],
-                time=None, stdout=tempfile_)
+            log_call(
+                logger,
+                ["diff", out_file, ref_file],
+                time=None,
+                stdout=tempfile_,
+            )
         except OSError:
             # Likely signals that diff does not exist on this system. fallback
             # to difflib
-            with open(out_file, 'r') as outf, open(ref_file, 'r') as reff:
-                diff = difflib.unified_diff(iter(reff.readline, ''),
-                                            iter(outf.readline, ''),
-                                            fromfile=ref_file,
-                                            tofile=out_file)
-                return ''.join(diff)
+            with open(out_file) as outf, open(ref_file) as reff:
+                diff = difflib.unified_diff(
+                    iter(reff.readline, ""),
+                    iter(outf.readline, ""),
+                    fromfile=ref_file,
+                    tofile=out_file,
+                )
+                return "".join(diff)
         except subprocess.CalledProcessError:
             tempfile_.seek(0)
-            return ''.join(tempfile_.readlines())
+            return "".join(tempfile_.readlines())
         else:
             return None
 
-class Timer():
+
+class Timer:
     def __init__(self):
         self.restart()
 
diff --git a/ext/testlib/loader.py b/ext/testlib/loader.py
index 58b1b2e777..192632adab 100644
--- a/ext/testlib/loader.py
+++ b/ext/testlib/loader.py
@@ -26,7 +26,7 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 Contains the :class:`Loader` which is responsible for discovering and loading
 tests.
 
@@ -63,7 +63,7 @@
 a :class:`TestSuite` named after the module.
 
 .. seealso:: :func:`load_file`
-'''
+"""
 
 import os
 import re
@@ -77,44 +77,52 @@
 import testlib.wrappers as wrappers
 import testlib.uid as uid
 
+
 class DuplicateTestItemException(Exception):
-    '''
+    """
     Exception indicates multiple test items with the same UID
     were discovered.
-    '''
+    """
+
     pass
 
 
 # Match filenames that either begin or end with 'test' or tests and use
 # - or _ to separate additional name components.
 default_filepath_regex = re.compile(
-            r'(((.+[_])?tests?)|(tests?([-_].+)?))\.py$')
+    r"(((.+[_])?tests?)|(tests?([-_].+)?))\.py$"
+)
+
 
 def default_filepath_filter(filepath):
-    '''The default filter applied to filepaths to marks as test sources.'''
+    """The default filter applied to filepaths to mark them as test sources."""
     filepath = os.path.basename(filepath)
     if default_filepath_regex.match(filepath):
         # Make sure doesn't start with .
-        return not filepath.startswith('.')
+        return not filepath.startswith(".")
     return False
 
+
 def path_as_modulename(filepath):
-    '''Return the given filepath as a module name.'''
+    """Return the given filepath as a module name."""
     # Remove the file extention (.py)
     return os.path.splitext(os.path.basename(filepath))[0]
 
+
 def path_as_suitename(filepath):
-    return os.path.split(os.path.dirname(os.path.abspath((filepath))))[-1]
+    return os.path.split(os.path.dirname(os.path.abspath(filepath)))[-1]
+
 
 def _assert_files_in_same_dir(files):
     if __debug__:
         if files:
             directory = os.path.dirname(files[0])
             for f in files:
-                assert(os.path.dirname(f) == directory)
+                assert os.path.dirname(f) == directory
+
 
-class Loader(object):
-    '''
+class Loader:
+    """
     Class for discovering tests.
 
     Discovered :class:`TestCase` and :class:`TestSuite` objects are wrapped by
@@ -135,7 +143,8 @@ class Loader(object):
     .. warn:: This class is extremely thread-unsafe.
        It modifies the sys path and global config.
        Use with care.
-    '''
+    """
+
     def __init__(self):
         self.suites = []
         self.suite_uids = {}
@@ -153,16 +162,15 @@ def load_schedule_for_suites(self, *uids):
         for file_ in files:
             self.load_file(file_)
 
-        return wrappers.LoadedLibrary(
-                [self.suite_uids[id_] for id_ in uids])
+        return wrappers.LoadedLibrary([self.suite_uids[id_] for id_ in uids])
 
     def _verify_no_duplicate_suites(self, new_suites):
         new_suite_uids = self.suite_uids.copy()
         for suite in new_suites:
             if suite.uid in new_suite_uids:
                 raise DuplicateTestItemException(
-                        "More than one suite with UID '%s' was defined" %\
-                                suite.uid)
+                    "More than one suite with UID '%s' was defined" % suite.uid
+                )
             new_suite_uids[suite.uid] = suite
 
     def _verify_no_duplicate_tests_in_suites(self, new_suites):
@@ -170,17 +178,17 @@ def _verify_no_duplicate_tests_in_suites(self, new_suites):
             test_uids = set()
             for test in suite:
                 if test.uid in test_uids:
-                     raise DuplicateTestItemException(
-                            "More than one test with UID '%s' was defined"
-                            " in suite '%s'"
-                            % (test.uid, suite.uid))
+                    raise DuplicateTestItemException(
+                        "More than one test with UID '%s' was defined"
+                        " in suite '%s'" % (test.uid, suite.uid)
+                    )
                 test_uids.add(test.uid)
 
     def load_root(self, root):
-        '''
+        """
         Load files from the given root directory which match
         `self.filepath_filter`.
-        '''
+        """
         for directory in self._discover_files(root):
             directory = list(directory)
             if directory:
@@ -193,17 +201,18 @@ def load_file(self, path):
 
         if path in self._files:
             if not self._files[path]:
-                raise Exception('Attempted to load a file which already'
-                        ' failed to load')
+                raise Exception(
+                    "Attempted to load a file which already" " failed to load"
+                )
             else:
-                log.test_log.debug('Tried to reload: %s' % path)
+                log.test_log.debug("Tried to reload: %s" % path)
                 return
 
         # Create a custom dictionary for the loaded module.
         newdict = {
-            '__builtins__':__builtins__,
-            '__name__': path_as_modulename(path),
-            '__file__': path,
+            "__builtins__": __builtins__,
+            "__name__": path_as_modulename(path),
+            "__file__": path,
         }
 
         # Add the file's containing directory to the system path. So it can do
@@ -222,9 +231,9 @@ def load_file(self, path):
         except Exception as e:
             log.test_log.debug(traceback.format_exc())
             log.test_log.warn(
-                              'Exception thrown while loading "%s"\n'
-                              'Ignoring all tests in this file.'
-                               % (path))
+                'Exception thrown while loading "%s"\n'
+                "Ignoring all tests in this file." % (path)
+            )
             # Clean up
             sys.path[:] = old_path
             os.chdir(cwd)
@@ -247,27 +256,34 @@ def load_file(self, path):
             # tests.
             # NOTE: This is automatically collected (we still have the
             # collector active.)
-            suite_mod.TestSuite(tests=orphan_tests,
-                    name=path_as_suitename(path))
+            suite_mod.TestSuite(
+                tests=orphan_tests, name=path_as_suitename(path)
+            )
 
         try:
-            loaded_suites = [wrappers.LoadedSuite(suite, path)
-                    for suite in new_suites]
+            loaded_suites = [
+                wrappers.LoadedSuite(suite, path) for suite in new_suites
+            ]
 
             self._verify_no_duplicate_suites(loaded_suites)
             self._verify_no_duplicate_tests_in_suites(loaded_suites)
         except Exception as e:
-            log.test_log.warn('%s\n'
-                    'Exception thrown while loading "%s"\n'
-                    'Ignoring all tests in this file.'
-                    % (traceback.format_exc(), path))
+            log.test_log.warn(
+                "%s\n"
+                'Exception thrown while loading "%s"\n'
+                "Ignoring all tests in this file."
+                % (traceback.format_exc(), path)
+            )
         else:
-            log.test_log.info('Discovered %d tests and %d suites in %s'
-                    '' % (len(new_tests), len(loaded_suites), path))
+            log.test_log.info(
+                "Discovered %d tests and %d suites in %s"
+                "" % (len(new_tests), len(loaded_suites), path)
+            )
 
             self.suites.extend(loaded_suites)
-            self.suite_uids.update({suite.uid: suite
-                    for suite in loaded_suites})
+            self.suite_uids.update(
+                {suite.uid: suite for suite in loaded_suites}
+            )
         # Clean up
         sys.path[:] = old_path
         os.chdir(cwd)
@@ -276,18 +292,19 @@ def load_file(self, path):
         fixture_mod.Fixture.collector.remove(new_fixtures)
 
     def _discover_files(self, root):
-        '''
+        """
         Recurse down from the given root directory returning a list of
         directories which contain a list of files matching
         `self.filepath_filter`.
-        '''
+        """
         # Will probably want to order this traversal.
         for root, dirnames, filenames in os.walk(root):
             dirnames.sort()
             if filenames:
                 filenames.sort()
-                filepaths = [os.path.join(root, filename) \
-                             for filename in filenames]
+                filepaths = [
+                    os.path.join(root, filename) for filename in filenames
+                ]
                 filepaths = filter(self.filepath_filter, filepaths)
                 if filepaths:
                     yield filepaths
diff --git a/ext/testlib/log.py b/ext/testlib/log.py
index fb5907cd5c..e66d1bbc02 100644
--- a/ext/testlib/log.py
+++ b/ext/testlib/log.py
@@ -26,49 +26,51 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 This module supplies the global `test_log` object which all testing
 results and messages are reported through.
-'''
+"""
 import testlib.wrappers as wrappers
 
-class LogLevel():
+
+class LogLevel:
     Fatal = 0
     Error = 1
-    Warn  = 2
-    Info  = 3
+    Warn = 2
+    Info = 3
     Debug = 4
     Trace = 5
 
 
 class RecordTypeCounterMetaclass(type):
-    '''
+    """
     Record type metaclass.
 
     Adds a static integer value in addition to typeinfo so identifiers
     are common across processes, networks and module reloads.
-    '''
+    """
+
     counter = 0
+
     def __init__(cls, name, bases, dct):
         cls.type_id = RecordTypeCounterMetaclass.counter
         RecordTypeCounterMetaclass.counter += 1
 
 
-class Record(object, metaclass=RecordTypeCounterMetaclass):
-    '''
+class Record(metaclass=RecordTypeCounterMetaclass):
+    """
     A generic object that is passed to the :class:`Log` and its handlers.
 
     ..note: Although not statically enforced, all items in the record should be
         be pickleable. This enables logging accross multiple processes.
-    '''
+    """
 
     def __init__(self, **data):
         self.data = data
 
     def __getitem__(self, item):
         if item not in self.data:
-            raise KeyError('%s not in record %s' %\
-                    (item, self.__class__.__name__))
+            raise KeyError(f"{item} not in record {self.__class__.__name__}")
         return self.data[item]
 
     def __str__(self):
@@ -78,35 +80,57 @@ def __str__(self):
 class StatusRecord(Record):
     def __init__(self, obj, status):
         Record.__init__(self, metadata=obj.metadata, status=status)
+
+
 class ResultRecord(Record):
     def __init__(self, obj, result):
         Record.__init__(self, metadata=obj.metadata, result=result)
-#TODO Refactor this shit... Not ideal. Should just specify attributes.
+
+
+# TODO Refactor these record types. Not ideal. Should just specify attributes.
 class TestStatus(StatusRecord):
     pass
+
+
 class SuiteStatus(StatusRecord):
     pass
+
+
 class LibraryStatus(StatusRecord):
     pass
+
+
 class TestResult(ResultRecord):
     pass
+
+
 class SuiteResult(ResultRecord):
     pass
+
+
 class LibraryResult(ResultRecord):
     pass
+
+
 # Test Output Types
 class TestStderr(Record):
     pass
+
+
 class TestStdout(Record):
     pass
+
+
 # Message (Raw String) Types
 class TestMessage(Record):
     pass
+
+
 class LibraryMessage(Record):
     pass
 
 
-class Log(object):
+class Log:
     _result_typemap = {
         wrappers.LoadedLibrary.__name__: LibraryResult,
         wrappers.LoadedSuite.__name__: SuiteResult,
@@ -121,8 +145,8 @@ class Log(object):
     def __init__(self, test=None):
         self.test = test
         self.handlers = []
-        self._opened = False # TODO Guards to methods
-        self._closed = False # TODO Guards to methods
+        self._opened = False  # TODO Guards to methods
+        self._closed = False  # TODO Guards to methods
 
     def finish_init(self):
         self._opened = True
@@ -136,19 +160,25 @@ def log(self, record):
         if not self._opened:
             self.finish_init()
         if self._closed:
-            raise Exception('The log has been closed'
-                ' and is no longer available.')
+            raise Exception(
+                "The log has been closed" " and is no longer available."
+            )
 
         for handler in self.handlers:
             handler.handle(record)
 
     def message(self, message, level=LogLevel.Info, bold=False, **metadata):
         if self.test:
-            record = TestMessage(message=message, level=level,
-                test_uid=self.test.uid, suite_uid=self.test.parent_suite.uid)
+            record = TestMessage(
+                message=message,
+                level=level,
+                test_uid=self.test.uid,
+                suite_uid=self.test.parent_suite.uid,
+            )
         else:
-            record = LibraryMessage(message=message, level=level,
-                bold=bold, **metadata)
+            record = LibraryMessage(
+                message=message, level=level, bold=bold, **metadata
+            )
 
         self.log(record)
 
@@ -168,20 +198,19 @@ def trace(self, message):
         self.message(message, LogLevel.Trace)
 
     def status_update(self, obj, status):
-        self.log(
-            self._status_typemap[obj.__class__.__name__](obj, status))
+        self.log(self._status_typemap[obj.__class__.__name__](obj, status))
 
     def result_update(self, obj, result):
-        self.log(
-            self._result_typemap[obj.__class__.__name__](obj, result))
+        self.log(self._result_typemap[obj.__class__.__name__](obj, result))
 
     def add_handler(self, handler):
         if self._opened:
-            raise Exception('Unable to add a handler once the log is open.')
+            raise Exception("Unable to add a handler once the log is open.")
         self.handlers.append(handler)
 
     def close_handler(self, handler):
         handler.close()
         self.handlers.remove(handler)
 
+
 test_log = Log()
diff --git a/ext/testlib/main.py b/ext/testlib/main.py
index b9d8e93e66..fdd4c17b2f 100644
--- a/ext/testlib/main.py
+++ b/ext/testlib/main.py
@@ -39,21 +39,26 @@
 import testlib.terminal as terminal
 import testlib.uid as uid
 
+
 def entry_message():
     log.test_log.message("Running the new gem5 testing script.")
     log.test_log.message("For more information see TESTING.md.")
-    log.test_log.message("To see details as the testing scripts are"
-                         " running, use the option"
-                         " -v, -vv, or -vvv")
+    log.test_log.message(
+        "To see details as the testing scripts are"
+        " running, use the option"
+        " -v, -vv, or -vvv"
+    )
+
 
-class RunLogHandler():
+class RunLogHandler:
     def __init__(self):
         term_handler = handlers.TerminalHandler(
-            verbosity=configuration.config.verbose+log.LogLevel.Info
+            verbosity=configuration.config.verbose + log.LogLevel.Info
         )
         summary_handler = handlers.SummaryHandler()
         self.mp_handler = handlers.MultiprocessingHandlerWrapper(
-                summary_handler, term_handler)
+            summary_handler, term_handler
+        )
         self.mp_handler.async_process()
         log.test_log.add_handler(self.mp_handler)
         entry_message()
@@ -61,7 +66,8 @@ def __init__(self):
     def schedule_finalized(self, test_schedule):
         # Create the result handler object.
         self.result_handler = handlers.ResultHandler(
-                test_schedule, configuration.config.result_path)
+            test_schedule, configuration.config.result_path
+        )
         self.mp_handler.add_handler(self.result_handler)
 
     def finish_testing(self):
@@ -78,35 +84,43 @@ def close(self):
         self.mp_handler.close()
 
     def unsuccessful(self):
-        '''
+        """
         Performs an or reduce on all of the results.
         Returns true if at least one test is unsuccessful, false when all tests
         pass
-        '''
+        """
         return self.result_handler.unsuccessful()
 
+
 def get_config_tags():
-    return getattr(configuration.config,
-            configuration.StorePositionalTagsAction.position_kword)
+    return getattr(
+        configuration.config,
+        configuration.StorePositionalTagsAction.position_kword,
+    )
+
 
 def filter_with_config_tags(loaded_library):
     tags = get_config_tags()
     final_tags = []
-    regex_fmt = '^%s$'
+    regex_fmt = "^%s$"
     cfg = configuration.config
 
     def _append_inc_tag_filter(name):
         if hasattr(cfg, name):
             tag_opts = getattr(cfg, name)
             for tag in tag_opts:
-                final_tags.append(configuration.TagRegex(True, regex_fmt % tag))
+                final_tags.append(
+                    configuration.TagRegex(True, regex_fmt % tag)
+                )
 
     def _append_rem_tag_filter(name):
         if hasattr(cfg, name):
             tag_opts = getattr(cfg, name)
             for tag in cfg.constants.supported_tags[name]:
                 if tag not in tag_opts:
-                    final_tags.append(configuration.TagRegex(False, regex_fmt % tag))
+                    final_tags.append(
+                        configuration.TagRegex(False, regex_fmt % tag)
+                    )
 
     # Append additional tags for the isa, length, and variant options.
     # They apply last (they take priority)
@@ -114,7 +128,7 @@ def _append_rem_tag_filter(name):
         cfg.constants.isa_tag_type,
         cfg.constants.length_tag_type,
         cfg.constants.host_isa_tag_type,
-        cfg.constants.variant_tag_type
+        cfg.constants.variant_tag_type,
     )
 
     for tagname in special_tags:
@@ -126,15 +140,15 @@ def _append_rem_tag_filter(name):
         tags = tuple()
 
     filters = list(itertools.chain(final_tags, tags))
-    string = 'Filtering suites with tags as follows:\n'
-    filter_string = '\t\n'.join((str(f) for f in filters))
+    string = "Filtering suites with tags as follows:\n"
+    filter_string = "\t\n".join(str(f) for f in filters)
     log.test_log.trace(string + filter_string)
 
     return filter_with_tags(loaded_library, filters)
 
 
 def filter_with_tags(loaded_library, filters):
-    '''
+    """
     Filter logic supports two filter types:
     --include-tags <regex>
     --exclude-tags <regex>
@@ -168,7 +182,7 @@ def filter_with_tags(loaded_library, filters):
         set()               # Removed all suites which have tags
         # Process --include-tags "X86"
         set(suite_X86)
-    '''
+    """
     if not filters:
         return
 
@@ -182,6 +196,7 @@ def filter_with_tags(loaded_library, filters):
 
     def exclude(excludes):
         return suites - excludes
+
     def include(includes):
         return suites | includes
 
@@ -189,38 +204,65 @@ def include(includes):
         matched_tags = (tag for tag in tags if tag_regex.regex.search(tag))
         for tag in matched_tags:
             matched_suites = set(query_runner.suites_with_tag(tag))
-            suites = include(matched_suites) if tag_regex.include \
-                    else exclude(matched_suites)
+            suites = (
+                include(matched_suites)
+                if tag_regex.include
+                else exclude(matched_suites)
+            )
 
     # Set the library's suites to only those which where accepted by our filter
-    loaded_library.suites = [suite for suite in loaded_library.suites
-            if suite in suites]
+    loaded_library.suites = [
+        suite for suite in loaded_library.suites if suite in suites
+    ]
+
 
 # TODO Add results command for listing previous results.
 
+
 def load_tests():
-    '''
+    """
     Create a TestLoader and load tests for the directory given by the config.
-    '''
+    """
     testloader = loader_mod.Loader()
     log.test_log.message(terminal.separator())
-    log.test_log.message('Loading Tests', bold=True)
+    log.test_log.message("Loading Tests", bold=True)
 
     for root in configuration.config.directories:
         testloader.load_root(root)
 
     return testloader
 
+
 def do_list():
     term_handler = handlers.TerminalHandler(
-        verbosity=configuration.config.verbose+log.LogLevel.Info,
-        machine_only=configuration.config.quiet
+        verbosity=configuration.config.verbose + log.LogLevel.Info,
+        machine_only=configuration.config.quiet,
     )
     log.test_log.add_handler(term_handler)
 
     entry_message()
 
-    test_schedule = load_tests().schedule
+    if configuration.config.uid:
+        uid_ = uid.UID.from_uid(configuration.config.uid)
+        if isinstance(uid_, uid.TestUID):
+            log.test_log.error(
+                "Unable to list a standalone test.\n"
+                "Gem5 expects test suites to be the smallest unit "
+                " of test.\n\n"
+                "Pass a SuiteUID instead."
+            )
+            return
+        test_schedule = loader_mod.Loader().load_schedule_for_suites(uid_)
+        if get_config_tags():
+            log.test_log.warn(
+                "The '--uid' flag was supplied,"
+                " '--include-tags' and '--exclude-tags' will be ignored."
+            )
+    else:
+        test_schedule = load_tests().schedule
+        # Filter tests based on tags
+        filter_with_config_tags(test_schedule)
+
     filter_with_config_tags(test_schedule)
 
     qrunner = query.QueryRunner(test_schedule)
@@ -231,15 +273,21 @@ def do_list():
         qrunner.list_tests()
     elif configuration.config.all_tags:
         qrunner.list_tags()
+    elif configuration.config.fixtures:
+        qrunner.list_fixtures()
+    elif configuration.config.build_targets:
+        qrunner.list_build_targets()
     else:
         qrunner.list_suites()
         qrunner.list_tests()
         qrunner.list_tags()
+        qrunner.list_build_targets()
 
     return 0
 
+
 def run_schedule(test_schedule, log_handler):
-    '''
+    """
     Test Phases
     -----------
     * Test Collection
@@ -253,15 +301,18 @@ def run_schedule(test_schedule, log_handler):
           * Test Fixture Teardown
        * Suite Fixture Teardown
     * Global Fixture Teardown
-    '''
+    """
 
     log_handler.schedule_finalized(test_schedule)
 
     log.test_log.message(terminal.separator())
-    log.test_log.message('Running Tests from {} suites'
-            .format(len(test_schedule.suites)), bold=True)
-    log.test_log.message("Results will be stored in {}".format(
-                configuration.config.result_path))
+    log.test_log.message(
+        f"Running Tests from {len(test_schedule.suites)} suites",
+        bold=True,
+    )
+    log.test_log.message(
+        f"Results will be stored in {configuration.config.result_path}"
+    )
     log.test_log.message(terminal.separator())
 
     # Build global fixtures and exectute scheduled test suites.
@@ -278,16 +329,19 @@ def run_schedule(test_schedule, log_handler):
 
     return 1 if failed else 0
 
+
 def do_run():
     # Initialize early parts of the log.
     with RunLogHandler() as log_handler:
         if configuration.config.uid:
             uid_ = uid.UID.from_uid(configuration.config.uid)
             if isinstance(uid_, uid.TestUID):
-                log.test_log.error('Unable to run a standalone test.\n'
-                        'Gem5 expects test suites to be the smallest unit '
-                        ' of test.\n\n'
-                        'Pass a SuiteUID instead.')
+                log.test_log.error(
+                    "Unable to run a standalone test.\n"
+                    "Gem5 expects test suites to be the smallest unit "
+                    " of test.\n\n"
+                    "Pass a SuiteUID instead."
+                )
                 return
             test_schedule = loader_mod.Loader().load_schedule_for_suites(uid_)
             if get_config_tags():
@@ -302,13 +356,17 @@ def do_run():
         # Execute the tests
         return run_schedule(test_schedule, log_handler)
 
+
 def do_rerun():
     # Init early parts of log
     with RunLogHandler() as log_handler:
         # Load previous results
         results = result.InternalSavedResults.load(
-                os.path.join(configuration.config.result_path,
-                configuration.constants.pickle_filename))
+            os.path.join(
+                configuration.config.result_path,
+                configuration.constants.pickle_filename,
+            )
+        )
 
         rerun_suites = (suite.uid for suite in results if suite.unsuccessful)
 
@@ -319,16 +377,17 @@ def do_rerun():
         # Execute the tests
         return run_schedule(test_schedule, log_handler)
 
+
 def main():
-    '''
+    """
     Main entrypoint for the testlib test library.
     Returns 0 on success and 1 otherwise so it can be used as a return code
     for scripts.
-    '''
+    """
     configuration.initialize_config()
 
     # 'do' the given command.
-    result = globals()['do_'+configuration.config.command]()
+    result = globals()["do_" + configuration.config.command]()
     log.test_log.close()
 
     return result
diff --git a/ext/testlib/query.py b/ext/testlib/query.py
index 174af626fe..ead567a360 100644
--- a/ext/testlib/query.py
+++ b/ext/testlib/query.py
@@ -29,9 +29,10 @@
 import testlib.terminal as terminal
 import testlib.log as log
 
+
 # TODO Refactor print logic out of this so the objects
 # created are separate from print logic.
-class QueryRunner(object):
+class QueryRunner:
     def __init__(self, test_schedule):
         self.schedule = test_schedule
 
@@ -49,23 +50,42 @@ def suites_with_tag(self, tag):
 
     def list_tests(self):
         log.test_log.message(terminal.separator())
-        log.test_log.message('Listing all Test Cases.', bold=True)
+        log.test_log.message("Listing all Test Cases.", bold=True)
         log.test_log.message(terminal.separator())
         for suite in self.schedule:
             for test in suite:
                 log.test_log.message(test.uid, machine_readable=True)
 
+    def list_fixtures(self):
+        log.test_log.message(terminal.separator())
+        log.test_log.message("Listing all Test Fixtures.", bold=True)
+        log.test_log.message(terminal.separator())
+        for fixture in self.schedule.all_fixtures():
+            log.test_log.message(fixture, machine_readable=True)
+
+    def list_build_targets(self):
+        log.test_log.message(terminal.separator())
+        log.test_log.message("Listing all gem5 Build Targets.", bold=True)
+        log.test_log.message(terminal.separator())
+        builds = []
+        for fixture in self.schedule.all_fixtures():
+            build = fixture.get_get_build_info()
+            if build and build not in builds:
+                builds.append(build)
+        for build in builds:
+            log.test_log.message(build, machine_readable=True)
+
     def list_suites(self):
         log.test_log.message(terminal.separator())
-        log.test_log.message('Listing all Test Suites.', bold=True)
+        log.test_log.message("Listing all Test Suites.", bold=True)
         log.test_log.message(terminal.separator())
         for suite in self.suites():
             log.test_log.message(suite.uid, machine_readable=True)
 
     def list_tags(self):
         log.test_log.message(terminal.separator())
-        log.test_log.message('Listing all Test Tags.', bold=True)
+        log.test_log.message("Listing all Test Tags.", bold=True)
         log.test_log.message(terminal.separator())
 
         for tag in self.tags():
-            log.test_log.message(tag, machine_readable=True)
\ No newline at end of file
+            log.test_log.message(tag, machine_readable=True)
diff --git a/ext/testlib/result.py b/ext/testlib/result.py
index 786febde2a..b6977d3542 100644
--- a/ext/testlib/result.py
+++ b/ext/testlib/result.py
@@ -46,6 +46,7 @@
 import testlib.helper as helper
 import testlib.state as state
 
+
 def _create_uid_index(iterable):
     index = {}
     for item in iterable:
@@ -58,12 +59,15 @@ class _CommonMetadataMixin:
     @property
     def name(self):
         return self._metadata.name
+
     @property
     def uid(self):
         return self._metadata.uid
+
     @property
     def result(self):
         return self._metadata.result
+
     @result.setter
     def result(self, result):
         self._metadata.result = result
@@ -83,12 +87,10 @@ def __init__(self, obj, suite, directory):
         self.suite = suite
 
         self.stderr = os.path.join(
-            InternalSavedResults.output_path(self.uid, suite.uid),
-            'stderr'
+            InternalSavedResults.output_path(self.uid, suite.uid), "stderr"
         )
         self.stdout = os.path.join(
-            InternalSavedResults.output_path(self.uid, suite.uid),
-            'stdout'
+            InternalSavedResults.output_path(self.uid, suite.uid), "stdout"
         )
 
 
@@ -99,8 +101,9 @@ def __init__(self, obj, directory):
         self._wrap_tests(obj)
 
     def _wrap_tests(self, obj):
-        self._tests = [InternalTestResult(test, self, self.directory)
-                       for test in obj]
+        self._tests = [
+            InternalTestResult(test, self, self.directory) for test in obj
+        ]
         self._tests_index = _create_uid_index(self._tests)
 
     def get_test(self, uid):
@@ -129,13 +132,14 @@ def __iter__(self):
         return iter(self._suites)
 
     def _wrap_suites(self, obj):
-        self._suites = [InternalSuiteResult(suite, self.directory)
-                        for suite in obj]
+        self._suites = [
+            InternalSuiteResult(suite, self.directory) for suite in obj
+        ]
         self._suites_index = _create_uid_index(self._suites)
 
     def add_suite(self, suite):
         if suite.uid in self._suites:
-            raise ValueError('Cannot have duplicate suite UIDs.')
+            raise ValueError("Cannot have duplicate suite UIDs.")
         self._suites[suite.uid] = suite
 
     def get_suite_result(self, suite_uid):
@@ -151,84 +155,85 @@ def aggregate_test_results(self):
                 helper.append_dictlist(results, test.result.value, test)
         return results
 
+
 class InternalSavedResults:
     @staticmethod
     def output_path(test_uid, suite_uid, base=None):
-        '''
+        """
         Return the path which results for a specific test case should be
         stored.
-        '''
+        """
         if base is None:
             base = config.result_path
         return os.path.join(
-                base,
-                str(suite_uid).replace(os.path.sep, '-'),
-                str(test_uid).replace(os.path.sep, '-'))
+            base,
+            str(suite_uid).replace(os.path.sep, "-"),
+            str(test_uid).replace(os.path.sep, "-"),
+        )
 
     @staticmethod
     def save(results, path, protocol=pickle.HIGHEST_PROTOCOL):
         if not os.path.exists(os.path.dirname(path)):
-           try:
-               os.makedirs(os.path.dirname(path))
-           except OSError as exc: # Guard against race condition
-               if exc.errno != errno.EEXIST:
-                   raise
+            try:
+                os.makedirs(os.path.dirname(path))
+            except OSError as exc:  # Guard against race condition
+                if exc.errno != errno.EEXIST:
+                    raise
 
-        with open(path, 'wb') as f:
+        with open(path, "wb") as f:
             pickle.dump(results, f, protocol)
 
     @staticmethod
     def load(path):
-        with open(path, 'rb') as f:
+        with open(path, "rb") as f:
             return pickle.load(f)
 
 
-class XMLElement(object):
+class XMLElement:
     def write(self, file_):
         self.begin(file_)
         self.end(file_)
 
     def begin(self, file_):
-        file_.write('<')
+        file_.write("<")
         file_.write(self.name)
-        if hasattr(self, 'attributes'):
+        if hasattr(self, "attributes"):
             for attr in self.attributes:
-                file_.write(' ')
+                file_.write(" ")
                 attr.write(file_)
-        file_.write('>')
+        file_.write(">")
 
         self.body(file_)
 
     def body(self, file_):
-        if hasattr(self, 'elements'):
+        if hasattr(self, "elements"):
             for elem in self.elements:
-                file_.write('\n')
+                file_.write("\n")
                 elem.write(file_)
-        if hasattr(self, 'content'):
-                file_.write('\n')
-                file_.write(
-                    xml.sax.saxutils.escape(self.content))
-        file_.write('\n')
+        if hasattr(self, "content"):
+            file_.write("\n")
+            file_.write(xml.sax.saxutils.escape(self.content))
+        file_.write("\n")
 
     def end(self, file_):
-        file_.write('</%s>' % self.name)
+        file_.write("</%s>" % self.name)
 
-class XMLAttribute(object):
+
+class XMLAttribute:
     def __init__(self, name, value):
         self.name = name
         self.value = value
 
     def write(self, file_):
-        file_.write('%s=%s' % (self.name,
-                xml.sax.saxutils.quoteattr(self.value)))
+        file_.write(f"{self.name}={xml.sax.saxutils.quoteattr(self.value)}")
 
 
 class JUnitTestSuites(XMLElement):
-    name = 'testsuites'
+    name = "testsuites"
     result_map = {
-        state.Result.Errored: 'errors',
-        state.Result.Failed: 'failures',
-        state.Result.Passed: 'tests'
+        state.Result.Errored: "errors",
+        state.Result.Failed: "failures",
+        state.Result.Passed: "tests",
     }
 
     def __init__(self, internal_results):
@@ -236,8 +241,9 @@ def __init__(self, internal_results):
 
         self.attributes = []
         for result, tests in results.items():
-            self.attributes.append(self.result_attribute(result,
-                    str(len(tests))))
+            self.attributes.append(
+                self.result_attribute(result, str(len(tests)))
+            )
 
         self.elements = []
         for suite in internal_results:
@@ -246,24 +252,24 @@ def __init__(self, internal_results):
     def result_attribute(self, result, count):
         return XMLAttribute(self.result_map[result], count)
 
+
 class JUnitTestSuite(JUnitTestSuites):
-    name = 'testsuite'
+    name = "testsuite"
     result_map = {
-        state.Result.Errored: 'errors',
-        state.Result.Failed: 'failures',
-        state.Result.Passed: 'tests',
-        state.Result.Skipped: 'skipped'
+        state.Result.Errored: "errors",
+        state.Result.Failed: "failures",
+        state.Result.Passed: "tests",
+        state.Result.Skipped: "skipped",
     }
 
     def __init__(self, suite_result):
         results = suite_result.aggregate_test_results()
 
-        self.attributes = [
-            XMLAttribute('name', suite_result.name)
-        ]
+        self.attributes = [XMLAttribute("name", suite_result.name)]
         for result, tests in results.items():
-            self.attributes.append(self.result_attribute(result,
-                    str(len(tests))))
+            self.attributes.append(
+                self.result_attribute(result, str(len(tests)))
+            )
 
         self.elements = []
         for test in suite_result:
@@ -272,40 +278,42 @@ def __init__(self, suite_result):
     def result_attribute(self, result, count):
         return XMLAttribute(self.result_map[result], count)
 
+
 class JUnitTestCase(XMLElement):
-    name = 'testcase'
+    name = "testcase"
+
     def __init__(self, test_result):
         self.attributes = [
-            XMLAttribute('name', test_result.name),
-             # TODO JUnit expects class of test.. add as test metadata.
-            XMLAttribute('classname', str(test_result.uid)),
-            XMLAttribute('status', str(test_result.result)),
-            XMLAttribute('time', str(test_result.time["user_time"])),
+            XMLAttribute("name", test_result.name),
+            # TODO JUnit expects class of test.. add as test metadata.
+            XMLAttribute("classname", str(test_result.uid)),
+            XMLAttribute("status", str(test_result.result)),
+            XMLAttribute("time", str(test_result.time["user_time"])),
         ]
 
         # TODO JUnit expects a message for the reason a test was
         # skipped or errored, save this with the test metadata.
         # http://llg.cubic.org/docs/junit/
         self.elements = [
-            LargeFileElement('system-err', test_result.stderr),
-            LargeFileElement('system-out', test_result.stdout),
+            LargeFileElement("system-err", test_result.stderr),
+            LargeFileElement("system-out", test_result.stdout),
         ]
 
-        if str(test_result.result) == 'Failed':
-            self.elements.append(JUnitFailure(
-                'Test failed',
-                str(test_result.result.reason))
+        if str(test_result.result) == "Failed":
+            self.elements.append(
+                JUnitFailure("Test failed", str(test_result.result.reason))
             )
 
 
 class JUnitFailure(XMLElement):
-    name = 'failure'
+    name = "failure"
+
     def __init__(self, message, cause):
         self.attributes = [
-            XMLAttribute('message', message),
+            XMLAttribute("message", message),
         ]
         cause_element = XMLElement()
-        cause_element.name = 'cause'
+        cause_element.name = "cause"
         cause_element.content = cause
         self.elements = [cause_element]
 
@@ -318,10 +326,10 @@ def __init__(self, name, filename):
 
     def body(self, file_):
         try:
-            with open(self.filename, 'r') as f:
+            with open(self.filename) as f:
                 for line in f:
                     file_.write(xml.sax.saxutils.escape(line))
-        except IOError:
+        except OSError:
             # TODO Better error logic, this is sometimes O.K.
             # if there was no stdout/stderr captured for the test
             #
@@ -330,15 +338,13 @@ def body(self, file_):
             pass
 
 
-
 class JUnitSavedResults:
     @staticmethod
     def save(results, path):
-        '''
+        """
         Compile the internal results into JUnit format writting it to the
         given file.
-        '''
+        """
         results = JUnitTestSuites(results)
-        with open(path, 'w') as f:
+        with open(path, "w") as f:
             results.write(f)
-
diff --git a/ext/testlib/runner.py b/ext/testlib/runner.py
index 16ff952985..1e3512af2b 100644
--- a/ext/testlib/runner.py
+++ b/ext/testlib/runner.py
@@ -47,14 +47,15 @@
 from testlib.state import Status, Result
 from testlib.fixture import SkipException
 
+
 def compute_aggregate_result(iterable):
-    '''
+    """
     Status of the test suite by default is:
     * Passed if all contained tests passed
     * Errored if any contained tests errored
     * Failed if no tests errored, but one or more failed.
     * Skipped if all contained tests were skipped
-    '''
+    """
     failed = []
     skipped = []
     for testitem in iterable:
@@ -73,18 +74,18 @@ def compute_aggregate_result(iterable):
     else:
         return Result(Result.Passed)
 
-class TestParameters(object):
+
+class TestParameters:
     def __init__(self, test, suite):
         self.test = test
         self.suite = suite
         self.log = log.test_log
         self.log.test = test
-        self.time = {
-            "user_time" : 0, "system_time" : 0}
+        self.time = {"user_time": 0, "system_time": 0}
 
     @helper.cacheresult
     def _fixtures(self):
-        fixtures = {fixture.name:fixture for fixture in self.suite.fixtures}
+        fixtures = {fixture.name: fixture for fixture in self.suite.fixtures}
         for fixture in self.test.fixtures:
             fixtures[fixture.name] = fixture
         return fixtures
@@ -139,18 +140,18 @@ def run(self):
         else:
             self.testable.status = Status.Complete
 
+
 class TestRunner(RunnerPattern):
     def test(self):
-        test_params = TestParameters(
-            self.testable,
-            self.testable.parent_suite)
+        test_params = TestParameters(self.testable, self.testable.parent_suite)
 
         try:
             # Running the test
             test_params.test.test(test_params)
         except Exception:
-            self.testable.result = Result(Result.Failed,
-                    traceback.format_exc())
+            self.testable.result = Result(
+                Result.Failed, traceback.format_exc()
+            )
         else:
             self.testable.result = Result(Result.Passed)
 
@@ -161,8 +162,7 @@ class SuiteRunner(RunnerPattern):
     def test(self):
         for test in self.testable:
             test.runner(test).run()
-        self.testable.result = compute_aggregate_result(
-                iter(self.testable))
+        self.testable.result = compute_aggregate_result(iter(self.testable))
 
 
 class LibraryRunner(SuiteRunner):
@@ -175,23 +175,23 @@ def set_threads(self, threads):
 
     def test(self):
         pool = multiprocessing.dummy.Pool(self.threads)
-        pool.map(lambda suite : suite.runner(suite).run(), self.testable)
-        self.testable.result = compute_aggregate_result(
-                iter(self.testable))
+        pool.map(lambda suite: suite.runner(suite).run(), self.testable)
+        self.testable.result = compute_aggregate_result(iter(self.testable))
 
 
 class BrokenFixtureException(Exception):
     def __init__(self, fixture, testitem, trace):
         self.trace = trace
 
-        self.msg = ('%s\n'
-                   'Exception raised building "%s" raised SkipException'
-                   ' for "%s".' %
-                   (trace, fixture.name, testitem.name)
+        self.msg = (
+            "%s\n"
+            'Exception raised building "%s" raised SkipException'
+            ' for "%s".' % (trace, fixture.name, testitem.name)
         )
-        super(BrokenFixtureException, self).__init__(self.msg)
+        super().__init__(self.msg)
+
 
-class FixtureBuilder(object):
+class FixtureBuilder:
     def __init__(self, fixtures):
         self.fixtures = fixtures
         self.built_fixtures = []
@@ -207,12 +207,15 @@ def setup(self, testitem):
                 raise
             except Exception as e:
                 exc = traceback.format_exc()
-                msg = 'Exception raised while setting up fixture for %s' %\
-                        testitem.uid
-                log.test_log.warn('%s\n%s' % (exc, msg))
+                msg = (
+                    "Exception raised while setting up fixture for %s"
+                    % testitem.uid
+                )
+                log.test_log.warn(f"{exc}\n{msg}")
 
-                raise BrokenFixtureException(fixture, testitem,
-                        traceback.format_exc())
+                raise BrokenFixtureException(
+                    fixture, testitem, traceback.format_exc()
+                )
 
     def post_test_procedure(self, testitem):
         for fixture in self.built_fixtures:
@@ -225,6 +228,8 @@ def teardown(self, testitem):
             except Exception:
                 # Log exception but keep cleaning up.
                 exc = traceback.format_exc()
-                msg = 'Exception raised while tearing down fixture for %s' %\
-                        testitem.uid
-                log.test_log.warn('%s\n%s' % (exc, msg))
+                msg = (
+                    "Exception raised while tearing down fixture for %s"
+                    % testitem.uid
+                )
+                log.test_log.warn(f"{exc}\n{msg}")
diff --git a/ext/testlib/state.py b/ext/testlib/state.py
index d220bb1019..21a23628bc 100644
--- a/ext/testlib/state.py
+++ b/ext/testlib/state.py
@@ -24,14 +24,15 @@
 #
 # Authors: Sean Wilson
 
+
 class Result:
-    enums = '''
+    enums = """
         NotRun
         Skipped
         Passed
         Failed
         Errored
-    '''.split()
+    """.split()
     for idx, enum in enumerate(enums):
         locals()[enum] = idx
 
@@ -46,15 +47,16 @@ def __init__(self, value, reason=None):
     def __str__(self):
         return self.name(self.value)
 
+
 class Status:
-    enums = '''
+    enums = """
         Unscheduled
         Building
         Running
         TearingDown
         Complete
         Avoided
-    '''.split()
+    """.split()
     for idx, enum in enumerate(enums):
         locals()[enum] = idx
 
diff --git a/ext/testlib/suite.py b/ext/testlib/suite.py
index eae52fd922..4a24c9c481 100644
--- a/ext/testlib/suite.py
+++ b/ext/testlib/suite.py
@@ -30,8 +30,9 @@
 import testlib.helper as helper
 import testlib.runner as runner_mod
 
-class TestSuite(object):
-    '''
+
+class TestSuite:
+    """
     An object grouping a collection of tests. It provides tags which enable
     filtering during list and run selection. All tests held in the suite must
     have a unique name.
@@ -44,7 +45,8 @@ class TestSuite(object):
         To reduce test definition boilerplate, the :func:`init` method is
         forwarded all `*args` and `**kwargs`. This means derived classes can
         define init without boilerplate super().__init__(*args, **kwargs).
-    '''
+    """
+
     runner = runner_mod.SuiteRunner
     collector = helper.InstanceCollector()
     fixtures = []
@@ -52,12 +54,18 @@ class TestSuite(object):
     tags = set()
 
     def __new__(klass, *args, **kwargs):
-        obj = super(TestSuite, klass).__new__(klass)
+        obj = super().__new__(klass)
         TestSuite.collector.collect(obj)
         return obj
 
-    def __init__(self, name=None, fixtures=tuple(), tests=tuple(),
-                 tags=tuple(), **kwargs):
+    def __init__(
+        self,
+        name=None,
+        fixtures=tuple(),
+        tests=tuple(),
+        tags=tuple(),
+        **kwargs
+    ):
         self.fixtures = self.fixtures + list(fixtures)
         self.tags = self.tags | set(tags)
         self.tests = self.tests + list(tests)
@@ -66,4 +74,4 @@ def __init__(self, name=None, fixtures=tuple(), tests=tuple(),
         self.name = name
 
     def __iter__(self):
-        return iter(self.tests)
\ No newline at end of file
+        return iter(self.tests)
diff --git a/ext/testlib/terminal.py b/ext/testlib/terminal.py
index be489f5296..f295aa2dc4 100644
--- a/ext/testlib/terminal.py
+++ b/ext/testlib/terminal.py
@@ -41,7 +41,7 @@
 
 # ANSI color names in index order
 color_names = "Black Red Green Yellow Blue Magenta Cyan White".split()
-default_separator = '='
+default_separator = "="
 
 # Character attribute capabilities.  Note that not all terminals
 # support all of these capabilities, or support them
@@ -54,39 +54,46 @@
 # Please feel free to add information about other terminals here.
 #
 capability_map = {
-         'Bold': 'bold',
-          'Dim': 'dim',
-        'Blink': 'blink',
-    'Underline': 'smul',
-      'Reverse': 'rev',
-     'Standout': 'smso',
-       'Normal': 'sgr0'
+    "Bold": "bold",
+    "Dim": "dim",
+    "Blink": "blink",
+    "Underline": "smul",
+    "Reverse": "rev",
+    "Standout": "smso",
+    "Normal": "sgr0",
 }
 
 capability_names = capability_map.keys()
 
+
 def null_cap_string(s, *args):
-    return ''
+    return ""
+
 
 try:
     import curses
+
     curses.setupterm()
+
     def cap_string(s, *args):
         cap = curses.tigetstr(s)
         if cap:
             return curses.tparm(cap, *args).decode("utf-8")
         else:
-            return ''
+            return ""
+
 except:
     cap_string = null_cap_string
 
-class ColorStrings(object):
+
+class ColorStrings:
     def __init__(self, cap_string):
         for i, c in enumerate(color_names):
-            setattr(self, c, cap_string('setaf', i))
+            setattr(self, c, cap_string("setaf", i))
         for name, cap in capability_map.items():
             setattr(self, name, cap_string(cap))
 
+
 termcap = ColorStrings(cap_string)
 no_termcap = ColorStrings(null_cap_string)
 
@@ -95,7 +102,8 @@ def __init__(self, cap_string):
 else:
     tty_termcap = no_termcap
 
-def get_termcap(use_colors = None):
+
+def get_termcap(use_colors=None):
     if use_colors:
         return termcap
     elif use_colors is None:
@@ -104,65 +112,78 @@ def get_termcap(use_colors = None):
     else:
         return no_termcap
 
+
 def terminal_size():
-    '''Return the (width, heigth) of the terminal screen.'''
+    """Return the (width, height) of the terminal screen."""
     try:
-        h, w, hp, wp = struct.unpack('HHHH',
-            fcntl.ioctl(0, termios.TIOCGWINSZ,
-            struct.pack('HHHH', 0, 0, 0, 0)))
+        h, w, hp, wp = struct.unpack(
+            "HHHH",
+            fcntl.ioctl(
+                0, termios.TIOCGWINSZ, struct.pack("HHHH", 0, 0, 0, 0)
+            ),
+        )
         return w, h
-    except IOError:
+    except OSError:
         # It's possible that in sandboxed environments the above ioctl is not
         # allowed (e.g., some jenkins setups)
         return 80, 24
 
 
 def separator(char=default_separator, color=None):
-    '''
+    """
     Return a separator of the given character that is the length of the full
     width of the terminal screen.
-    '''
+    """
     (w, h) = terminal_size()
     if color:
-        return color + char*w + termcap.Normal
+        return color + char * w + termcap.Normal
     else:
-        return char*w
+        return char * w
 
-def insert_separator(inside, char=default_separator,
-                     min_barrier=3, color=None):
-    '''
+
+def insert_separator(
+    inside, char=default_separator, min_barrier=3, color=None
+):
+    """
     Place the given string inside of the separator. If it does not fit inside,
     expand the separator to fit it with at least min_barrier.
 
     .. seealso:: :func:`separator`
-    '''
+    """
     # Use a bytearray so it's efficient to manipulate
-    string = bytearray(separator(char, color=color), 'utf-8')
+    string = bytearray(separator(char, color=color), "utf-8")
 
     # Check if we can fit inside with at least min_barrier.
     gap = (len(string) - len(inside)) - min_barrier * 2
     if gap > 0:
         # We'll need to expand the string to fit us.
-        string.extend([ char for _ in range(-gap)])
+        string.extend([char for _ in range(-gap)])
     # Emplace inside
-    middle = (len(string)-1)//2
-    start_idx = middle - len(inside)//2
-    string[start_idx:len(inside)+start_idx] = str.encode(inside)
+    middle = (len(string) - 1) // 2
+    start_idx = middle - len(inside) // 2
+    string[start_idx : len(inside) + start_idx] = str.encode(inside)
     return str(string.decode("utf-8"))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
+
     def test_termcap(obj):
         for c_name in color_names:
             c_str = getattr(obj, c_name)
             print(c_str + c_name + obj.Normal)
             for attr_name in capability_names:
-                if attr_name == 'Normal':
+                if attr_name == "Normal":
                     continue
                 attr_str = getattr(obj, attr_name)
                 print(attr_str + c_str + attr_name + " " + c_name + obj.Normal)
-            print(obj.Bold + obj.Underline + \
-                  c_name + "Bold Underline " + c_str + obj.Normal)
+            print(
+                obj.Bold
+                + obj.Underline
+                + c_name
+                + "Bold Underline "
+                + c_str
+                + obj.Normal
+            )
 
     print("=== termcap enabled ===")
     test_termcap(termcap)
diff --git a/ext/testlib/test_util.py b/ext/testlib/test_util.py
index 22e2c973f6..3ef26191c3 100644
--- a/ext/testlib/test_util.py
+++ b/ext/testlib/test_util.py
@@ -29,14 +29,16 @@
 import testlib.helper as helper
 import testlib.runner as runner_mod
 
-class TestCase(object):
-    '''
+
+class TestCase:
+    """
     Base class for all tests.
 
     ..note::
         The :func:`__new__` method enables collection of test cases, it must
         be called in order for test cases to be collected.
-    '''
+    """
+
     fixtures = []
 
     # TODO, remove explicit dependency. Use the loader to set the
@@ -45,7 +47,7 @@ class TestCase(object):
     collector = helper.InstanceCollector()
 
     def __new__(cls, *args, **kwargs):
-        obj = super(TestCase, cls).__new__(cls)
+        obj = super().__new__(cls)
         TestCase.collector.collect(obj)
         return obj
 
@@ -55,10 +57,12 @@ def __init__(self, name=None, fixtures=tuple(), **kwargs):
             name = self.__class__.__name__
         self.name = name
 
+
 class TestFunction(TestCase):
-    '''
+    """
     TestCase implementation which uses a callable object as a test.
-    '''
+    """
+
     def __init__(self, function, name=None, **kwargs):
         self.test_function = function
         if name is None:
diff --git a/ext/testlib/uid.py b/ext/testlib/uid.py
index f8951a28da..84403f80fe 100644
--- a/ext/testlib/uid.py
+++ b/ext/testlib/uid.py
@@ -31,8 +31,9 @@
 
 import testlib.configuration as configuration
 
-class UID(object):
-    sep = ':'
+
+class UID:
+    sep = ":"
     type_idx, path_idx = range(2)
 
     def __init__(self, path, *args):
@@ -41,9 +42,10 @@ def __init__(self, path, *args):
 
     @staticmethod
     def _shorten_path(path):
-        return os.path.relpath(path,
-                os.path.commonprefix((configuration.constants.testing_base,
-                                      path)))
+        return os.path.relpath(
+            path,
+            os.path.commonprefix((configuration.constants.testing_base, path)),
+        )
 
     @staticmethod
     def _full_path(short_path):
@@ -75,11 +77,11 @@ def from_uid(cls, uid):
     def __str__(self):
         common_opts = {
             self.path_idx: self.path,
-            self.type_idx: self.__class__.__name__
+            self.type_idx: self.__class__.__name__,
         }
-        return self.sep.join(itertools.chain(
-            [common_opts[0], common_opts[1]],
-            self.attributes))
+        return self.sep.join(
+            itertools.chain([common_opts[0], common_opts[1]], self.attributes)
+        )
 
     def __hash__(self):
         return hash(str(self))
diff --git a/ext/testlib/wrappers.py b/ext/testlib/wrappers.py
index b2b887b0f9..936d9b604d 100644
--- a/ext/testlib/wrappers.py
+++ b/ext/testlib/wrappers.py
@@ -38,16 +38,17 @@
 #
 # Authors: Sean Wilson
 
-'''
+"""
 Module contains wrappers for test items that have been
 loaded by the testlib :class:`testlib.loader.Loader`.
-'''
+"""
 import itertools
 
 import testlib.uid as uid
 from testlib.state import Status, Result
 
-class TestCaseMetadata():
+
+class TestCaseMetadata:
     def __init__(self, name, uid, path, result, status, suite_uid):
         self.name = name
         self.uid = uid
@@ -57,7 +58,7 @@ def __init__(self, name, uid, path, result, status, suite_uid):
         self.suite_uid = suite_uid
 
 
-class TestSuiteMetadata():
+class TestSuiteMetadata:
     def __init__(self, name, uid, tags, path, status, result):
         self.name = name
         self.uid = uid
@@ -67,21 +68,22 @@ def __init__(self, name, uid, tags, path, status, result):
         self.result = result
 
 
-class LibraryMetadata():
+class LibraryMetadata:
     def __init__(self, name, result, status):
         self.name = name
         self.result = result
         self.status = status
 
 
-class LoadedTestable(object):
-    '''
+class LoadedTestable:
+    """
     Base class for loaded test items.
 
     :property:`result` and :property:`status` setters
     notify testlib via the :func:`log_result` and :func:`log_status`
     of the updated status.
-    '''
+    """
+
     def __init__(self, obj):
         self.obj = obj
         self.metadata = self._generate_metadata()
@@ -135,10 +137,12 @@ def time(self, value):
     # TODO Change log to provide status_update, result_update for all types.
     def log_status(self, status):
         import testlib.log as log
+
         log.test_log.status_update(self, status)
 
     def log_result(self, result):
         import testlib.log as log
+
         log.test_log.result_update(self, result)
 
     def __iter__(self):
@@ -155,16 +159,18 @@ def test(self, *args, **kwargs):
         self.obj.test(*args, **kwargs)
 
     def _generate_metadata(self):
-        return TestCaseMetadata( **{
-            'name':self.obj.name,
-            'path': self._path,
-            'uid': uid.TestUID(self._path,
-                               self.obj.name,
-                               self.parent_suite.name),
-            'status': Status.Unscheduled,
-            'result': Result(Result.NotRun),
-            'suite_uid': self.parent_suite.metadata.uid
-        })
+        return TestCaseMetadata(
+            **{
+                "name": self.obj.name,
+                "path": self._path,
+                "uid": uid.TestUID(
+                    self._path, self.obj.name, self.parent_suite.name
+                ),
+                "status": Status.Unscheduled,
+                "result": Result(Result.NotRun),
+                "suite_uid": self.parent_suite.metadata.uid,
+            }
+        )
 
 
 class LoadedSuite(LoadedTestable):
@@ -174,18 +180,21 @@ def __init__(self, suite_obj, path):
         self.tests = self._wrap_children(suite_obj)
 
     def _wrap_children(self, suite_obj):
-        return [LoadedTest(test, self, self.metadata.path)
-                for test in suite_obj]
+        return [
+            LoadedTest(test, self, self.metadata.path) for test in suite_obj
+        ]
 
     def _generate_metadata(self):
-        return TestSuiteMetadata( **{
-            'name': self.obj.name,
-            'tags':self.obj.tags,
-            'path': self._path,
-            'uid': uid.SuiteUID(self._path, self.obj.name),
-            'status': Status.Unscheduled,
-            'result': Result(Result.NotRun)
-        })
+        return TestSuiteMetadata(
+            **{
+                "name": self.obj.name,
+                "tags": self.obj.tags,
+                "path": self._path,
+                "uid": uid.SuiteUID(self._path, self.obj.name),
+                "status": Status.Unscheduled,
+                "result": Result(Result.NotRun),
+            }
+        )
 
     def __iter__(self):
         return iter(self.tests)
@@ -196,41 +205,44 @@ def tags(self):
 
 
 class LoadedLibrary(LoadedTestable):
-    '''
+    """
     Wraps a collection of all loaded test suites and
     provides utility functions for accessing fixtures.
-    '''
+    """
+
     def __init__(self, suites):
         LoadedTestable.__init__(self, suites)
 
     def _generate_metadata(self):
-        return LibraryMetadata( **{
-            'name': 'Test Library',
-            'status': Status.Unscheduled,
-            'result': Result(Result.NotRun)
-        })
+        return LibraryMetadata(
+            **{
+                "name": "Test Library",
+                "status": Status.Unscheduled,
+                "result": Result(Result.NotRun),
+            }
+        )
 
     def __iter__(self):
-        '''
+        """
         :returns: an iterator over contained :class:`TestSuite` objects.
-        '''
+        """
         return iter(self.obj)
 
     def all_fixtures(self):
-        '''
+        """
         :returns: an interator overall all global, suite,
           and test fixtures
-        '''
-        return itertools.chain(itertools.chain(
-            *(suite.fixtures for suite in self.obj)),
+        """
+        return itertools.chain(
+            itertools.chain(*(suite.fixtures for suite in self.obj)),
             *(self.test_fixtures(suite) for suite in self.obj)
         )
 
     def test_fixtures(self, suite):
-        '''
+        """
         :returns: an interator over all fixtures of each
           test contained in the given suite
-        '''
+        """
         return itertools.chain(*(test.fixtures for test in suite))
 
     @property
diff --git a/requirements.txt b/requirements.txt
index 561cab79cf..4b820f51ba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+mypy==1.5.1
 pre-commit==2.20.0
diff --git a/site_scons/gem5_scons/__init__.py b/site_scons/gem5_scons/__init__.py
index 6d6226cdc6..c93a5e29e0 100644
--- a/site_scons/gem5_scons/__init__.py
+++ b/site_scons/gem5_scons/__init__.py
@@ -88,7 +88,7 @@ def new_spawn(sh, esc, cmd, args, sh_env):
 # Generate a string of the form:
 #   common/path/prefix/src1, src2 -> tgt1, tgt2
 # to print while building.
-class Transform(object):
+class Transform:
     # all specific color settings should be here and nowhere else
     tool_color = termcap.Normal
     pfx_color = termcap.Yellow
diff --git a/site_scons/gem5_scons/builders/config_file.py b/site_scons/gem5_scons/builders/config_file.py
index 2ab7bf87b4..7ee5e4d658 100755
--- a/site_scons/gem5_scons/builders/config_file.py
+++ b/site_scons/gem5_scons/builders/config_file.py
@@ -53,7 +53,7 @@ def ConfigFile(env):
     # operands are the name of the variable and a Value node containing the
     # value of the variable.
     def build_config_file(target, source, env):
-        (variable, value) = [s.get_contents().decode("utf-8") for s in source]
+        (variable, value) = (s.get_contents().decode("utf-8") for s in source)
         with open(str(target[0].abspath), "w") as f:
             print("#define", variable, value, file=f)
         return None
diff --git a/site_scons/gem5_scons/configure.py b/site_scons/gem5_scons/configure.py
index d04cdd49cb..c1b9fb56cc 100644
--- a/site_scons/gem5_scons/configure.py
+++ b/site_scons/gem5_scons/configure.py
@@ -80,17 +80,17 @@ def CheckLinkFlag(context, flag, autoadd=True, set_for_shared=True):
 def CheckMember(context, include, decl, member, include_quotes="<>"):
     context.Message(f"Checking for member {member} in {decl}...")
     text = """
-#include %(header)s
-int main(){
-  %(decl)s test;
-  (void)test.%(member)s;
+#include {header}
+int main(){{
+  {decl} test;
+  (void)test.{member};
   return 0;
-};
-""" % {
-        "header": include_quotes[0] + include + include_quotes[1],
-        "decl": decl,
-        "member": member,
-    }
+}};
+""".format(
+        header=include_quotes[0] + include + include_quotes[1],
+        decl=decl,
+        member=member,
+    )
 
     ret = context.TryCompile(text, extension=".cc")
     context.Result(ret)
diff --git a/site_scons/gem5_scons/defaults.py b/site_scons/gem5_scons/defaults.py
index 996cfd495f..4de9a93339 100644
--- a/site_scons/gem5_scons/defaults.py
+++ b/site_scons/gem5_scons/defaults.py
@@ -44,31 +44,31 @@
 
 
 def EnvDefaults(env):
-    # export TERM so that clang reports errors in color
-    use_vars = set(
-        [
-            "AS",
-            "AR",
-            "CC",
-            "CXX",
-            "HOME",
-            "LD_LIBRARY_PATH",
-            "LIBRARY_PATH",
-            "PATH",
-            "PKG_CONFIG_PATH",
-            "PROTOC",
-            "PYTHONPATH",
-            "RANLIB",
-            "TERM",
-            "PYTHON_CONFIG",
-            "CCFLAGS_EXTRA",
-            "GEM5PY_CCFLAGS_EXTRA",
-            "GEM5PY_LINKFLAGS_EXTRA",
-            "LINKFLAGS_EXTRA",
-            "LANG",
-            "LC_CTYPE",
-        ]
-    )
+    # initialize the toolchain-related env from the host environment
+    use_vars = {
+        "AS",
+        "AR",
+        "CC",
+        "CXX",
+        "HOME",
+        "CPATH",
+        "LD_LIBRARY_PATH",
+        "LIBRARY_PATH",
+        "PATH",
+        "PKG_CONFIG_PATH",
+        "PROTOC",
+        "PYTHONPATH",
+        "RANLIB",
+        "TERM",  # so that clang reports errors in color
+        "PYTHON_CONFIG",  # gem5 specific build env
+        "CCFLAGS_EXTRA",  # gem5 specific build env
+        "GEM5PY_CCFLAGS_EXTRA",  # gem5 specific build env
+        "GEM5PY_LINKFLAGS_EXTRA",  # gem5 specific build env
+        "LINKFLAGS_EXTRA",  # gem5 specific build env
+        "LANG",  # to work with non-ASCII directory paths
+        "LC_CTYPE",  # to work with non-ASCII directory paths
+        "DISPLAY",  # for GUI programs, e.g. kconfig guiconfig
+    }
 
     use_prefixes = [
         "ASAN_",  # address sanitizer symbolizer path and settings
diff --git a/site_scons/gem5_scons/sources.py b/site_scons/gem5_scons/sources.py
index 54aeb24de1..46392a1ac0 100644
--- a/site_scons/gem5_scons/sources.py
+++ b/site_scons/gem5_scons/sources.py
@@ -126,7 +126,7 @@ def resolve_tags(env, tags):
     return tags
 
 
-class SourceFilter(object):
+class SourceFilter:
     factories = {}
 
     def __init__(self, predicate):
@@ -209,11 +209,11 @@ class SourceMeta(type):
     particular type."""
 
     def __init__(cls, name, bases, dict):
-        super(SourceMeta, cls).__init__(name, bases, dict)
+        super().__init__(name, bases, dict)
         cls.all = SourceList()
 
 
-class SourceItem(object, metaclass=SourceMeta):
+class SourceItem(metaclass=SourceMeta):
     """Base object that encapsulates the notion of a source component for
     gem5. This specifies a set of tags which help group components into groups
     based on arbitrary properties."""
diff --git a/site_scons/gem5_scons/util.py b/site_scons/gem5_scons/util.py
index 045fd4ef32..ee8efdc49a 100644
--- a/site_scons/gem5_scons/util.py
+++ b/site_scons/gem5_scons/util.py
@@ -100,7 +100,7 @@ def make_version_list(v):
             return v
         elif isinstance(v, str):
             return list(
-                map(lambda x: int(re.match("\d+", x).group()), v.split("."))
+                map(lambda x: int(re.match(r"\d+", x).group()), v.split("."))
             )
         else:
             raise TypeError()
diff --git a/site_scons/site_init.py b/site_scons/site_init.py
index 480dfa74da..39c893ea38 100644
--- a/site_scons/site_init.py
+++ b/site_scons/site_init.py
@@ -38,7 +38,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from __future__ import print_function
 
 # Check for recent-enough Python and SCons versions.
 try:
diff --git a/src/Doxyfile b/src/Doxyfile
index 325040fee0..1ffbb7cce5 100644
--- a/src/Doxyfile
+++ b/src/Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME           = gem5
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = v23.0.0.0
+PROJECT_NUMBER         = v23.0.1.0
 
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
diff --git a/src/SConscript b/src/SConscript
index 1b4430327c..52051b3256 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -536,6 +536,14 @@ Export('DebugFormatFlag')
 # the corresponding build directory to pick up generated include
 # files.
 env.Append(CPPPATH=Dir('.'))
+parent_dir_set = set()
+
+for extra_dir in extras_dir_list:
+    parent_dir_set.add(str(Dir(extra_dir).Dir('..').abspath))
+
+if not GetOption('duplicate_sources'):
+    for parent_dir in parent_dir_set:
+        env.Append(CPPPATH=Dir(parent_dir))
 
 for extra_dir in extras_dir_list:
     env.Append(CPPPATH=Dir(extra_dir))
@@ -615,7 +623,7 @@ PySource('m5', 'python/m5/defines.py')
 
 # Generate a file that wraps the basic top level files
 gem5py_env.Command('python/m5/info.py',
-            [ File('#/COPYING'), File('#/LICENSE'), File('#/README'),
+            [ File('#/COPYING'), File('#/LICENSE'), File('#/README.md'),
                 "${GEM5PY}", "${INFOPY_PY}" ],
             MakeAction('"${GEM5PY}" "${INFOPY_PY}" "${TARGET}" '
                        '${SOURCES[:-2]}',
@@ -626,11 +634,12 @@ PySource('m5', 'python/m5/info.py')
 gem5py_m5_env = gem5py_env.Clone()
 gem5py_env.Append(CPPPATH=env['CPPPATH'])
 gem5py_env.Append(LIBS='z')
+gem5py_env.Append(LINKFLAGS='-rdynamic')
 gem5py_env.Program(gem5py, 'python/gem5py.cc')[0]
 m5_module_source = \
         Source.all.with_all_tags(env, 'm5_module', 'gem5 lib')
 m5_module_static = list(map(lambda s: s.static(gem5py_env), m5_module_source))
-gem5py_env.Program(gem5py_m5, [ 'python/gem5py.cc' ] + m5_module_static)
+gem5py_env.Program(gem5py_m5, [ 'python/gem5py_m5.cc' ] + m5_module_static)
 
 
 # version tags
@@ -647,6 +656,7 @@ env.AlwaysBuild(tags)
 #
 
 env['SHOBJSUFFIX'] = '${OBJSUFFIX}s'
+env.Append(LINKFLAGS='-rdynamic')
 
 envs = {
     'debug': env.Clone(ENV_LABEL='debug', OBJSUFFIX='.do'),
diff --git a/src/arch/amdgpu/common/tlb_coalescer.cc b/src/arch/amdgpu/common/tlb_coalescer.cc
index 0be1387977..0d2715fca3 100644
--- a/src/arch/amdgpu/common/tlb_coalescer.cc
+++ b/src/arch/amdgpu/common/tlb_coalescer.cc
@@ -482,7 +482,7 @@ TLBCoalescer::processProbeTLBEvent()
                     stats.localqueuingCycles += (curTick() * pkt_cnt);
                 }
 
-                DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
+                DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x\n",
                        virt_page_addr);
 
                 //copy coalescedReq to issuedTranslationsTable
diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index fd3a803bb8..a86dd668ec 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -495,7 +495,7 @@ namespace VegaISA
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
         &Decoder::subDecode_OP_FLAT,
-        &Decoder::decode_invalid,
+        &Decoder::subDecode_OP_FLAT,
         &Decoder::subDecode_OP_FLAT,
         &Decoder::subDecode_OP_FLAT,
         &Decoder::decode_invalid,
@@ -3140,8 +3140,8 @@ namespace VegaISA
         &Decoder::decode_OP_VOP1__V_CVT_NORM_I16_F16,
         &Decoder::decode_OP_VOP1__V_CVT_NORM_U16_F16,
         &Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16,
-        &Decoder::decode_OP_VOP1__V_SWAP_B32,
         &Decoder::decode_invalid,
+        &Decoder::decode_OP_VOP1__V_SWAP_B32,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 6c014bc107..74b6abee62 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -4728,6 +4728,7 @@ namespace VegaISA
     Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
         : Inst_SOPP(iFmt, "s_setprio")
     {
+        setFlag(ALU);
     } // Inst_SOPP__S_SETPRIO
 
     Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
@@ -4742,7 +4743,10 @@ namespace VegaISA
     void
     Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        ScalarRegU16 simm16 = instData.SIMM16;
+        ScalarRegU32 userPrio = simm16 & 0x3;
+
+        warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio);
     } // execute
     // --- Inst_SOPP__S_SENDMSG class methods ---
 
@@ -6384,65 +6388,17 @@ namespace VegaISA
     void
     Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
     {
-        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
-        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandU32 vdst(gpuDynInst, instData.VDST);
-
-        src0.readSrc();
-        src1.read();
-
-        if (isSDWAInst()) {
-            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
-            // use copies of original src0, src1, and dest during selecting
-            VecOperandU32 origSrc0_sdwa(gpuDynInst,
-                                        extData.iFmt_VOP_SDWA.SRC0);
-            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
-            VecOperandU32 origVdst(gpuDynInst, instData.VDST);
-
-            src0_sdwa.read();
-            origSrc0_sdwa.read();
-            origSrc1.read();
-
-            DPRINTF(VEGA, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
-                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
-                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
-                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
-                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
-                    extData.iFmt_VOP_SDWA.DST_U,
-                    extData.iFmt_VOP_SDWA.CLMP,
-                    extData.iFmt_VOP_SDWA.SRC0_SEL,
-                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
-                    extData.iFmt_VOP_SDWA.SRC0_NEG,
-                    extData.iFmt_VOP_SDWA.SRC0_ABS,
-                    extData.iFmt_VOP_SDWA.SRC1_SEL,
-                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
-                    extData.iFmt_VOP_SDWA.SRC1_NEG,
-                    extData.iFmt_VOP_SDWA.SRC1_ABS);
-
-            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
-                            src1, origSrc1);
-
-            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-                if (wf->execMask(lane)) {
-                    vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
-                                 bits(src1[lane], 23, 0);
-                    origVdst[lane] = vdst[lane]; // keep copy consistent
-                }
-            }
-
-            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
-        } else {
+        auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
+                         VecOperandU32& vdst, Wavefront* wf) {
             for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                 if (wf->execMask(lane)) {
                     vdst[lane] = bits(src0[lane], 23, 0) *
                                  bits(src1[lane], 23, 0);
                 }
             }
-        }
-
+        };
 
-        vdst.write();
+        vop2Helper<ConstVecOperandU32, VecOperandU32>(gpuDynInst, opImpl);
     } // execute
     // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---
 
@@ -36019,6 +35975,11 @@ namespace VegaISA
          */
         wf->computeUnit->vrf[wf->simdId]->
             scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+        /**
+         * Similarly, this counter could build up over time, even across
+         * multiple wavefronts, and cause a deadlock.
+         */
+        wf->rdLmReqsInPipe--;
     } // execute
     // --- Inst_DS__DS_PERMUTE_B32 class methods ---
 
@@ -36102,6 +36063,11 @@ namespace VegaISA
          */
         wf->computeUnit->vrf[wf->simdId]->
             scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+        /**
+         * Similarly, this counter could build up over time, even across
+         * multiple wavefronts, and cause a deadlock.
+         */
+        wf->rdLmReqsInPipe--;
     } // execute
     // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
 
@@ -36185,6 +36151,11 @@ namespace VegaISA
          */
         wf->computeUnit->vrf[wf->simdId]->
             scheduleWriteOperandsFromLoad(wf, gpuDynInst);
+        /**
+         * Similarly, this counter could build up over time, even across
+         * multiple wavefronts, and cause a deadlock.
+         */
+        wf->rdLmReqsInPipe--;
     } // execute
 
     // --- Inst_DS__DS_ADD_U64 class methods ---
@@ -40614,8 +40585,87 @@ namespace VegaISA
     void
     Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
+        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
+        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
+        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
+        ConstVecOperandU32 src(gpuDynInst, extData.VDATA);
+        ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1);
+
+        rsrcDesc.read();
+        offset.read();
+        src.read();
+        cmp.read();
+
+        int inst_offset = instData.OFFSET;
+
+        if (!instData.IDXEN && !instData.OFFEN) {
+            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
+                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
+                    addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (!instData.IDXEN && instData.OFFEN) {
+            addr0.read();
+            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
+                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
+                    addr0, addr1, rsrcDesc, offset, inst_offset);
+        } else if (instData.IDXEN && !instData.OFFEN) {
+            addr0.read();
+            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
+                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
+                    addr1, addr0, rsrcDesc, offset, inst_offset);
+        } else {
+            addr0.read();
+            addr1.read();
+            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
+                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
+                    addr1, addr0, rsrcDesc, offset, inst_offset);
+        }
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
+                    = src[lane];
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = cmp[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDATA);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
     // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods ---
 
     Inst_MUBUF__BUFFER_ATOMIC_ADD
@@ -43927,9 +43977,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44011,9 +44063,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44096,9 +44150,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44151,9 +44207,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44206,9 +44264,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44270,9 +44330,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44337,9 +44399,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44394,9 +44458,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44451,9 +44517,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44509,9 +44577,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44567,9 +44637,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44633,9 +44705,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             wf->decExpInstsIssued();
             return;
         }
@@ -44710,9 +44784,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44790,9 +44866,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -44870,9 +44948,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -45403,9 +45483,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
@@ -45484,9 +45566,11 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
 
-        if (gpuDynInst->exec_mask.none() && isFlat()) {
+        if (gpuDynInst->exec_mask.none()) {
             wf->decVMemInstsIssued();
-            wf->decLGKMInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
             return;
         }
 
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index d45a84c7b8..ca349c365f 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -31534,8 +31534,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
-        void initiateAcc(GPUDynInstPtr gpuDynInst);
-        void completeAcc(GPUDynInstPtr gpuDynInst);
+        void initiateAcc(GPUDynInstPtr gpuDynInst) override;
+        void completeAcc(GPUDynInstPtr gpuDynInst) override;
     }; // Inst_DS__DS_OR_B32
 
     class Inst_DS__DS_XOR_B32 : public Inst_DS
@@ -37220,6 +37220,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
 
     class Inst_MUBUF__BUFFER_ATOMIC_ADD : public Inst_MUBUF
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.cc b/src/arch/amdgpu/vega/insts/op_encodings.cc
index cc650fbbd0..c934094d9b 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.cc
+++ b/src/arch/amdgpu/vega/insts/op_encodings.cc
@@ -1546,6 +1546,8 @@ namespace VegaISA
         // The SEG field specifies FLAT(0) SCRATCH(1) or GLOBAL(2)
         if (iFmt->SEG == 0) {
             setFlag(Flat);
+        } else if (iFmt->SEG == 1) {
+            setFlag(FlatScratch);
         } else if (iFmt->SEG == 2) {
             setFlag(FlatGlobal);
         } else {
@@ -1573,12 +1575,12 @@ namespace VegaISA
     Inst_FLAT::initOperandInfo()
     {
         // One of the flat subtypes should be specified via flags
-        assert(isFlat() ^ isFlatGlobal());
+        assert(isFlat() ^ isFlatGlobal() ^ isFlatScratch());
 
         if (isFlat()) {
             initFlatOperandInfo();
-        } else if (isFlatGlobal()) {
-            initGlobalOperandInfo();
+        } else if (isFlatGlobal() || isFlatScratch()) {
+            initGlobalScratchOperandInfo();
         } else {
             panic("Unknown flat subtype!\n");
         }
@@ -1622,7 +1624,7 @@ namespace VegaISA
     }
 
     void
-    Inst_FLAT::initGlobalOperandInfo()
+    Inst_FLAT::initGlobalScratchOperandInfo()
     {
         //3 formats:
         // 1 dst + 2 src (load)
@@ -1691,12 +1693,12 @@ namespace VegaISA
     Inst_FLAT::generateDisassembly()
     {
         // One of the flat subtypes should be specified via flags
-        assert(isFlat() ^ isFlatGlobal());
+        assert(isFlat() ^ isFlatGlobal() ^ isFlatScratch());
 
         if (isFlat()) {
             generateFlatDisassembly();
-        } else if (isFlatGlobal()) {
-            generateGlobalDisassembly();
+        } else if (isFlatGlobal() || isFlatScratch()) {
+            generateGlobalScratchDisassembly();
         } else {
             panic("Unknown flat subtype!\n");
         }
@@ -1720,11 +1722,16 @@ namespace VegaISA
     }
 
     void
-    Inst_FLAT::generateGlobalDisassembly()
+    Inst_FLAT::generateGlobalScratchDisassembly()
     {
         // Replace flat_ with global_ in assembly string
         std::string global_opcode = _opcode;
-        global_opcode.replace(0, 4, "global");
+        if (isFlatGlobal()) {
+            global_opcode.replace(0, 4, "global");
+        } else {
+            assert(isFlatScratch());
+            global_opcode.replace(0, 4, "scratch");
+        }
 
         std::stringstream dis_stream;
         dis_stream << global_opcode << " ";
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 1071eada0e..a1c5e99c91 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -272,6 +272,111 @@ namespace VegaISA
         InstFormat extData;
         uint32_t varSize;
 
+        template<typename T>
+        T sdwaSrcHelper(GPUDynInstPtr gpuDynInst, T & src1)
+        {
+            T src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
+            // use copies of the original src0 and src1 during selecting
+            T origSrc0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
+            T origSrc1(gpuDynInst, instData.VSRC1);
+
+            src0_sdwa.read();
+            origSrc0_sdwa.read();
+            origSrc1.read();
+
+            DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register v[%d], "
+                "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, SRC0_SEXT: "
+                "%d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, SRC1_SEXT: %d, "
+                "SRC1_NEG: %d, SRC1_ABS: %d\n",
+                opcode().c_str(), extData.iFmt_VOP_SDWA.SRC0,
+                extData.iFmt_VOP_SDWA.DST_SEL, extData.iFmt_VOP_SDWA.DST_U,
+                extData.iFmt_VOP_SDWA.CLMP, extData.iFmt_VOP_SDWA.SRC0_SEL,
+                extData.iFmt_VOP_SDWA.SRC0_SEXT,
+                extData.iFmt_VOP_SDWA.SRC0_NEG, extData.iFmt_VOP_SDWA.SRC0_ABS,
+                extData.iFmt_VOP_SDWA.SRC1_SEL,
+                extData.iFmt_VOP_SDWA.SRC1_SEXT,
+                extData.iFmt_VOP_SDWA.SRC1_NEG,
+                extData.iFmt_VOP_SDWA.SRC1_ABS);
+
+            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
+                            src1, origSrc1);
+
+            return src0_sdwa;
+        }
+
+        template<typename T>
+        void sdwaDstHelper(GPUDynInstPtr gpuDynInst, T & vdst)
+        {
+            T origVdst(gpuDynInst, instData.VDST);
+
+            Wavefront *wf = gpuDynInst->wavefront();
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    origVdst[lane] = vdst[lane]; // keep copy consistent
+                }
+            }
+
+            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
+        }
+
+        // Reads the DPP src0 register, traces the DPP control fields, and
+        // passes src0/src1 to processDPP(); returns the src0 container.
+        template<typename T>
+        T dppHelper(GPUDynInstPtr gpuDynInst, T & src1)
+        {
+            T src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+            src0_dpp.read();
+
+            DPRINTF(VEGA, "Handling %s SRC DPP. SRC0: register v[%d], "
+                "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, SRC1_ABS: %d, "
+                "SRC1_NEG: %d, BC: %d, BANK_MASK: %d, ROW_MASK: %d\n",
+                opcode().c_str(), extData.iFmt_VOP_DPP.SRC0,
+                extData.iFmt_VOP_DPP.DPP_CTRL, extData.iFmt_VOP_DPP.SRC0_ABS,
+                extData.iFmt_VOP_DPP.SRC0_NEG, extData.iFmt_VOP_DPP.SRC1_ABS,
+                extData.iFmt_VOP_DPP.SRC1_NEG, extData.iFmt_VOP_DPP.BC,
+                extData.iFmt_VOP_DPP.BANK_MASK, extData.iFmt_VOP_DPP.ROW_MASK);
+            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
+            return src0_dpp;
+        }
+
+        template<typename ConstT, typename T>
+        void vop2Helper(GPUDynInstPtr gpuDynInst,
+                        void (*fOpImpl)(T&, T&, T&, Wavefront*))
+        {
+            Wavefront *wf = gpuDynInst->wavefront();
+            T src0(gpuDynInst, instData.SRC0);
+            T src1(gpuDynInst, instData.VSRC1);
+            T vdst(gpuDynInst, instData.VDST);
+
+            src0.readSrc();
+            src1.read();
+
+            if (isSDWAInst()) {
+                T src0_sdwa = sdwaSrcHelper(gpuDynInst, src1);
+                fOpImpl(src0_sdwa, src1, vdst, wf);
+                sdwaDstHelper(gpuDynInst, vdst);
+            } else if (isDPPInst()) {
+                T src0_dpp = dppHelper(gpuDynInst, src1);
+                fOpImpl(src0_dpp, src1, vdst, wf);
+            } else {
+                // src0 is unmodified. We need to use the const container
+                // type to allow reading scalar operands from src0. Only
+                // src0 can index scalar operands. We copy this to vdst
+                // temporarily to pass to the lambda so the instruction
+                // does not need to write two lambda functions (one for
+                // a const src0 and one for a mutable src0).
+                ConstT const_src0(gpuDynInst, instData.SRC0);
+                const_src0.readSrc();
+
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    vdst[lane] = const_src0[lane];
+                }
+                fOpImpl(vdst, src1, vdst, wf);
+            }
+
+            vdst.write();
+        }
+
       private:
         bool hasSecondDword(InFmt_VOP2 *);
     }; // Inst_VOP2
@@ -608,6 +713,19 @@ namespace VegaISA
             gpuDynInst->exec_mask = old_exec_mask;
         }
 
+        template<typename T>
+        void
+        initAtomicAccess(GPUDynInstPtr gpuDynInst)
+        {
+            // temporarily modify exec_mask to suppress memory accesses to oob
+            // regions.  Only issue memory requests for lanes that have their
+            // exec_mask set and are not out of bounds.
+            VectorMask old_exec_mask = gpuDynInst->exec_mask;
+            gpuDynInst->exec_mask &= ~oobMask;
+            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
+            gpuDynInst->exec_mask = old_exec_mask;
+        }
+
         void
         injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
         {
@@ -821,7 +939,8 @@ namespace VegaISA
         void
         initMemRead(GPUDynInstPtr gpuDynInst)
         {
-            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
+                gpuDynInst->executedAs() == enums::SC_PRIVATE) {
                 initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
             } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                 Wavefront *wf = gpuDynInst->wavefront();
@@ -839,7 +958,8 @@ namespace VegaISA
         void
         initMemRead(GPUDynInstPtr gpuDynInst)
         {
-            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
+                gpuDynInst->executedAs() == enums::SC_PRIVATE) {
                 initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
             } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                 Wavefront *wf = gpuDynInst->wavefront();
@@ -861,7 +981,8 @@ namespace VegaISA
         void
         initMemWrite(GPUDynInstPtr gpuDynInst)
         {
-            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
+                gpuDynInst->executedAs() == enums::SC_PRIVATE) {
                 initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
             } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                 Wavefront *wf = gpuDynInst->wavefront();
@@ -879,7 +1000,8 @@ namespace VegaISA
         void
         initMemWrite(GPUDynInstPtr gpuDynInst)
         {
-            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL ||
+                gpuDynInst->executedAs() == enums::SC_PRIVATE) {
                 initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
             } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
                 Wavefront *wf = gpuDynInst->wavefront();
@@ -901,6 +1023,10 @@ namespace VegaISA
         void
         initAtomicAccess(GPUDynInstPtr gpuDynInst)
         {
+            // Flat scratch requests may not be atomic according to ISA manual
+            // up to MI200. See MI200 manual Table 45.
+            assert(gpuDynInst->executedAs() != enums::SC_PRIVATE);
+
             if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
                 initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
             } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
@@ -939,7 +1065,8 @@ namespace VegaISA
             // If saddr = 0x7f there is no scalar reg to read and address will
             // be a 64-bit address. Otherwise, saddr is the reg index for a
             // scalar reg used as the base address for a 32-bit address.
-            if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
+            if ((saddr == 0x7f && (isFlatGlobal() || isFlatScratch()))
+                || isFlat()) {
                 ConstVecOperandU64 vbase(gpuDynInst, vaddr);
                 vbase.read();
 
@@ -958,9 +1085,13 @@ namespace VegaISA
 
             if (isFlat()) {
                 gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
-            } else {
+            } else if (isFlatGlobal()) {
                 gpuDynInst->staticInstruction()->executed_as =
                     enums::SC_GLOBAL;
+            } else {
+                assert(isFlatScratch());
+                gpuDynInst->staticInstruction()->executed_as =
+                    enums::SC_PRIVATE;
             }
         }
 
@@ -976,7 +1107,9 @@ namespace VegaISA
                 gpuDynInst->computeUnit()->localMemoryPipe
                     .issueRequest(gpuDynInst);
             } else {
-                fatal("Unsupported scope for flat instruction.\n");
+                assert(gpuDynInst->executedAs() == enums::SC_PRIVATE);
+                gpuDynInst->computeUnit()->globalMemoryPipe
+                    .issueRequest(gpuDynInst);
             }
         }
 
@@ -993,10 +1126,10 @@ namespace VegaISA
 
       private:
         void initFlatOperandInfo();
-        void initGlobalOperandInfo();
+        void initGlobalScratchOperandInfo();
 
         void generateFlatDisassembly();
-        void generateGlobalDisassembly();
+        void generateGlobalScratchDisassembly();
 
         void
         calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &vaddr,
diff --git a/src/arch/amdgpu/vega/pagetable_walker.cc b/src/arch/amdgpu/vega/pagetable_walker.cc
index 96ac0fe179..6a71b14838 100644
--- a/src/arch/amdgpu/vega/pagetable_walker.cc
+++ b/src/arch/amdgpu/vega/pagetable_walker.cc
@@ -239,9 +239,22 @@ Walker::WalkerState::walkStateMachine(PageTableEntry &pte, Addr &nextRead,
     Addr part2 = 0;
     PageDirectoryEntry pde = static_cast<PageDirectoryEntry>(pte);
 
-    // For a four level page table block fragment size should not be needed.
-    // For now issue a panic to prevent strange behavior if it is non-zero.
-    panic_if(pde.blockFragmentSize, "PDE blockFragmentSize must be 0");
+    // Block fragment size can change the size of the pages pointed to while
+    // moving to the next PDE. A value of 0 implies native page size. A
+    // non-zero value implies the next leaf in the page table is a PTE unless
+    // the F bit is set. If we see a non-zero value, set it here and print
+    // for debugging.
+    if (pde.blockFragmentSize) {
+        DPRINTF(GPUPTWalker,
+                "blockFragmentSize: %d, pde: %#016lx, state: %d\n",
+                pde.blockFragmentSize, pde, state);
+        blockFragmentSize = pde.blockFragmentSize;
+
+        // At this time, only a value of 9 is used in the driver:
+        // https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/
+        //     amd/amdgpu/gmc_v9_0.c#L1165
+        assert(pde.blockFragmentSize == 9);
+    }
 
     switch(state) {
       case PDE2:
@@ -287,7 +300,7 @@ Walker::WalkerState::walkStateMachine(PageTableEntry &pte, Addr &nextRead,
         nextState = PDE0;
         break;
       case PDE0:
-        if (pde.p) {
+        if (pde.p || (blockFragmentSize && !pte.f)) {
             DPRINTF(GPUPTWalker, "Treating PDE0 as PTE: %#016x frag: %d\n",
                     (uint64_t)pte, pte.fragment);
             entry.pte = pte;
@@ -299,7 +312,15 @@ Walker::WalkerState::walkStateMachine(PageTableEntry &pte, Addr &nextRead,
         }
         // Read the PteAddr
         part1 = ((((uint64_t)pte) >> 6) << 3);
-        part2 = offsetFunc(vaddr, 9, 0);
+        if (pte.f) {
+            // For F bit we want to use the blockFragmentSize in the previous
+            // PDE and the blockFragmentSize in this PTE for offset function.
+            part2 = offsetFunc(vaddr,
+                               blockFragmentSize,
+                               pde.blockFragmentSize);
+        } else {
+            part2 = offsetFunc(vaddr, 9, 0);
+        }
         nextRead = ((part1 + part2) << 3) & mask(48);
         DPRINTF(GPUPTWalker,
                 "Got PDE0 entry %#016x. write:%s->%#016x va:%#016x\n",
@@ -369,6 +390,7 @@ bool Walker::sendTiming(WalkerState* sending_walker, PacketPtr pkt)
         return true;
     } else {
         (void)pkt->popSenderState();
+        delete walker_state;
     }
 
     return false;
diff --git a/src/arch/amdgpu/vega/pagetable_walker.hh b/src/arch/amdgpu/vega/pagetable_walker.hh
index 2ad0748c14..232be5de70 100644
--- a/src/arch/amdgpu/vega/pagetable_walker.hh
+++ b/src/arch/amdgpu/vega/pagetable_walker.hh
@@ -99,11 +99,13 @@ class Walker : public ClockedObject
         bool started;
         bool timing;
         PacketPtr tlbPkt;
+        int blockFragmentSize;
 
       public:
         WalkerState(Walker *_walker, PacketPtr pkt, bool is_functional = false)
             : walker(_walker), state(Ready), nextState(Ready), dataSize(8),
-              enableNX(true), retrying(false), started(false), tlbPkt(pkt)
+              enableNX(true), retrying(false), started(false), tlbPkt(pkt),
+              blockFragmentSize(0)
         {
             DPRINTF(GPUPTWalker, "Walker::WalkerState %p %p %d\n",
                     this, walker, state);
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 8c1ee5ae42..97c2609f50 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -41,6 +41,7 @@
 from m5.objects.ArmSystem import SveVectorLength, SmeVectorLength, ArmRelease
 from m5.objects.BaseISA import BaseISA
 
+
 # Enum for DecoderFlavor
 class DecoderFlavor(Enum):
     vals = ["Generic"]
diff --git a/src/arch/arm/ArmMMU.py b/src/arch/arm/ArmMMU.py
index dba6618567..e2e548b1b3 100644
--- a/src/arch/arm/ArmMMU.py
+++ b/src/arch/arm/ArmMMU.py
@@ -42,6 +42,7 @@
 from m5.params import *
 from m5.proxy import *
 
+
 # Basic stage 1 translation objects
 class ArmTableWalker(ClockedObject):
     type = "ArmTableWalker"
diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py
index a4a2ebe843..3fd619b55d 100644
--- a/src/arch/arm/ArmPMU.py
+++ b/src/arch/arm/ArmPMU.py
@@ -43,7 +43,7 @@
 from m5.util.fdthelper import *
 
 
-class ProbeEvent(object):
+class ProbeEvent:
     def __init__(self, pmu, _eventId, obj, *listOfNames):
         self.obj = obj
         self.names = listOfNames
@@ -58,7 +58,7 @@ def register(self):
                 )
 
 
-class SoftwareIncrement(object):
+class SoftwareIncrement:
     def __init__(self, pmu, _eventId):
         self.eventId = _eventId
         self.pmu = pmu
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 40a3a04b90..f66c9c83d2 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2009, 2012-2013, 2015-2022 ARM Limited
+# Copyright (c) 2009, 2012-2013, 2015-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -89,6 +89,7 @@ class ArmExtension(ScopedEnum):
         # Armv8.4
         "FEAT_SEL2",
         "FEAT_TLBIOS",
+        "FEAT_TLBIRANGE",
         "FEAT_FLAGM",
         "FEAT_IDST",
         # Armv8.5
@@ -96,8 +97,13 @@ class ArmExtension(ScopedEnum):
         "FEAT_RNG",
         "FEAT_RNG_TRAP",
         "FEAT_EVT",
+        # Armv8.6
+        "FEAT_FGT",
         # Armv8.7
         "FEAT_HCX",
+        # Armv8.9
+        "FEAT_SCTLR2",
+        "FEAT_TCR2",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -181,11 +187,14 @@ class ArmDefaultRelease(Armv8):
         # Armv8.4
         "FEAT_SEL2",
         "FEAT_TLBIOS",
+        "FEAT_TLBIRANGE",
         "FEAT_FLAGM",
         "FEAT_IDST",
         # Armv8.5
         "FEAT_FLAGM2",
         "FEAT_EVT",
+        # Armv8.6
+        "FEAT_FGT",
         # Armv8.7
         "FEAT_HCX",
         # Armv9.2
@@ -225,6 +234,7 @@ class Armv84(Armv83):
     extensions = Armv83.extensions + [
         "FEAT_SEL2",
         "FEAT_TLBIOS",
+        "FEAT_TLBIRANGE",
         "FEAT_FLAGM",
         "FEAT_IDST",
     ]
@@ -239,14 +249,24 @@ class Armv85(Armv84):
     ]
 
 
-class Armv87(Armv85):
+class Armv86(Armv85):
     extensions = Armv85.extensions + [
+        "FEAT_FGT",
+    ]
+
+
+class Armv87(Armv86):
+    extensions = Armv86.extensions + [
         "FEAT_HCX",
     ]
 
 
-class Armv92(Armv87):
-    extensions = Armv87.extensions + ["FEAT_SME"]
+class Armv89(Armv87):
+    extensions = Armv87.extensions + ["FEAT_SCTLR2", "FEAT_TCR2"]
+
+
+class Armv92(Armv89):
+    extensions = Armv89.extensions + ["FEAT_SME"]
 
 
 class ArmAllRelease(ArmRelease):
diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh
index 83690936c0..75488b6750 100644
--- a/src/arch/arm/decoder.hh
+++ b/src/arch/arm/decoder.hh
@@ -138,6 +138,7 @@ class Decoder : public InstDecoder
         StaticInstPtr si = defaultCache.decode(this, mach_inst, addr);
         DPRINTF(Decode, "Decode: Decoded %s instruction: %#x\n",
                 si->getName(), mach_inst);
+        si->size((!emi.thumb || emi.bigThumb) ? 4 : 2);
         return si;
     }
 
diff --git a/src/arch/arm/fastmodel/CortexA76/thread_context.cc b/src/arch/arm/fastmodel/CortexA76/thread_context.cc
index c6704852fc..eb936b8ea4 100644
--- a/src/arch/arm/fastmodel/CortexA76/thread_context.cc
+++ b/src/arch/arm/fastmodel/CortexA76/thread_context.cc
@@ -228,7 +228,7 @@ Iris::ThreadContext::IdxNameMap CortexA76TC::miscRegIdxNameMap({
         // ArmISA::MISCREG_SCTLR_RST?
         { ArmISA::MISCREG_SEV_MAILBOX, "SEV_STATE" },
 
-        // AArch32 CP14 registers (debug/trace/ThumbEE/Jazelle control)
+        // AArch32 CP14 registers (debug/trace control)
         // ArmISA::MISCREG_DBGDIDR?
         // ArmISA::MISCREG_DBGDSCRint?
         // ArmISA::MISCREG_DBGDCCINT?
diff --git a/src/arch/arm/fastmodel/CortexA76/x1/x1.sgproj b/src/arch/arm/fastmodel/CortexA76/x1/x1.sgproj
index f3fd0db70f..7101b499db 100644
--- a/src/arch/arm/fastmodel/CortexA76/x1/x1.sgproj
+++ b/src/arch/arm/fastmodel/CortexA76/x1/x1.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexA76/x2/x2.sgproj b/src/arch/arm/fastmodel/CortexA76/x2/x2.sgproj
index abec8addca..54327dc723 100644
--- a/src/arch/arm/fastmodel/CortexA76/x2/x2.sgproj
+++ b/src/arch/arm/fastmodel/CortexA76/x2/x2.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexA76/x3/x3.sgproj b/src/arch/arm/fastmodel/CortexA76/x3/x3.sgproj
index 666b1dc0f4..5809b31f66 100644
--- a/src/arch/arm/fastmodel/CortexA76/x3/x3.sgproj
+++ b/src/arch/arm/fastmodel/CortexA76/x3/x3.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexA76/x4/x4.sgproj b/src/arch/arm/fastmodel/CortexA76/x4/x4.sgproj
index e3c9063ac7..1866c98be8 100644
--- a/src/arch/arm/fastmodel/CortexA76/x4/x4.sgproj
+++ b/src/arch/arm/fastmodel/CortexA76/x4/x4.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexR52/thread_context.cc b/src/arch/arm/fastmodel/CortexR52/thread_context.cc
index a20f8e0a89..b88bd7d99b 100644
--- a/src/arch/arm/fastmodel/CortexR52/thread_context.cc
+++ b/src/arch/arm/fastmodel/CortexR52/thread_context.cc
@@ -188,7 +188,7 @@ Iris::ThreadContext::IdxNameMap CortexR52TC::miscRegIdxNameMap({
         // ArmISA::MISCREG_SCTLR_RST?
         // ArmISA::MISCREG_SEV_MAILBOX?
 
-        // AArch32 CP14 registers (debug/trace/ThumbEE/Jazelle control)
+        // AArch32 CP14 registers (debug/trace control)
         // ArmISA::MISCREG_DBGDIDR?
         // ArmISA::MISCREG_DBGDSCRint?
         // ArmISA::MISCREG_DBGDCCINT?
diff --git a/src/arch/arm/fastmodel/CortexR52/x1/x1.sgproj b/src/arch/arm/fastmodel/CortexR52/x1/x1.sgproj
index 9d2a5749f6..83c0584869 100644
--- a/src/arch/arm/fastmodel/CortexR52/x1/x1.sgproj
+++ b/src/arch/arm/fastmodel/CortexR52/x1/x1.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexR52/x2/x2.sgproj b/src/arch/arm/fastmodel/CortexR52/x2/x2.sgproj
index e103170ed1..ab8399a419 100644
--- a/src/arch/arm/fastmodel/CortexR52/x2/x2.sgproj
+++ b/src/arch/arm/fastmodel/CortexR52/x2/x2.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexR52/x3/x3.sgproj b/src/arch/arm/fastmodel/CortexR52/x3/x3.sgproj
index 0c92809924..a9bfa3fa08 100644
--- a/src/arch/arm/fastmodel/CortexR52/x3/x3.sgproj
+++ b/src/arch/arm/fastmodel/CortexR52/x3/x3.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/CortexR52/x4/x4.sgproj b/src/arch/arm/fastmodel/CortexR52/x4/x4.sgproj
index 6a145fd1c1..dc333763cf 100644
--- a/src/arch/arm/fastmodel/CortexR52/x4/x4.sgproj
+++ b/src/arch/arm/fastmodel/CortexR52/x4/x4.sgproj
@@ -16,8 +16,6 @@ config "gcc"
     SIMGEN_COMMAND_LINE = "--num-comps-file 50";
     TARGET_SYSTEMC = "1";
     TARGET_SYSTEMC_AUTO = "1";
-
-    INCLUDE_DIRS="../../../../../";
 }
 files
 {
diff --git a/src/arch/arm/fastmodel/GIC/GIC.sgproj b/src/arch/arm/fastmodel/GIC/GIC.sgproj
index aa5e6aec32..fd5c04a5a5 100644
--- a/src/arch/arm/fastmodel/GIC/GIC.sgproj
+++ b/src/arch/arm/fastmodel/GIC/GIC.sgproj
@@ -5,7 +5,7 @@ ACTIVE_CONFIG_LINUX  = "gcc";
 ACTIVE_CONFIG_WINDOWS  = "Win64-Release-VC2015";
 config "gcc"
 {
-    ADDITIONAL_COMPILER_SETTINGS = "-O3 -Wall -std=c++14 -Wno-deprecated -Wno-unused-function -I../../../../../";
+    ADDITIONAL_COMPILER_SETTINGS = "-O3 -Wall -std=c++14 -Wno-deprecated -Wno-unused-function";
     ADDITIONAL_LINKER_SETTINGS = "-Wl,--no-undefined";
     BUILD_DIR = "./gcc";
     COMPILER = "gcc-7.3";
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/PL330.sgproj b/src/arch/arm/fastmodel/PL330_DMAC/PL330.sgproj
index d59849c409..c972ad760c 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/PL330.sgproj
+++ b/src/arch/arm/fastmodel/PL330_DMAC/PL330.sgproj
@@ -5,7 +5,7 @@ ACTIVE_CONFIG_LINUX  = "gcc";
 ACTIVE_CONFIG_WINDOWS  = "Win64-Release-VC2015";
 config "gcc"
 {
-    ADDITIONAL_COMPILER_SETTINGS = "-O3 -Wall -std=c++14 -Wno-deprecated -Wno-unused-function -I../../../../../";
+    ADDITIONAL_COMPILER_SETTINGS = "-O3 -Wall -std=c++14 -Wno-deprecated -Wno-unused-function";
     ADDITIONAL_LINKER_SETTINGS = "-Wl,--no-undefined";
     BUILD_DIR = "./gcc";
     COMPILER = "gcc-7.3";
diff --git a/src/arch/arm/fastmodel/SConscript b/src/arch/arm/fastmodel/SConscript
index 9d9d183516..7c6019e2a8 100644
--- a/src/arch/arm/fastmodel/SConscript
+++ b/src/arch/arm/fastmodel/SConscript
@@ -218,10 +218,9 @@ class ProjectFileParser(Grammar):
     t_ID = r'[A-Za-z_]\w*'
 
     def t_STRLIT(self, t):
-        r'(?m)"([^"])*"'
+        r'"([^"])*"'
         # strip off quotes
         t.value = t.value[1:-1]
-        t.lexer.lineno += t.value.count('\n')
         return t
 
     t_EQUALS = r'='
@@ -377,11 +376,12 @@ class ArmFastModelComponent(object):
         self.rpaths = [simgen_dir, project_file_dir]
         self.log = gen_dir.File('build_%s.log' % tlc)
         self.simgen_cmd = env.subst('${CONF["SIMGEN"]} -p %s '
-            '--configuration %s -b --verbose off --num-build-cpus %d %s '
+            '--configuration %s -b --verbose off --num-build-cpus %d -I %s %s '
             '--build-dir %s >%s') % \
             (shlex.quote(project_file.srcnode().abspath),
              shlex.quote(config_name),
              GetOption('num_jobs'),
+             shlex.quote(Dir('#/src').srcnode().abspath),
              simgen_command_line,
              shlex.quote(simgen_dir.abspath),
              shlex.quote(self.log.abspath))
diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py
index 5a38eb132b..45a97d7957 100644
--- a/src/arch/arm/fastmodel/arm_fast_model.py
+++ b/src/arch/arm/fastmodel/arm_fast_model.py
@@ -23,6 +23,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import datetime
 import logging
 import os
 import socket
@@ -44,11 +45,46 @@ def set_armlmd_license_file(force=False):
         os.environ[ARM_LICENSE_ENV] = license_file
 
 
-def check_armlmd_license(timeout):
+def check_armlmd_server(server, timeout):
+    """Check if the "server" passed as parameter is available. server
+    can also be a license file"""
+    if os.path.exists(server):
+        logging.debug(f"License file {server} exists.")
+        return True
+
+    tuple = server.split("@")
+    if len(tuple) != 2:
+        # Probably not a server, and we know the file doesn't exist.
+        logging.debug(f'License file "{server}" does not exist.')
+        return False
+
+    try:
+        start = datetime.datetime.now()
+        # Try to connect to license server. This doesn't attempt to
+        # communicate with it, just checking reachability.
+        s = socket.create_connection(
+            (tuple[1], int(tuple[0])), timeout=timeout
+        )
+        end = datetime.datetime.now()
+        s.close()
+        time = end - start
+        logging.info(
+            f"License server {server} is reachable ({time.total_seconds()} seconds)."
+        )
+        return True
+    except Exception as e:
+        logging.debug(
+            f"Cannot connect to license server {server} ({type(e).__name__}: {e})."
+        )
+        return False
+
+
+def check_armlmd_license(timeout, tries):
     """Check if any of the provided license server can be reached, or
     if a license file is provided. This allows to fail early and fast,
     as fastmodel code makes multiple lengthy attempts to connect to
-    license server. "timeout" is in seconds.
+    license server. "timeout" is in seconds. Makes "tries" attempts to
+    connect.
     """
     servers = os.environ[ARM_LICENSE_ENV].split(":")
 
@@ -62,33 +98,17 @@ def check_armlmd_license(timeout):
         if extra not in servers:
             servers.append(extra)
 
-    for server in servers:
-        if os.path.exists(server):
-            logging.debug(f"License file {server} exists.")
-            break
-
-        tuple = server.split("@")
-        if len(tuple) != 2:
-            # Probably not a server, and we know the file doesn't exist.
-            logging.debug(f'License file "{server}" does not exist.')
-            continue
-
-        try:
-            # Try to connect to license server. This doesn't attempt to
-            # communicate with it, just checking reachability.
-            s = socket.create_connection(
-                (tuple[1], int(tuple[0])), timeout=timeout
-            )
-            s.close()
-            logging.debug(f"License server {server} is reachable.")
-            break
-        except Exception as e:
-            logging.debug(
-                f"Cannot connect to license server {server} ({type(e).__name__}: {e})."
+    for try_count in range(1, tries + 1):
+        for server in servers:
+            if check_armlmd_server(server, timeout):
+                return
+        if try_count == tries:
+            raise ConnectionError(
+                f"Cannot connect to any of the license servers ({', '.join(servers)})."
             )
-    else:
-        raise ConnectionError(
-            f"Cannot connect to any of the license servers ({', '.join(servers)})."
+        # retry
+        logging.warning(
+            "Cannot connect to any of the license servers, retrying..."
         )
 
 
@@ -199,10 +219,11 @@ def setup_simulation(
     exit_on_dmi_warning=False,
     license_precheck=False,
     license_precheck_timeout=1,
+    license_precheck_tries=3,
 ):
     set_armlmd_license_file()
     if license_precheck:
-        check_armlmd_license(license_precheck_timeout)
+        check_armlmd_license(license_precheck_timeout, license_precheck_tries)
     scx_initialize(sim_name)
     scx_set_min_sync_latency(min_sync_latency)
     if exit_on_dmi_warning:
diff --git a/src/arch/arm/fastmodel/iris/thread_context.cc b/src/arch/arm/fastmodel/iris/thread_context.cc
index 462995a19a..0919251a9b 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.cc
+++ b/src/arch/arm/fastmodel/iris/thread_context.cc
@@ -581,8 +581,6 @@ ThreadContext::pcState() const
 
     pc.thumb(cpsr.t);
     pc.nextThumb(pc.thumb());
-    pc.jazelle(cpsr.j);
-    pc.nextJazelle(cpsr.j);
     pc.aarch64(!cpsr.width);
     pc.nextAArch64(!cpsr.width);
     pc.illegalExec(false);
diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc
index 379e761f98..4b906f226f 100644
--- a/src/arch/arm/faults.cc
+++ b/src/arch/arm/faults.cc
@@ -565,7 +565,6 @@ ArmFault::invoke32(ThreadContext *tc, const StaticInstPtr &inst)
         cpsr.i = 1;
     }
     cpsr.it1 = cpsr.it2 = 0;
-    cpsr.j = 0;
     cpsr.pan = span ? 1 : saved_cpsr.pan;
     tc->setMiscReg(MISCREG_CPSR, cpsr);
 
@@ -622,8 +621,6 @@ ArmFault::invoke32(ThreadContext *tc, const StaticInstPtr &inst)
     PCState pc(new_pc);
     pc.thumb(cpsr.t);
     pc.nextThumb(pc.thumb());
-    pc.jazelle(cpsr.j);
-    pc.nextJazelle(pc.jazelle());
     pc.aarch64(!cpsr.width);
     pc.nextAArch64(!cpsr.width);
     pc.illegalExec(false);
@@ -666,7 +663,6 @@ ArmFault::invoke64(ThreadContext *tc, const StaticInstPtr &inst)
         // Force some bitfields to 0
         spsr.q = 0;
         spsr.it1 = 0;
-        spsr.j = 0;
         spsr.ge = 0;
         spsr.it2 = 0;
         spsr.t = 0;
diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc
index 4f573fca83..4919d92da8 100644
--- a/src/arch/arm/insts/misc64.cc
+++ b/src/arch/arm/insts/misc64.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2013,2017-2022 Arm Limited
+ * Copyright (c) 2011-2013,2017-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -852,6 +852,354 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
             }
             return;
         }
+      case MISCREG_TLBI_RVAE1_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            auto asid = asid_16bits ? bits(value, 63, 48) :
+                                      bits(value, 55, 48);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVA tlbiOp(target_el, secure, value, asid, false);
+
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAE1IS_Xt:
+      case MISCREG_TLBI_RVAE1OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            auto asid = asid_16bits ? bits(value, 63, 48) :
+                                      bits(value, 55, 48);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVA tlbiOp(target_el, secure, value, asid, false);
+
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAAE1_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVAA tlbiOp(target_el, secure, value, false);
+
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAAE1IS_Xt:
+      case MISCREG_TLBI_RVAAE1OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVAA tlbiOp(target_el, secure, value, false);
+
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVALE1_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            auto asid = asid_16bits ? bits(value, 63, 48) :
+                                      bits(value, 55, 48);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVA tlbiOp(target_el, secure, value, asid, true);
+
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVALE1IS_Xt:
+      case MISCREG_TLBI_RVALE1OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            auto asid = asid_16bits ? bits(value, 63, 48) :
+                                      bits(value, 55, 48);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVA tlbiOp(target_el, secure, value, asid, true);
+
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAALE1_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVAA tlbiOp(target_el, secure, value, true);
+
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAALE1IS_Xt:
+      case MISCREG_TLBI_RVAALE1OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+            ExceptionLevel target_el = EL1;
+            if (EL2Enabled(tc)) {
+                HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+                if (hcr.tge && hcr.e2h) {
+                    target_el = EL2;
+                }
+            }
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+            TLBIRMVAA tlbiOp(target_el, secure, value, true);
+
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
+      case MISCREG_TLBI_RIPAS2E1_Xt:
+        {
+            if (EL2Enabled(tc)) {
+                SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+                bool secure = release->has(ArmExtension::SECURITY) &&
+                    !scr.ns && !bits(value, 63);
+
+                TLBIRIPA tlbiOp(EL1, secure, value, false);
+
+                tlbiOp(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RIPAS2E1IS_Xt:
+        {
+            if (EL2Enabled(tc)) {
+                SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+                bool secure = release->has(ArmExtension::SECURITY) &&
+                    !scr.ns && !bits(value, 63);
+
+                TLBIRIPA tlbiOp(EL1, secure, value, false);
+
+                tlbiOp.broadcast(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RIPAS2LE1_Xt:
+        {
+            if (EL2Enabled(tc)) {
+                SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+                bool secure = release->has(ArmExtension::SECURITY) &&
+                    !scr.ns && !bits(value, 63);
+
+                TLBIRIPA tlbiOp(EL1, secure, value, true);
+
+                tlbiOp(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RIPAS2LE1IS_Xt:
+        {
+            if (EL2Enabled(tc)) {
+                SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+
+                bool secure = release->has(ArmExtension::SECURITY) &&
+                    !scr.ns && !bits(value, 63);
+
+                TLBIRIPA tlbiOp(EL1, secure, value, true);
+
+                tlbiOp.broadcast(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RVAE2_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+
+            if (hcr.e2h) {
+                // The asid will only be used when e2h == 1
+                auto asid = asid_16bits ? bits(value, 63, 48) :
+                                          bits(value, 55, 48);
+
+                TLBIRMVA tlbiOp(EL2, secure, value, asid, false);
+
+                if (tlbiOp.valid())
+                    tlbiOp(tc);
+            } else {
+                TLBIRMVAA tlbiOp(EL2, secure, value, false);
+
+                if (tlbiOp.valid())
+                    tlbiOp(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RVAE2IS_Xt:
+      case MISCREG_TLBI_RVAE2OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+
+            if (hcr.e2h) {
+                // The asid will only be used when e2h == 1
+                auto asid = asid_16bits ? bits(value, 63, 48) :
+                                          bits(value, 55, 48);
+
+                TLBIRMVA tlbiOp(EL2, secure, value, asid, false);
+
+                if (tlbiOp.valid())
+                    tlbiOp.broadcast(tc);
+            } else {
+                TLBIRMVAA tlbiOp(EL2, secure, value, false);
+
+                if (tlbiOp.valid())
+                    tlbiOp.broadcast(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RVALE2_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+
+            if (hcr.e2h) {
+                // The asid will only be used when e2h == 1
+                auto asid = asid_16bits ? bits(value, 63, 48) :
+                                          bits(value, 55, 48);
+
+                TLBIRMVA tlbiOp(EL2, secure, value, asid, true);
+
+                if (tlbiOp.valid())
+                    tlbiOp(tc);
+            } else {
+                TLBIRMVAA tlbiOp(EL2, secure, value, true);
+
+                if (tlbiOp.valid())
+                    tlbiOp(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RVALE2IS_Xt:
+      case MISCREG_TLBI_RVALE2OS_Xt:
+        {
+            SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+            HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+
+            bool secure = release->has(ArmExtension::SECURITY) && !scr.ns;
+
+            if (hcr.e2h) {
+                // The asid will only be used when e2h == 1
+                auto asid = asid_16bits ? bits(value, 63, 48) :
+                                          bits(value, 55, 48);
+
+                TLBIRMVA tlbiOp(EL2, secure, value, asid, true);
+
+                if (tlbiOp.valid())
+                    tlbiOp.broadcast(tc);
+            } else {
+                TLBIRMVAA tlbiOp(EL2, secure, value, true);
+
+                if (tlbiOp.valid())
+                    tlbiOp.broadcast(tc);
+            }
+            return;
+        }
+      case MISCREG_TLBI_RVAE3_Xt:
+        {
+            TLBIRMVAA tlbiOp(EL3, true, value, false);
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVAE3IS_Xt:
+      case MISCREG_TLBI_RVAE3OS_Xt:
+        {
+            TLBIRMVAA tlbiOp(EL3, true, value, false);
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVALE3_Xt:
+        {
+            TLBIRMVAA tlbiOp(EL3, true, value, true);
+            if (tlbiOp.valid())
+                tlbiOp(tc);
+            return;
+        }
+      case MISCREG_TLBI_RVALE3IS_Xt:
+      case MISCREG_TLBI_RVALE3OS_Xt:
+        {
+            TLBIRMVAA tlbiOp(EL3, true, value, true);
+            if (tlbiOp.valid())
+                tlbiOp.broadcast(tc);
+            return;
+        }
       default:
         panic("Invalid TLBI\n");
     }
diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh
index da3db6c6a5..6f46831edb 100644
--- a/src/arch/arm/insts/pred_inst.hh
+++ b/src/arch/arm/insts/pred_inst.hh
@@ -378,6 +378,15 @@ class PredMacroOp : public PredOp
 
     std::string generateDisassembly(
             Addr pc, const loader::SymbolTable *symtab) const override;
+
+    // Propagate the new size to every micro-op, then record it here.
+    void size(size_t newSize) override
+    {
+        for (int i = 0; i < numMicroops; i++) {
+            microOps[i]->size(newSize);
+        }
+        _size = newSize;
+    }
 };
 
 /**
diff --git a/src/arch/arm/interrupts.cc b/src/arch/arm/interrupts.cc
index 57b1334b77..c2f1009147 100644
--- a/src/arch/arm/interrupts.cc
+++ b/src/arch/arm/interrupts.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2012-2013, 2016, 2019 ARM Limited
+ * Copyright (c) 2009, 2012-2013, 2016, 2019, 2023 Arm Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -43,12 +43,9 @@ namespace gem5
 {
 
 bool
-ArmISA::Interrupts::takeInt(InterruptTypes int_type) const
+ArmISA::Interrupts::takeInt32(InterruptTypes int_type) const
 {
-    // Table G1-17~19 of ARM V8 ARM
     InterruptMask mask;
-    bool highest_el_is_64 = ArmSystem::highestELIs64(tc);
-
     CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
     SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);;
     HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
@@ -82,74 +79,210 @@ ArmISA::Interrupts::takeInt(InterruptTypes int_type) const
     if (hcr.tge)
         hcr_mask_override_bit = 1;
 
-    if (!highest_el_is_64) {
-        // AArch32
-        if (!scr_routing_bit) {
-            // SCR IRQ == 0
-            if (!hcr_mask_override_bit)
-                mask = INT_MASK_M;
-            else {
-                if (!is_secure && (el == EL0 || el == EL1))
-                    mask = INT_MASK_T;
-                else
-                    mask = INT_MASK_M;
-            }
-        } else {
-            // SCR IRQ == 1
-            if ((!is_secure) &&
-                (hcr_mask_override_bit ||
-                    (!scr_fwaw_bit && !hcr_mask_override_bit)))
+    if (!scr_routing_bit) {
+        // SCR IRQ == 0
+        if (!hcr_mask_override_bit)
+            mask = INT_MASK_M;
+        else {
+            if (!is_secure && (el == EL0 || el == EL1))
                 mask = INT_MASK_T;
             else
                 mask = INT_MASK_M;
         }
     } else {
-        // AArch64
-        if (!scr_routing_bit) {
-            // SCR IRQ == 0
-            if (!scr.rw) {
-                // SCR RW == 0
-                if (!hcr_mask_override_bit) {
+        // SCR IRQ == 1
+        if ((!is_secure) &&
+            (hcr_mask_override_bit ||
+                (!scr_fwaw_bit && !hcr_mask_override_bit)))
+            mask = INT_MASK_T;
+        else
+            mask = INT_MASK_M;
+    }
+    return ((mask == INT_MASK_T) ||
+            ((mask == INT_MASK_M) && !cpsr_mask_bit)) &&
+            (mask != INT_MASK_P);
+}
+
+
+bool
+ArmISA::Interrupts::takeInt64(InterruptTypes int_type) const
+{
+    InterruptMask mask;
+    CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+    SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);;
+    HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+    ExceptionLevel el = currEL(tc);
+    bool cpsr_mask_bit, scr_routing_bit, hcr_mask_override_bit;
+    bool is_secure = isSecureBelowEL3(tc);
+
+    switch(int_type) {
+      case INT_FIQ:
+        cpsr_mask_bit = cpsr.f;
+        scr_routing_bit = scr.fiq;
+        hcr_mask_override_bit = hcr.fmo;
+        break;
+      case INT_IRQ:
+        cpsr_mask_bit = cpsr.i;
+        scr_routing_bit = scr.irq;
+        hcr_mask_override_bit = hcr.imo;
+        break;
+      case INT_ABT:
+        cpsr_mask_bit = cpsr.a;
+        scr_routing_bit = scr.ea;
+        hcr_mask_override_bit = hcr.amo;
+        break;
+      default:
+        panic("Unhandled interrupt type!");
+    }
+
+    if (is_secure) {
+        if (!scr.eel2) {
+            if (!scr_routing_bit) {
+                // NS=0,EEL2=0,EAI/IRQ/FIQ=0
+                if (el == EL3)
+                    mask = INT_MASK_P;
+                else
+                    mask = INT_MASK_M;
+            } else {
+                // NS=0,EEL2=0,EAI/IRQ/FIQ=1
+                if (el == EL3)
+                    mask = INT_MASK_M;
+                else
+                    mask = INT_MASK_T;
+            }
+        } else {
+            if (!scr_routing_bit) {
+                if (!hcr.tge) {
+                    if (!hcr_mask_override_bit) {
+                        // NS=0,EEL2=1,EAI/IRQ/FIQ=0,TGE=0,AMO/IMO/FMO=0
+                        if (el == EL3 || el == EL2)
+                            mask = INT_MASK_P;
+                        else
+                            mask = INT_MASK_M;
+                    } else {
+                        // NS=0,EEL2=1,EAI/IRQ/FIQ=0,TGE=0,AMO/IMO/FMO=1
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else if (el == EL2)
+                            mask = INT_MASK_M;
+                        else
+                            mask = INT_MASK_T;
+                    }
+                } else {
+                    if (!hcr.e2h) {
+                        // NS=0,EEL2=1,EAI/IRQ/FIQ=0,TGE=1,E2H=0
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else if (el == EL2)
+                            mask = INT_MASK_M;
+                        else
+                            mask = INT_MASK_T;
+                    } else {
+                        // NS=0,EEL2=1,EAI/IRQ/FIQ=0,TGE=1,E2H=1
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else
+                            mask = INT_MASK_M;
+                    }
+                }
+            } else {
+                if (!hcr.tge) {
+                    // NS=0,EEL2=1,EAI/IRQ/FIQ=1,TGE=0
                     if (el == EL3)
-                        mask = INT_MASK_P;
-                    else
                         mask = INT_MASK_M;
+                    else
+                        mask = INT_MASK_T;
                 } else {
+                    // NS=0,EEL2=1,EAI/IRQ/FIQ=1,TGE=1
                     if (el == EL3)
-                        mask = INT_MASK_T;
-                    else if (is_secure || el == EL2)
                         mask = INT_MASK_M;
                     else
                         mask = INT_MASK_T;
                 }
-            } else {
-                // SCR RW == 1
-                if (!hcr_mask_override_bit) {
-                    if (el == EL3 || el == EL2)
-                        mask = INT_MASK_P;
-                    else
-                        mask = INT_MASK_M;
+            }
+        }
+    } else {
+        if (!scr_routing_bit) {
+            if (!scr.rw) {
+                if (!hcr.tge) {
+                    if (!hcr_mask_override_bit) {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=0,TGE=0,AMO/IMO/FMO=0
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else
+                            mask = INT_MASK_M;
+                    } else {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=0,TGE=0,AMO/IMO/FMO=1
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else if (el == EL2)
+                            mask = INT_MASK_M;
+                        else
+                            mask = INT_MASK_T;
+                    }
                 } else {
+                    // NS=1,EAI/IRQ/FIQ=0,RW=0,TGE=1
                     if (el == EL3)
                         mask = INT_MASK_P;
-                    else if (is_secure || el == EL2)
+                    else if (el == EL2)
                         mask = INT_MASK_M;
                     else
                         mask = INT_MASK_T;
                 }
+            } else {
+                if (!hcr.tge) {
+                    if (!hcr_mask_override_bit) {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=1,TGE=0,AMO/IMO/FMO=0
+                        if (el == EL3 || el == EL2)
+                            mask = INT_MASK_P;
+                        else
+                            mask = INT_MASK_M;
+                    } else {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=1,TGE=0,AMO/IMO/FMO=1
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else if (el == EL2)
+                            mask = INT_MASK_M;
+                        else
+                            mask = INT_MASK_T;
+                    }
+                } else {
+                    if (!hcr.e2h) {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=1,TGE=1,E2H=0
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else if (el == EL2)
+                            mask = INT_MASK_M;
+                        else
+                            mask = INT_MASK_T;
+                    } else {
+                        // NS=1,EAI/IRQ/FIQ=0,RW=1,TGE=1,E2H=1
+                        if (el == EL3)
+                            mask = INT_MASK_P;
+                        else
+                            mask = INT_MASK_M;
+                    }
+                }
             }
         } else {
-            // SCR IRQ == 1
             if (el == EL3)
                 mask = INT_MASK_M;
             else
                 mask = INT_MASK_T;
         }
     }
-
     return ((mask == INT_MASK_T) ||
             ((mask == INT_MASK_M) && !cpsr_mask_bit)) &&
             (mask != INT_MASK_P);
 }
 
+bool
+ArmISA::Interrupts::takeInt(InterruptTypes int_type) const
+{
+    // Pick the 64-bit or 32-bit variant; see Table G1-17~19 of ARM V8 ARM
+    return ArmSystem::highestELIs64(tc) ? takeInt64(int_type) :
+                                          takeInt32(int_type);
+
+}
+
 } // namespace gem5
diff --git a/src/arch/arm/interrupts.hh b/src/arch/arm/interrupts.hh
index 178ee6cea5..c2a2d13a8d 100644
--- a/src/arch/arm/interrupts.hh
+++ b/src/arch/arm/interrupts.hh
@@ -129,6 +129,8 @@ class Interrupts : public BaseInterrupts
     };
 
     bool takeInt(InterruptTypes int_type) const;
+    bool takeInt32(InterruptTypes int_type) const;
+    bool takeInt64(InterruptTypes int_type) const;
 
     bool
     checkInterrupts() const override
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 02129266cf..f961a2d2c4 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -320,6 +320,8 @@ ISA::redirectRegVHE(int misc_reg)
         return ELIsInHost(tc, currEL()) ? MISCREG_CNTPCT_EL0 : misc_reg;
       case MISCREG_SCTLR_EL12:
         return MISCREG_SCTLR_EL1;
+      case MISCREG_SCTLR2_EL12:
+        return MISCREG_SCTLR2_EL1;
       case MISCREG_CPACR_EL12:
         return MISCREG_CPACR_EL1;
       case MISCREG_ZCR_EL12:
@@ -330,6 +332,8 @@ ISA::redirectRegVHE(int misc_reg)
         return MISCREG_TTBR1_EL1;
       case MISCREG_TCR_EL12:
         return MISCREG_TCR_EL1;
+      case MISCREG_TCR2_EL12:
+        return MISCREG_TCR2_EL1;
       case MISCREG_SPSR_EL12:
         return MISCREG_SPSR_EL1;
       case MISCREG_ELR_EL12:
@@ -403,7 +407,6 @@ ISA::readMiscReg(RegIndex idx)
     if (idx == MISCREG_CPSR) {
         cpsr = miscRegs[idx];
         auto pc = tc->pcState().as<PCState>();
-        cpsr.j = pc.jazelle() ? 1 : 0;
         cpsr.t = pc.thumb() ? 1 : 0;
         return cpsr;
     }
@@ -674,7 +677,6 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 miscRegs[idx], cpsr, cpsr.f, cpsr.i, cpsr.a, cpsr.mode);
         PCState pc = tc->pcState().as<PCState>();
         pc.nextThumb(cpsr.t);
-        pc.nextJazelle(cpsr.j);
         pc.illegalExec(cpsr.il == 1);
         selfDebug->setDebugMask(cpsr.d == 1);
 
diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index 47d509e808..30f9009121 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2011-2022 Arm Limited
+// Copyright (c) 2011-2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -542,24 +542,70 @@ namespace Aarch64
                               case MISCREG_TLBI_VAALE1_Xt:
                               case MISCREG_TLBI_IPAS2E1_Xt:
                               case MISCREG_TLBI_IPAS2LE1_Xt:
+                              case MISCREG_TLBI_RVAE1_Xt:
+                              case MISCREG_TLBI_RVAAE1_Xt:
+                              case MISCREG_TLBI_RVALE1_Xt:
+                              case MISCREG_TLBI_RVAALE1_Xt:
+                              case MISCREG_TLBI_RIPAS2E1_Xt:
+                              case MISCREG_TLBI_RIPAS2LE1_Xt:
+                              case MISCREG_TLBI_RVAE2_Xt:
+                              case MISCREG_TLBI_RVALE2_Xt:
+                              case MISCREG_TLBI_RVAE3_Xt:
+                              case MISCREG_TLBI_RVALE3_Xt:
                                 return new Tlbi64LocalHub(
                                   machInst, miscReg, rt);
                               case MISCREG_TLBI_ALLE3IS:
+                              case MISCREG_TLBI_ALLE3OS:
                               case MISCREG_TLBI_ALLE2IS:
+                              case MISCREG_TLBI_ALLE2OS:
                               case MISCREG_TLBI_ALLE1IS:
+                              case MISCREG_TLBI_ALLE1OS:
                               case MISCREG_TLBI_VMALLS12E1IS:
+                              case MISCREG_TLBI_VMALLS12E1OS:
                               case MISCREG_TLBI_VMALLE1IS:
+                              case MISCREG_TLBI_VMALLE1OS:
                               case MISCREG_TLBI_VAE3IS_Xt:
+                              case MISCREG_TLBI_VAE3OS_Xt:
                               case MISCREG_TLBI_VALE3IS_Xt:
+                              case MISCREG_TLBI_VALE3OS_Xt:
                               case MISCREG_TLBI_VAE2IS_Xt:
+                              case MISCREG_TLBI_VAE2OS_Xt:
                               case MISCREG_TLBI_VALE2IS_Xt:
+                              case MISCREG_TLBI_VALE2OS_Xt:
                               case MISCREG_TLBI_VAE1IS_Xt:
+                              case MISCREG_TLBI_VAE1OS_Xt:
                               case MISCREG_TLBI_VALE1IS_Xt:
+                              case MISCREG_TLBI_VALE1OS_Xt:
                               case MISCREG_TLBI_ASIDE1IS_Xt:
+                              case MISCREG_TLBI_ASIDE1OS_Xt:
                               case MISCREG_TLBI_VAAE1IS_Xt:
+                              case MISCREG_TLBI_VAAE1OS_Xt:
                               case MISCREG_TLBI_VAALE1IS_Xt:
+                              case MISCREG_TLBI_VAALE1OS_Xt:
                               case MISCREG_TLBI_IPAS2E1IS_Xt:
+                              case MISCREG_TLBI_IPAS2E1OS_Xt:
                               case MISCREG_TLBI_IPAS2LE1IS_Xt:
+                              case MISCREG_TLBI_IPAS2LE1OS_Xt:
+                              case MISCREG_TLBI_RVAE1IS_Xt:
+                              case MISCREG_TLBI_RVAE1OS_Xt:
+                              case MISCREG_TLBI_RVAAE1IS_Xt:
+                              case MISCREG_TLBI_RVAAE1OS_Xt:
+                              case MISCREG_TLBI_RVALE1IS_Xt:
+                              case MISCREG_TLBI_RVALE1OS_Xt:
+                              case MISCREG_TLBI_RVAALE1IS_Xt:
+                              case MISCREG_TLBI_RVAALE1OS_Xt:
+                              case MISCREG_TLBI_RIPAS2E1IS_Xt:
+                              case MISCREG_TLBI_RIPAS2E1OS_Xt:
+                              case MISCREG_TLBI_RIPAS2LE1IS_Xt:
+                              case MISCREG_TLBI_RIPAS2LE1OS_Xt:
+                              case MISCREG_TLBI_RVAE2IS_Xt:
+                              case MISCREG_TLBI_RVAE2OS_Xt:
+                              case MISCREG_TLBI_RVALE2IS_Xt:
+                              case MISCREG_TLBI_RVALE2OS_Xt:
+                              case MISCREG_TLBI_RVAE3IS_Xt:
+                              case MISCREG_TLBI_RVAE3OS_Xt:
+                              case MISCREG_TLBI_RVALE3IS_Xt:
+                              case MISCREG_TLBI_RVALE3OS_Xt:
                                 return new Tlbi64ShareableHub(
                                   machInst, miscReg, rt, dec.dvmEnabled);
                               default:
diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa
index ff6bfda54e..8213ab9ff8 100644
--- a/src/arch/arm/isa/formats/branch.isa
+++ b/src/arch/arm/isa/formats/branch.isa
@@ -212,10 +212,6 @@ def format Thumb32BranchesAndMiscCtrl() {{
                     {
                         const uint32_t op = bits(machInst, 7, 4);
                         switch (op) {
-                          case 0x0:
-                            return new Leavex(machInst);
-                          case 0x1:
-                            return new Enterx(machInst);
                           case 0x2:
                             return new Clrex(machInst);
                           case 0x4:
diff --git a/src/arch/arm/isa/insts/amo64.isa b/src/arch/arm/isa/insts/amo64.isa
index 72eea89518..3de9a41bfe 100644
--- a/src/arch/arm/isa/insts/amo64.isa
+++ b/src/arch/arm/isa/insts/amo64.isa
@@ -827,35 +827,35 @@ let {{
                    ret_op=False, flavor="release").emit(OP_DICT['MIN'])
 
     AtomicArithmeticSingleOp("swpb",   "SWPB",    1, unsign=True,
-                   ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
+                   flavor="normal").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swplb",  "SWPLB",   1, unsign=True,
-                   ret_op=False, flavor="release").emit(OP_DICT['SWP'])
+                   flavor="release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpab",  "SWPAB",   1, unsign=True,
-                   ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
+                   flavor="acquire").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swplab", "SWPLAB",  1, unsign=True,
-                   ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
+                   flavor="acquire_release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swph",   "SWPH",    2, unsign=True,
-                   ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
+                   flavor="normal").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swplh",  "SWPLH",   2, unsign=True,
-                   ret_op=False, flavor="release").emit(OP_DICT['SWP'])
+                   flavor="release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpah",  "SWPAH",   2, unsign=True,
-                   ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
+                   flavor="acquire").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swplah", "SWPLAH",  2, unsign=True,
-                   ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
+                   flavor="acquire_release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swp",    "SWP",     4, unsign=True,
-                   ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
+                   flavor="normal").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpl",   "SWPL",    4, unsign=True,
-                   ret_op=False, flavor="release").emit(OP_DICT['SWP'])
+                   flavor="release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpa",   "SWPA",    4, unsign=True,
-                   ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
+                   flavor="acquire").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpla",  "SWPLA",   4, unsign=True,
-                   ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
+                   flavor="acquire_release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swp64",  "SWP64",    8, unsign=True,
-                   ret_op=False, flavor="normal").emit(OP_DICT['SWP'])
+                   flavor="normal").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpl64", "SWPL64",   8, unsign=True,
-                   ret_op=False, flavor="release").emit(OP_DICT['SWP'])
+                   flavor="release").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpa64", "SWPA64",   8, unsign=True,
-                   ret_op=False, flavor="acquire").emit(OP_DICT['SWP'])
+                   flavor="acquire").emit(OP_DICT['SWP'])
     AtomicArithmeticSingleOp("swpla64", "SWPLA64", 8, unsign=True,
-                   ret_op=False, flavor="acquire_release").emit(OP_DICT['SWP'])
+                   flavor="acquire_release").emit(OP_DICT['SWP'])
 }};
diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa
index f437651af3..5910a434f0 100644
--- a/src/arch/arm/isa/insts/branch64.isa
+++ b/src/arch/arm/isa/insts/branch64.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2011-2013, 2016, 2018, 2020 ARM Limited
+// Copyright (c) 2011-2013, 2016, 2018, 2020, 2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -200,11 +200,21 @@ let {{
                         HtmFailureFaultCause::EXCEPTION);
                     return fault;
                 }
+
                 Addr newPc;
                 CPSR cpsr = Cpsr;
                 CPSR spsr = Spsr;
 
                 ExceptionLevel curr_el = currEL(cpsr);
+
+                if (fgtEnabled(xc->tcBase()) && curr_el == EL1 &&
+                    static_cast<HFGITR>(
+                        xc->tcBase()->readMiscReg(MISCREG_HFGITR_EL2)).eret)
+                {
+                    return std::make_shared<HypervisorTrap>(
+                        machInst, %(trap_iss)d, ExceptionClass::TRAPPED_ERET);
+                }
+
                 switch (curr_el) {
                   case EL3:
                     newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3);
@@ -268,7 +278,7 @@ let {{
     '''
     instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter']
     bIop = ArmInstObjParams('eret', 'Eret64', "BranchEret64",
-                            bCode%{'op': ''}, instFlags)
+                            bCode%{'op': '', 'trap_iss' : 0b00}, instFlags)
     header_output += BasicDeclare.subst(bIop)
     decoder_output += BasicConstructor64.subst(bIop)
     exec_output += BasicExecute.subst(bIop)
@@ -278,7 +288,8 @@ let {{
                 fault = authIA(xc->tcBase(), newPc, XOp1, &newPc);
                 '''
     bIop = ArmInstObjParams('eretaa', 'Eretaa', "BranchEretA64",
-                            bCode % {'op': pac_code} , instFlags)
+                            bCode % {'op': pac_code, 'trap_iss' : 0b10},
+                            instFlags)
     header_output  += BasicDeclare.subst(bIop)
     decoder_output += BasicConstructor64.subst(bIop)
     exec_output    += BasicExecute.subst(bIop)
@@ -288,7 +299,8 @@ let {{
                 fault = authIB(xc->tcBase(), newPc, XOp1, &newPc);
                 '''
     bIop = ArmInstObjParams('eretab', 'Eretab', "BranchEretA64",
-                            bCode % {'op': pac_code} , instFlags)
+                            bCode % {'op': pac_code, 'trap_iss' : 0b11},
+                            instFlags)
     header_output += BasicDeclare.subst(bIop)
     decoder_output += BasicConstructor64.subst(bIop)
     exec_output += BasicExecute.subst(bIop)
diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa
index 31fc172883..cec761905e 100644
--- a/src/arch/arm/isa/insts/data.isa
+++ b/src/arch/arm/isa/insts/data.isa
@@ -268,7 +268,6 @@ let {{
             CondCodesGE = new_cpsr.ge;
 
             NextThumb = (new_cpsr).t;
-            NextJazelle = (new_cpsr).j;
             NextItState = (((new_cpsr).it2 << 2) & 0xFC)
                 | ((new_cpsr).it1 & 0x3);
             SevMailbox = 1;
diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa
index a617dc3ebb..87f87130ce 100644
--- a/src/arch/arm/isa/insts/data64.isa
+++ b/src/arch/arm/isa/insts/data64.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2011-2013, 2016-2022 Arm Limited
+// Copyright (c) 2011-2013, 2016-2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -335,13 +335,18 @@ let {{
     decoder_output += RegMiscRegOp64Constructor.subst(mrsIop)
     exec_output += BasicExecute.subst(mrsIop)
 
-    buildDataXRegInst("mrsNZCV", 1, '''
+    mrsNZCVCode = '''
         CPSR cpsr = 0;
         cpsr.nz = CondCodesNZ;
         cpsr.c = CondCodesC;
         cpsr.v = CondCodesV;
         XDest = cpsr;
-    ''')
+    '''
+    mrsNZCViop = ArmInstObjParams("mrs", "MrsNZCV64",
+                                  "RegMiscRegImmOp64", mrsNZCVCode)
+    header_output += RegMiscRegOp64Declare.subst(mrsNZCViop)
+    decoder_output += RegMiscRegOp64Constructor.subst(mrsNZCViop)
+    exec_output += BasicExecute.subst(mrsNZCViop)
 
     msrCode = msr_check_code + '''
         MiscDest_ud = XOp1;
@@ -382,12 +387,17 @@ let {{
     exec_output += DvmInitiateAcc.subst(msrTlbiSIop)
     exec_output += DvmCompleteAcc.subst(msrTlbiSIop)
 
-    buildDataXRegInst("msrNZCV", 1, '''
+    msrNZCVCode = '''
         CPSR cpsr = XOp1;
         CondCodesNZ = cpsr.nz;
         CondCodesC = cpsr.c;
         CondCodesV = cpsr.v;
-    ''')
+    '''
+    msrNZCVIop = ArmInstObjParams("msr", "MsrNZCV64", "MiscRegRegImmOp64",
+                                  msrNZCVCode)
+    header_output += MiscRegRegOp64Declare.subst(msrNZCVIop)
+    decoder_output += MiscRegRegOp64Constructor.subst(msrNZCVIop)
+    exec_output += BasicExecute.subst(msrNZCVIop)
 
 
     msrdczva_ea_code = msr_check_code
diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa
index 4e508f0186..f912204fe7 100644
--- a/src/arch/arm/isa/insts/m5ops.isa
+++ b/src/arch/arm/isa/insts/m5ops.isa
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2010, 2012-2013 ARM Limited
+// Copyright (c) 2010, 2012-2013, 2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -47,7 +47,8 @@ let {{
                                  { "code": gem5OpCode % "RegABI64" +
                                    'X0 = ret;',
                                    "predicate_test": predicateTest },
-                                 [ "IsNonSpeculative", "IsUnverifiable" ]);
+                                 [ "IsNonSpeculative", "IsUnverifiable",
+                                   "IsPseudo" ]);
     header_output += BasicDeclare.subst(gem5OpIop)
     decoder_output += BasicConstructor.subst(gem5OpIop)
     exec_output += PredOpExecute.subst(gem5OpIop)
@@ -57,7 +58,8 @@ let {{
                                    'R0 = bits(ret, 31, 0);\n' + \
                                    'R1 = bits(ret, 63, 32);',
                                    "predicate_test": predicateTest },
-                                 [ "IsNonSpeculative", "IsUnverifiable" ]);
+                                 [ "IsNonSpeculative", "IsUnverifiable",
+                                   "IsPseudo" ]);
     header_output += BasicDeclare.subst(gem5OpIop)
     decoder_output += BasicConstructor.subst(gem5OpIop)
     exec_output += PredOpExecute.subst(gem5OpIop)
diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa
index edd7228cf8..5e11c3ba95 100644
--- a/src/arch/arm/isa/insts/macromem.isa
+++ b/src/arch/arm/isa/insts/macromem.isa
@@ -668,7 +668,6 @@ let {{
                                      0xF, true, sctlr.nmfi, xc->tcBase());
                     Cpsr = ~CondCodesMask & new_cpsr;
                     NextThumb = new_cpsr.t;
-                    NextJazelle = new_cpsr.j;
                     NextItState = ((((CPSR)URb).it2 << 2) & 0xFC)
                                     | (((CPSR)URb).it1 & 0x3);
                     CondCodesNZ = new_cpsr.nz;
diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa
index bfcb69340d..9ee753e385 100644
--- a/src/arch/arm/isa/insts/misc.isa
+++ b/src/arch/arm/isa/insts/misc.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2010-2013,2017-2021 Arm Limited
+// Copyright (c) 2010-2013,2017-2021,2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -40,6 +40,13 @@ let {{
     svcCode = '''
     ThreadContext *tc = xc->tcBase();
 
+    if (fgtEnabled(tc) && currEL(tc) == EL0 && !ELIsInHost(tc, EL0) &&
+        ELIs64(tc, EL1) && static_cast<HFGITR>(
+            tc->readMiscReg(MISCREG_HFGITR_EL2)).svcEL0) {
+        return std::make_shared<HypervisorTrap>(
+            machInst, imm, ExceptionClass::SVC);
+    }
+
     bool have_semi = ArmSystem::haveSemihosting(tc);
     if (have_semi && Thumb && imm == ArmSemihosting::T32Imm) {
         // Enable gem5 extensions since we can't distinguish in thumb.
@@ -166,9 +173,8 @@ let {{
         CondCodesGE = new_cpsr.ge;
 
         NextThumb = (new_cpsr).t;
-                    NextJazelle = (new_cpsr).j;
-                    NextItState = (((new_cpsr).it2 << 2) & 0xFC)
-                        | ((new_cpsr).it1 & 0x3);
+        NextItState = (((new_cpsr).it2 << 2) & 0xFC)
+            | ((new_cpsr).it1 & 0x3);
 
         NPC = (old_cpsr.mode == MODE_HYP) ? ElrHyp : LR;
     '''
@@ -1076,28 +1082,6 @@ let {{
     exec_output += PredOpExecute.subst(mcrr15Iop)
 
 
-    enterxCode = '''
-        NextThumb = true;
-        NextJazelle = true;
-    '''
-    enterxIop = ArmInstObjParams("enterx", "Enterx", "PredOp",
-                                 { "code": enterxCode,
-                                   "predicate_test": predicateTest }, [])
-    header_output += BasicDeclare.subst(enterxIop)
-    decoder_output += BasicConstructor.subst(enterxIop)
-    exec_output += PredOpExecute.subst(enterxIop)
-
-    leavexCode = '''
-        NextThumb = true;
-        NextJazelle = false;
-    '''
-    leavexIop = ArmInstObjParams("leavex", "Leavex", "PredOp",
-                                 { "code": leavexCode,
-                                   "predicate_test": predicateTest }, [])
-    header_output += BasicDeclare.subst(leavexIop)
-    decoder_output += BasicConstructor.subst(leavexIop)
-    exec_output += PredOpExecute.subst(leavexIop)
-
     setendCode = '''
         CPSR cpsr = Cpsr;
         cpsr.e = imm;
diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa
index 46d72d21c3..5678195415 100644
--- a/src/arch/arm/isa/insts/misc64.isa
+++ b/src/arch/arm/isa/insts/misc64.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2011-2013, 2016-2018, 2020-2021 Arm Limited
+// Copyright (c) 2011-2013, 2016-2018, 2020-2021, 2023 Arm Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -43,7 +43,29 @@ let {{
             HtmFailureFaultCause::EXCEPTION);
         return fault;
     }
-    fault = std::make_shared<SupervisorCall>(machInst, bits(machInst, 20, 5));
+
+    const uint32_t iss = bits(machInst, 20, 5);
+    if (fgtEnabled(xc->tcBase())) {
+        ExceptionLevel curr_el = currEL(xc->tcBase());
+        HFGITR hfgitr = xc->tcBase()->readMiscReg(MISCREG_HFGITR_EL2);
+        switch (curr_el) {
+          case EL0:
+            if (!ELIsInHost(xc->tcBase(), curr_el) && hfgitr.svcEL0) {
+                return std::make_shared<HypervisorTrap>(
+                    machInst, iss, ExceptionClass::SVC_64);
+            }
+            break;
+          case EL1:
+            if (hfgitr.svcEL1) {
+                return std::make_shared<HypervisorTrap>(
+                    machInst, iss, ExceptionClass::SVC_64);
+            }
+            break;
+          default:
+            break;
+        }
+    }
+    fault = std::make_shared<SupervisorCall>(machInst, iss);
     '''
 
     svcIop = ArmInstObjParams("svc", "Svc64", "ImmOp64",
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index 6608f61688..a3b79be912 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -3403,7 +3403,7 @@ let {{
                     destElem = (srcElem1 >> shiftAmt);
                 }
                 destElem += rBit;
-            } else {
+            } else if (shiftAmt > 0) {
                 if (shiftAmt >= sizeof(Element) * 8) {
                     if (srcElem1 != 0) {
                         destElem = mask(sizeof(Element) * 8);
@@ -3421,6 +3421,8 @@ let {{
                         destElem = srcElem1 << shiftAmt;
                     }
                 }
+            } else {
+                destElem = srcElem1;
             }
             FpscrQc = fpscr;
     '''
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 5bba00f138..5e6506d0e8 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -526,7 +526,6 @@ def operands {{
     'IWNPC': PCStateReg('instIWNPC', srtPC),
     'Thumb': PCStateReg('thumb', srtPC),
     'NextThumb': PCStateReg('nextThumb', srtMode),
-    'NextJazelle': PCStateReg('nextJazelle', srtMode),
     'NextItState': PCStateReg('nextItstate', srtMode),
     'Itstate': PCStateReg('itstate', srtMode),
     'NextAArch64': PCStateReg('nextAArch64', srtMode),
diff --git a/src/arch/arm/kvm/arm_cpu.hh b/src/arch/arm/kvm/arm_cpu.hh
index 849aa769a5..302d40e17a 100644
--- a/src/arch/arm/kvm/arm_cpu.hh
+++ b/src/arch/arm/kvm/arm_cpu.hh
@@ -100,7 +100,7 @@ class ArmKvmCPU : public BaseKvmCPU
     void
     stutterPC(PCStateBase &pc) const
     {
-        pc.as<X86ISA::PCState>().setNPC(pc->instAddr());
+        pc.as<ArmISA::PCState>().setNPC(pc->instAddr());
     }
 
     /**
diff --git a/src/arch/arm/mmu.cc b/src/arch/arm/mmu.cc
index 824974ab21..956f95d3b3 100644
--- a/src/arch/arm/mmu.cc
+++ b/src/arch/arm/mmu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2013, 2016-2022 Arm Limited
+ * Copyright (c) 2010-2013, 2016-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -263,14 +263,17 @@ MMU::translateSe(const RequestPtr &req, ThreadContext *tc, Mode mode,
         }
     }
 
-    Addr paddr;
     Process *p = tc->getProcessPtr();
-
-    if (!p->pTable->translate(vaddr, paddr))
+    if (const auto pte = p->pTable->lookup(vaddr); !pte) {
         return std::make_shared<GenericPageTableFault>(vaddr_tainted);
-    req->setPaddr(paddr);
+    } else {
+        req->setPaddr(pte->paddr + p->pTable->pageOffset(vaddr));
 
-    return finalizePhysical(req, tc, mode);
+        if (pte->flags & EmulationPageTable::Uncacheable)
+            req->setFlags(Request::UNCACHEABLE);
+
+        return finalizePhysical(req, tc, mode);
+    }
 }
 
 Fault
diff --git a/src/arch/arm/pagetable.hh b/src/arch/arm/pagetable.hh
index 8300175144..a1e9028e8f 100644
--- a/src/arch/arm/pagetable.hh
+++ b/src/arch/arm/pagetable.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2013, 2021 Arm Limited
+ * Copyright (c) 2010, 2012-2013, 2021, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -186,6 +186,12 @@ struct TlbEntry : public Serializable
     {
         // virtual address
         Addr va = 0;
+        // lookup size:
+        // * != 0 -> This is a range based lookup.
+        //           end_address (exclusive) = va + size
+        // * == 0 -> This is a normal lookup. size should
+        //           be ignored
+        Addr size = 0;
         // context id/address space id to use
         uint16_t asn = 0;
         // if on lookup asn should be ignored
@@ -219,6 +225,7 @@ struct TlbEntry : public Serializable
 
     uint16_t asid;          // Address Space Identifier
     vmid_t vmid;            // Virtual machine Identifier
+    GrainSize tg;           // Translation Granule Size
     uint8_t N;              // Number of bits in pagesize
     uint8_t innerAttrs;
     uint8_t outerAttrs;
@@ -263,7 +270,7 @@ struct TlbEntry : public Serializable
              bool uncacheable, bool read_only) :
          pfn(_paddr >> PageShift), size(PageBytes - 1), vpn(_vaddr >> PageShift),
          attributes(0), lookupLevel(LookupLevel::L1),
-         asid(_asn), vmid(0), N(0),
+         asid(_asn), vmid(0), tg(Grain4KB), N(0),
          innerAttrs(0), outerAttrs(0), ap(read_only ? 0x3 : 0), hap(0x3),
          domain(DomainType::Client),  mtype(MemoryType::StronglyOrdered),
          longDescFormat(false), isHyp(false), global(false), valid(true),
@@ -281,7 +288,7 @@ struct TlbEntry : public Serializable
 
     TlbEntry() :
          pfn(0), size(0), vpn(0), attributes(0), lookupLevel(LookupLevel::L1),
-         asid(0), vmid(0), N(0),
+         asid(0), vmid(0), tg(ReservedGrain), N(0),
          innerAttrs(0), outerAttrs(0), ap(0), hap(0x3),
          domain(DomainType::Client), mtype(MemoryType::StronglyOrdered),
          longDescFormat(false), isHyp(false), global(false), valid(false),
@@ -306,12 +313,25 @@ struct TlbEntry : public Serializable
         return pfn << PageShift;
     }
 
+    bool
+    matchAddress(const Lookup &lookup) const
+    {
+        Addr page_addr = vpn << N;
+        if (lookup.size) {
+            // This is a range based lookup
+            return lookup.va <= page_addr + size &&
+                   lookup.va + lookup.size > page_addr;
+        } else {
+            // This is a normal lookup
+            return lookup.va >= page_addr && lookup.va <= page_addr + size;
+        }
+    }
+
     bool
     match(const Lookup &lookup) const
     {
         bool match = false;
-        Addr v = vpn << N;
-        if (valid && lookup.va >= v && lookup.va <= v + size &&
+        if (valid && matchAddress(lookup) &&
             (lookup.secure == !nstid) && (lookup.hyp == isHyp))
         {
             match = checkELMatch(lookup.targetEL, lookup.inHost);
@@ -319,8 +339,8 @@ struct TlbEntry : public Serializable
             if (match && !lookup.ignoreAsn) {
                 match = global || (lookup.asn == asid);
             }
-            if (match && nstid) {
-                match = isHyp || (lookup.vmid == vmid);
+            if (match && useVMID(lookup.targetEL, lookup.inHost)) {
+                match = lookup.vmid == vmid;
             }
         }
         return match;
diff --git a/src/arch/arm/pcstate.hh b/src/arch/arm/pcstate.hh
index 7b75ed8184..98e3202eb3 100644
--- a/src/arch/arm/pcstate.hh
+++ b/src/arch/arm/pcstate.hh
@@ -75,7 +75,6 @@ class PCState : public GenericISA::UPCState<4>
     enum FlagBits
     {
         ThumbBit = (1 << 0),
-        JazelleBit = (1 << 1),
         AArch64Bit = (1 << 2)
     };
 
@@ -92,7 +91,7 @@ class PCState : public GenericISA::UPCState<4>
 
   public:
     void
-    set(Addr val)
+    set(Addr val) override
     {
         Base::set(val);
         npc(val + (thumb() ? 2 : 4));
@@ -202,36 +201,6 @@ class PCState : public GenericISA::UPCState<4>
     }
 
 
-    bool
-    jazelle() const
-    {
-        return flags & JazelleBit;
-    }
-
-    void
-    jazelle(bool val)
-    {
-        if (val)
-            flags |= JazelleBit;
-        else
-            flags &= ~JazelleBit;
-    }
-
-    bool
-    nextJazelle() const
-    {
-        return nextFlags & JazelleBit;
-    }
-
-    void
-    nextJazelle(bool val)
-    {
-        if (val)
-            nextFlags |= JazelleBit;
-        else
-            nextFlags &= ~JazelleBit;
-    }
-
     bool
     aarch64() const
     {
@@ -354,29 +323,18 @@ class PCState : public GenericISA::UPCState<4>
     void
     instIWNPC(Addr val)
     {
-        bool thumbEE = (thumb() && jazelle());
-
-        Addr newPC = val;
-        if (thumbEE) {
-            if (bits(newPC, 0)) {
-                newPC = newPC & ~mask(1);
-            }  // else we have a bad interworking address; do not call
-               // panic() since the instruction could be executed
-               // speculatively
+        if (bits(val, 0)) {
+            nextThumb(true);
+            val = val & ~mask(1);
+        } else if (!bits(val, 1)) {
+            nextThumb(false);
         } else {
-            if (bits(newPC, 0)) {
-                nextThumb(true);
-                newPC = newPC & ~mask(1);
-            } else if (!bits(newPC, 1)) {
-                nextThumb(false);
-            } else {
-                // This state is UNPREDICTABLE in the ARM architecture
-                // The easy thing to do is just mask off the bit and
-                // stay in the current mode, so we'll do that.
-                newPC &= ~mask(2);
-            }
+            // This state is UNPREDICTABLE in the ARM architecture
+            // The easy thing to do is just mask off the bit and
+            // stay in the current mode, so we'll do that.
+            val &= ~mask(2);
         }
-        npc(newPC);
+        npc(val);
     }
 
     // Perform an interworking branch in ARM mode, a regular branch
@@ -384,7 +342,7 @@ class PCState : public GenericISA::UPCState<4>
     void
     instAIWNPC(Addr val)
     {
-        if (!thumb() && !jazelle())
+        if (!thumb())
             instIWNPC(val);
         else
             instNPC(val);
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 9aa519fe36..b169f849d1 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -169,7 +169,7 @@ ArmProcess32::armHwcapImpl() const
     };
 
     return Arm_Swp | Arm_Half | Arm_Thumb | Arm_FastMult |
-           Arm_Vfp | Arm_Edsp | Arm_ThumbEE | Arm_Neon |
+           Arm_Vfp | Arm_Edsp | Arm_Neon |
            Arm_Vfpv3 | Arm_Vfpv3d16;
 }
 
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index f1c69cc007..e768edeee3 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -759,12 +759,24 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 0, 8, 1, 3), MISCREG_TLBI_VAAE1OS_Xt },
     { MiscRegNum64(1, 0, 8, 1, 5), MISCREG_TLBI_VALE1OS_Xt },
     { MiscRegNum64(1, 0, 8, 1, 7), MISCREG_TLBI_VAALE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 2, 1), MISCREG_TLBI_RVAE1IS_Xt },
+    { MiscRegNum64(1, 0, 8, 2, 3), MISCREG_TLBI_RVAAE1IS_Xt },
+    { MiscRegNum64(1, 0, 8, 2, 5), MISCREG_TLBI_RVALE1IS_Xt },
+    { MiscRegNum64(1, 0, 8, 2, 7), MISCREG_TLBI_RVAALE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 0), MISCREG_TLBI_VMALLE1IS },
     { MiscRegNum64(1, 0, 8, 3, 1), MISCREG_TLBI_VAE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 2), MISCREG_TLBI_ASIDE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 3), MISCREG_TLBI_VAAE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 5), MISCREG_TLBI_VALE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 7), MISCREG_TLBI_VAALE1IS_Xt },
+    { MiscRegNum64(1, 0, 8, 5, 1), MISCREG_TLBI_RVAE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 5, 3), MISCREG_TLBI_RVAAE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 5, 5), MISCREG_TLBI_RVALE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 5, 7), MISCREG_TLBI_RVAALE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 6, 1), MISCREG_TLBI_RVAE1_Xt },
+    { MiscRegNum64(1, 0, 8, 6, 3), MISCREG_TLBI_RVAAE1_Xt },
+    { MiscRegNum64(1, 0, 8, 6, 5), MISCREG_TLBI_RVALE1_Xt },
+    { MiscRegNum64(1, 0, 8, 6, 7), MISCREG_TLBI_RVAALE1_Xt },
     { MiscRegNum64(1, 0, 8, 7, 0), MISCREG_TLBI_VMALLE1 },
     { MiscRegNum64(1, 0, 8, 7, 1), MISCREG_TLBI_VAE1_Xt },
     { MiscRegNum64(1, 0, 8, 7, 2), MISCREG_TLBI_ASIDE1_Xt },
@@ -783,12 +795,16 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 4, 7, 8, 6), MISCREG_AT_S12E0R_Xt },
     { MiscRegNum64(1, 4, 7, 8, 7), MISCREG_AT_S12E0W_Xt },
     { MiscRegNum64(1, 4, 8, 0, 1), MISCREG_TLBI_IPAS2E1IS_Xt },
+    { MiscRegNum64(1, 4, 8, 0, 2), MISCREG_TLBI_RIPAS2E1IS_Xt },
     { MiscRegNum64(1, 4, 8, 0, 5), MISCREG_TLBI_IPAS2LE1IS_Xt },
     { MiscRegNum64(1, 4, 8, 1, 0), MISCREG_TLBI_ALLE2OS },
     { MiscRegNum64(1, 4, 8, 1, 1), MISCREG_TLBI_VAE2OS_Xt },
     { MiscRegNum64(1, 4, 8, 1, 4), MISCREG_TLBI_ALLE1OS },
     { MiscRegNum64(1, 4, 8, 1, 5), MISCREG_TLBI_VALE2OS_Xt },
     { MiscRegNum64(1, 4, 8, 1, 6), MISCREG_TLBI_VMALLS12E1OS },
+    { MiscRegNum64(1, 4, 8, 0, 6), MISCREG_TLBI_RIPAS2LE1IS_Xt },
+    { MiscRegNum64(1, 4, 8, 2, 1), MISCREG_TLBI_RVAE2IS_Xt },
+    { MiscRegNum64(1, 4, 8, 2, 5), MISCREG_TLBI_RVALE2IS_Xt },
     { MiscRegNum64(1, 4, 8, 3, 0), MISCREG_TLBI_ALLE2IS },
     { MiscRegNum64(1, 4, 8, 3, 1), MISCREG_TLBI_VAE2IS_Xt },
     { MiscRegNum64(1, 4, 8, 3, 4), MISCREG_TLBI_ALLE1IS },
@@ -796,8 +812,16 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 4, 8, 3, 6), MISCREG_TLBI_VMALLS12E1IS },
     { MiscRegNum64(1, 4, 8, 4, 0), MISCREG_TLBI_IPAS2E1OS_Xt },
     { MiscRegNum64(1, 4, 8, 4, 1), MISCREG_TLBI_IPAS2E1_Xt },
+    { MiscRegNum64(1, 4, 8, 4, 2), MISCREG_TLBI_RIPAS2E1_Xt },
+    { MiscRegNum64(1, 4, 8, 4, 3), MISCREG_TLBI_RIPAS2E1OS_Xt },
     { MiscRegNum64(1, 4, 8, 4, 4), MISCREG_TLBI_IPAS2LE1OS_Xt },
     { MiscRegNum64(1, 4, 8, 4, 5), MISCREG_TLBI_IPAS2LE1_Xt },
+    { MiscRegNum64(1, 4, 8, 4, 6), MISCREG_TLBI_RIPAS2LE1_Xt },
+    { MiscRegNum64(1, 4, 8, 4, 7), MISCREG_TLBI_RIPAS2LE1OS_Xt },
+    { MiscRegNum64(1, 4, 8, 5, 1), MISCREG_TLBI_RVAE2OS_Xt },
+    { MiscRegNum64(1, 4, 8, 5, 5), MISCREG_TLBI_RVALE2OS_Xt },
+    { MiscRegNum64(1, 4, 8, 6, 1), MISCREG_TLBI_RVAE2_Xt },
+    { MiscRegNum64(1, 4, 8, 6, 5), MISCREG_TLBI_RVALE2_Xt },
     { MiscRegNum64(1, 4, 8, 7, 0), MISCREG_TLBI_ALLE2 },
     { MiscRegNum64(1, 4, 8, 7, 1), MISCREG_TLBI_VAE2_Xt },
     { MiscRegNum64(1, 4, 8, 7, 4), MISCREG_TLBI_ALLE1 },
@@ -808,9 +832,15 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 6, 8, 1, 0), MISCREG_TLBI_ALLE3OS },
     { MiscRegNum64(1, 6, 8, 1, 1), MISCREG_TLBI_VAE3OS_Xt },
     { MiscRegNum64(1, 6, 8, 1, 5), MISCREG_TLBI_VALE3OS_Xt },
+    { MiscRegNum64(1, 6, 8, 2, 1), MISCREG_TLBI_RVAE3IS_Xt },
+    { MiscRegNum64(1, 6, 8, 2, 5), MISCREG_TLBI_RVALE3IS_Xt },
     { MiscRegNum64(1, 6, 8, 3, 0), MISCREG_TLBI_ALLE3IS },
     { MiscRegNum64(1, 6, 8, 3, 1), MISCREG_TLBI_VAE3IS_Xt },
     { MiscRegNum64(1, 6, 8, 3, 5), MISCREG_TLBI_VALE3IS_Xt },
+    { MiscRegNum64(1, 6, 8, 5, 1), MISCREG_TLBI_RVAE3OS_Xt },
+    { MiscRegNum64(1, 6, 8, 5, 5), MISCREG_TLBI_RVALE3OS_Xt },
+    { MiscRegNum64(1, 6, 8, 6, 1), MISCREG_TLBI_RVAE3_Xt },
+    { MiscRegNum64(1, 6, 8, 6, 5), MISCREG_TLBI_RVALE3_Xt },
     { MiscRegNum64(1, 6, 8, 7, 0), MISCREG_TLBI_ALLE3 },
     { MiscRegNum64(1, 6, 8, 7, 1), MISCREG_TLBI_VAE3_Xt },
     { MiscRegNum64(1, 6, 8, 7, 5), MISCREG_TLBI_VALE3_Xt },
@@ -951,7 +981,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 0, 7, 0), MISCREG_ID_AA64MMFR0_EL1 },
     { MiscRegNum64(3, 0, 0, 7, 1), MISCREG_ID_AA64MMFR1_EL1 },
     { MiscRegNum64(3, 0, 0, 7, 2), MISCREG_ID_AA64MMFR2_EL1 },
-    { MiscRegNum64(3, 0, 0, 7, 3), MISCREG_RAZ },
+    { MiscRegNum64(3, 0, 0, 7, 3), MISCREG_ID_AA64MMFR3_EL1 },
     { MiscRegNum64(3, 0, 0, 7, 4), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 7, 5), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 7, 6), MISCREG_RAZ },
@@ -959,12 +989,14 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 1, 0, 0), MISCREG_SCTLR_EL1 },
     { MiscRegNum64(3, 0, 1, 0, 1), MISCREG_ACTLR_EL1 },
     { MiscRegNum64(3, 0, 1, 0, 2), MISCREG_CPACR_EL1 },
+    { MiscRegNum64(3, 0, 1, 0, 3), MISCREG_SCTLR2_EL1 },
     { MiscRegNum64(3, 0, 1, 2, 0), MISCREG_ZCR_EL1 },
     { MiscRegNum64(3, 0, 1, 2, 4), MISCREG_SMPRI_EL1 },
     { MiscRegNum64(3, 0, 1, 2, 6), MISCREG_SMCR_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 0), MISCREG_TTBR0_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 1), MISCREG_TTBR1_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 2), MISCREG_TCR_EL1 },
+    { MiscRegNum64(3, 0, 2, 0, 3), MISCREG_TCR2_EL1 },
     { MiscRegNum64(3, 0, 2, 1, 0), MISCREG_APIAKeyLo_EL1 },
     { MiscRegNum64(3, 0, 2, 1, 1), MISCREG_APIAKeyHi_EL1 },
     { MiscRegNum64(3, 0, 2, 1, 2), MISCREG_APIBKeyLo_EL1 },
@@ -1108,12 +1140,14 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 4, 0, 0, 5), MISCREG_VMPIDR_EL2 },
     { MiscRegNum64(3, 4, 1, 0, 0), MISCREG_SCTLR_EL2 },
     { MiscRegNum64(3, 4, 1, 0, 1), MISCREG_ACTLR_EL2 },
+    { MiscRegNum64(3, 4, 1, 0, 3), MISCREG_SCTLR2_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 0), MISCREG_HCR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 1), MISCREG_MDCR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 2), MISCREG_CPTR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 3), MISCREG_HSTR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 4), MISCREG_HFGRTR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 5), MISCREG_HFGWTR_EL2 },
+    { MiscRegNum64(3, 4, 1, 1, 6), MISCREG_HFGITR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 7), MISCREG_HACR_EL2 },
     { MiscRegNum64(3, 4, 1, 2, 0), MISCREG_ZCR_EL2 },
     { MiscRegNum64(3, 4, 1, 2, 2), MISCREG_HCRX_EL2 },
@@ -1122,6 +1156,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 4, 2, 0, 0), MISCREG_TTBR0_EL2 },
     { MiscRegNum64(3, 4, 2, 0, 1), MISCREG_TTBR1_EL2 },
     { MiscRegNum64(3, 4, 2, 0, 2), MISCREG_TCR_EL2 },
+    { MiscRegNum64(3, 4, 2, 0, 3), MISCREG_TCR2_EL2 },
     { MiscRegNum64(3, 4, 2, 1, 0), MISCREG_VTTBR_EL2 },
     { MiscRegNum64(3, 4, 2, 1, 2), MISCREG_VTCR_EL2 },
     { MiscRegNum64(3, 4, 2, 6, 0), MISCREG_VSTTBR_EL2 },
@@ -1196,11 +1231,13 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 4, 14, 5, 2), MISCREG_CNTHPS_CVAL_EL2 },
     { MiscRegNum64(3, 5, 1, 0, 0), MISCREG_SCTLR_EL12 },
     { MiscRegNum64(3, 5, 1, 0, 2), MISCREG_CPACR_EL12 },
+    { MiscRegNum64(3, 5, 1, 0, 3), MISCREG_SCTLR2_EL12 },
     { MiscRegNum64(3, 5, 1, 2, 0), MISCREG_ZCR_EL12 },
     { MiscRegNum64(3, 5, 1, 2, 6), MISCREG_SMCR_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 0), MISCREG_TTBR0_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 1), MISCREG_TTBR1_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 2), MISCREG_TCR_EL12 },
+    { MiscRegNum64(3, 5, 2, 0, 3), MISCREG_TCR2_EL12 },
     { MiscRegNum64(3, 5, 4, 0, 0), MISCREG_SPSR_EL12 },
     { MiscRegNum64(3, 5, 4, 0, 1), MISCREG_ELR_EL12 },
     { MiscRegNum64(3, 5, 5, 1, 0), MISCREG_AFSR0_EL12 },
@@ -1220,6 +1257,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 5, 14, 3, 2), MISCREG_CNTV_CVAL_EL02 },
     { MiscRegNum64(3, 6, 1, 0, 0), MISCREG_SCTLR_EL3 },
     { MiscRegNum64(3, 6, 1, 0, 1), MISCREG_ACTLR_EL3 },
+    { MiscRegNum64(3, 6, 1, 0, 3), MISCREG_SCTLR2_EL3 },
     { MiscRegNum64(3, 6, 1, 1, 0), MISCREG_SCR_EL3 },
     { MiscRegNum64(3, 6, 1, 1, 1), MISCREG_SDER32_EL3 },
     { MiscRegNum64(3, 6, 1, 1, 2), MISCREG_CPTR_EL3 },
@@ -1249,6 +1287,164 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 7, 14, 2, 2), MISCREG_CNTPS_CVAL_EL1 }
 };
 
+// Fetch the fine grained trap register matching the access direction:
+// HFGRTR_EL2 for reads, HFGWTR_EL2 for writes.
+template <bool read>
+HFGTR
+fgtRegister(ThreadContext *tc)
+{
+    constexpr auto fgt_idx =
+        read ? MISCREG_HFGRTR_EL2 : MISCREG_HFGWTR_EL2;
+    return tc->readMiscReg(fgt_idx);
+}
+
+/**
+ * Fine grained trap check for EL0 register accesses. Nothing is
+ * trapped while running in host mode (HCR_EL2.E2H and HCR_EL2.TGE set).
+ * @tparam read: true for a register read, false for a write
+ * @tparam r_bitfield: register (HFGTR) bitfield
+ */
+template<bool read, auto r_bitfield>
+Fault
+faultFgtEL0(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+    const bool host_el0 = EL2Enabled(tc) && hcr_el2.e2h && hcr_el2.tge;
+    const bool trap_el2 = fgtEnabled(tc) && !host_el0 &&
+        fgtRegister<read>(tc).*r_bitfield;
+    if (trap_el2) {
+        return inst.generateTrap(EL2);
+    }
+    return NoFault;
+}
+
+/**
+ * Fine grained trap check for EL1 register accesses
+ * @tparam read: true for a register read, false for a write
+ * @tparam r_bitfield: register (HFGTR) bitfield
+ */
+template<bool read, auto r_bitfield>
+Fault
+faultFgtEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const bool trap_el2 =
+        fgtEnabled(tc) && fgtRegister<read>(tc).*r_bitfield;
+    if (trap_el2) {
+        return inst.generateTrap(EL2);
+    }
+    return NoFault;
+}
+
+/**
+ * Fine grained trap check for instructions executed at EL1
+ *
+ * @tparam r_bitfield: instruction (HFGITR) bitfield
+ */
+template<auto r_bitfield>
+Fault
+faultFgtInstEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const bool trap_el2 = fgtEnabled(tc) &&
+        static_cast<HFGITR>(tc->readMiscReg(MISCREG_HFGITR_EL2)).*r_bitfield;
+    if (trap_el2) {
+        return inst.generateTrap(EL2);
+    }
+    return NoFault;
+}
+
+/**
+ * Template helper for traps to EL2 controlled by a HCR_EL2 bit
+ *
+ * @tparam g_bitfield: group (HCR) bitfield enabling the trap
+ */
+template <auto g_bitfield>
+Fault
+faultHcrEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+    const bool trap_el2 = EL2Enabled(tc) && hcr_el2.*g_bitfield;
+    if (!trap_el2) {
+        return NoFault;
+    }
+    return inst.generateTrap(EL2);
+}
+
+/**
+ * Combined HCR_EL2 and fine grained trap check for EL0 accesses.
+ * The HCR_EL2 group trap is evaluated before the fine grained one.
+ *
+ * @tparam read: true for a register read, false for a write
+ * @tparam g_bitfield: group (HCR) bitfield
+ * @tparam r_bitfield: register (HFGTR) bitfield
+ */
+template<bool read, auto g_bitfield, auto r_bitfield>
+Fault
+faultHcrFgtEL0(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+    const bool host_el0 = EL2Enabled(tc) && hcr_el2.e2h && hcr_el2.tge;
+
+    if (EL2Enabled(tc) && !host_el0 && hcr_el2.*g_bitfield) {
+        return inst.generateTrap(EL2);
+    }
+
+    // No group trap taken: the result is whatever the fine grained
+    // trap check reports (possibly NoFault)
+    return faultFgtEL0<read, r_bitfield>(entry, tc, inst);
+}
+
+/**
+ * Combined HCR_EL2 and fine grained trap check for EL1 accesses.
+ * The HCR_EL2 group trap is evaluated before the fine grained one.
+ *
+ * @tparam read: true for a register read, false for a write
+ * @tparam g_bitfield: group (HCR) bitfield
+ * @tparam r_bitfield: register (HFGTR) bitfield
+ */
+template<bool read, auto g_bitfield, auto r_bitfield>
+Fault
+faultHcrFgtEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+
+    if (EL2Enabled(tc) && hcr_el2.*g_bitfield) {
+        return inst.generateTrap(EL2);
+    }
+
+    // No group trap taken: the result is whatever the fine grained
+    // trap check reports (possibly NoFault)
+    return faultFgtEL1<read, r_bitfield>(entry, tc, inst);
+}
+
+/**
+ * Combined HCR_EL2 and fine grained instruction trap check at EL1.
+ * The HCR_EL2 group trap is evaluated before the fine grained one.
+ *
+ * @tparam g_bitfield: group (HCR) bitfield
+ * @tparam r_bitfield: instruction (HFGITR) bitfield
+ */
+template<auto g_bitfield, auto r_bitfield>
+Fault
+faultHcrFgtInstEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+
+    if (EL2Enabled(tc) && hcr_el2.*g_bitfield) {
+        return inst.generateTrap(EL2);
+    }
+
+    // No group trap taken: the result is whatever the fine grained
+    // instruction trap check reports (possibly NoFault)
+    return faultFgtInstEL1<r_bitfield>(entry, tc, inst);
+}
+
 Fault
 faultSpEL0(const MiscRegLUTEntry &entry, ThreadContext *tc,
            const MiscRegOp64 &inst)
@@ -1455,6 +1651,7 @@ faultPouEL0(const MiscRegLUTEntry &entry,
     }
 }
 
+template <auto bitfield>
 Fault
 faultPouEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -1466,11 +1663,15 @@ faultPouEL1(const MiscRegLUTEntry &entry,
     } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
                hcr.tocu) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtInstEL1<bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else {
         return NoFault;
     }
 }
 
+template <auto bitfield>
 Fault
 faultPouIsEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -1482,6 +1683,9 @@ faultPouIsEL1(const MiscRegLUTEntry &entry,
     } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
                hcr.ticab) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtInstEL1<bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else {
         return NoFault;
     }
@@ -1503,8 +1707,10 @@ faultCtrEL0(const MiscRegLUTEntry &entry,
          } else {
             return inst.generateTrap(EL1);
          }
-     } else if (el2_enabled && !in_host && hcr.tid2) {
-        return inst.generateTrap(EL2);
+     } else if (auto fault = faultHcrFgtEL0<
+                    true, &HCR::tid2, &HFGTR::ctrEL0>(entry, tc, inst);
+                fault != NoFault) {
+        return fault;
      } else if (el2_enabled && in_host && !sctlr2.uct) {
         return inst.generateTrap(EL2);
      } else {
@@ -1714,14 +1920,173 @@ faultIccSgiEL2(const MiscRegLUTEntry &entry,
     }
 }
 
+/**
+ * Access check for SCTLR2_EL1 at EL1 (undefined without FEAT_SCTLR2)
+ *
+ * @tparam read: true for a register read, false for a write
+ * @tparam g_bitfield: group (HCR) bitfield
+ */
+template<bool read, auto g_bitfield>
+Fault
+faultSctlr2EL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_SCTLR2)) {
+        return inst.undefined();
+    }
+
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    const HCRX hcrx_el2 = tc->readMiscReg(MISCREG_HCRX_EL2);
+    if (auto fault =
+            faultHcrFgtEL1<read, g_bitfield, &HFGTR::sctlrEL1>(
+                entry, tc, inst);
+        fault != NoFault) {
+        return fault;
+    }
+    if (EL2Enabled(tc) && (!isHcrxEL2Enabled(tc) || !hcrx_el2.sctlr2En)) {
+        return inst.generateTrap(EL2);
+    }
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.sctlr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+// Fault check for EL2 accesses to SCTLR2 registers: undefined without
+// FEAT_SCTLR2, trapped to EL3 while SCR_EL3.SCTLR2En is clear.
+Fault
+faultSctlr2EL2(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_SCTLR2)) {
+        return inst.undefined();
+    }
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.sctlr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+// Fault check for EL2 accesses to the SCTLR2_EL12 alias: undefined
+// unless FEAT_SCTLR2 is implemented and HCR_EL2.E2H is set, trapped
+// to EL3 while SCR_EL3.SCTLR2En is clear.
+Fault
+faultSctlr2VheEL2(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_SCTLR2)) {
+        return inst.undefined();
+    }
+    const HCR hcr_el2 = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (!hcr_el2.e2h) {
+        return inst.undefined();
+    }
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.sctlr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+/**
+ * Access check for TCR2_EL1 at EL1 (undefined without FEAT_TCR2)
+ * @tparam read: true for a register read, false for a write
+ * @tparam g_bitfield: group (HCR) bitfield
+ */
+template<bool read, auto g_bitfield>
+Fault
+faultTcr2EL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_TCR2)) {
+        return inst.undefined();
+    }
+    const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+    const HCRX hcrx = tc->readMiscReg(MISCREG_HCRX_EL2);
+    // The fine grained trap for TCR2_EL1 is the TCR_EL1 bit of
+    // HFGRTR_EL2/HFGWTR_EL2, not the SCTLR_EL1 one
+    if (auto fault = faultHcrFgtEL1<read, g_bitfield, &HFGTR::tcrEL1>(
+            entry, tc, inst); fault != NoFault) {
+        return fault;
+    }
+    if (EL2Enabled(tc) && (!isHcrxEL2Enabled(tc) || !hcrx.tcr2En)) {
+        return inst.generateTrap(EL2);
+    }
+    if (ArmSystem::haveEL(tc, EL3) && !scr.tcr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+// Fault check on TCR2 accesses: undefined without FEAT_TCR2, trapped
+// to EL3 while EL3 is present and SCR_EL3.TCR2En is clear.
+Fault
+faultTcr2EL2(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_TCR2)) {
+        return inst.undefined();
+    }
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.tcr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+// Fault check on TCR2 accesses that are only valid with HCR_EL2.E2H
+// set: undefined without FEAT_TCR2 or with E2H clear, trapped to EL3
+// while EL3 is present and SCR_EL3.TCR2En is clear.
+Fault
+faultTcr2VheEL2(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_TCR2)) {
+        return inst.undefined();
+    }
+    const HCR hcr_el2 = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (!hcr_el2.e2h) {
+        return inst.undefined();
+    }
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.tcr2En) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
+// TCR2 access check requiring FEAT_TCR2, EL2 enabled and HCR_EL2.E2H
+// set; any other configuration makes the access undefined.
+Fault
+faultTcr2VheEL3(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_TCR2)) {
+        return inst.undefined();
+    }
+
+    const HCR hcr_el2 = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    if (EL2Enabled(tc) && hcr_el2.e2h) {
+        return NoFault;
+    }
+    return inst.undefined();
+}
+
+template<bool read, auto r_bitfield>
 Fault
 faultCpacrEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
 {
     const CPTR cptr_el2 = tc->readMiscReg(MISCREG_CPTR_EL2);
     const CPTR cptr_el3 = tc->readMiscReg(MISCREG_CPTR_EL3);
-    if (EL2Enabled(tc) && cptr_el2.tcpac) {
+
+    const bool el2_enabled = EL2Enabled(tc);
+    if (el2_enabled && cptr_el2.tcpac) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtEL1<read, r_bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else if (ArmSystem::haveEL(tc, EL3) && cptr_el3.tcpac) {
         return inst.generateTrap(EL3);
     } else {
@@ -1753,17 +2118,7 @@ faultCpacrVheEL2(const MiscRegLUTEntry &entry,
     }
 }
 
-#define HCR_TRAP(bitfield) [] (const MiscRegLUTEntry &entry, \
-    ThreadContext *tc, const MiscRegOp64 &inst) -> Fault     \
-{                                                            \
-    const HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);        \
-    if (EL2Enabled(tc) && hcr.bitfield) {                    \
-        return inst.generateTrap(EL2);                       \
-    } else {                                                 \
-        return NoFault;                                      \
-    }                                                        \
-}
-
+template <auto bitfield>
 Fault
 faultTlbiOsEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -1775,11 +2130,15 @@ faultTlbiOsEL1(const MiscRegLUTEntry &entry,
     } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
                hcr.ttlbos) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtInstEL1<bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else {
         return NoFault;
     }
 }
 
+template <auto bitfield>
 Fault
 faultTlbiIsEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -1791,11 +2150,15 @@ faultTlbiIsEL1(const MiscRegLUTEntry &entry,
     } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
                hcr.ttlbis) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtInstEL1<bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else {
         return NoFault;
     }
 }
 
+template <bool read, auto r_bitfield>
 Fault
 faultCacheEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -1807,19 +2170,28 @@ faultCacheEL1(const MiscRegLUTEntry &entry,
     } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
                hcr.tid4) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtEL1<read, r_bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else {
         return NoFault;
     }
 }
 
+template <bool read, auto r_bitfield>
 Fault
 faultPauthEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
 {
     const HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
     const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
-    if (EL2Enabled(tc) && !hcr.apk) {
+    const bool el2_enabled = EL2Enabled(tc);
+
+    if (el2_enabled && !hcr.apk) {
         return inst.generateTrap(EL2);
+    } else if (auto fault = faultFgtEL1<read, r_bitfield>(entry, tc, inst);
+               fault != NoFault) {
+        return fault;
     } else if (ArmSystem::haveEL(tc, EL3) && !scr.apk) {
         return inst.generateTrap(EL3);
     } else {
@@ -2140,6 +2512,22 @@ faultRng(const MiscRegLUTEntry &entry,
     }
 }
 
+// FEAT_FGT gated fault check (presumably for the FGT control registers):
+// undefined without FEAT_FGT, trapped to EL3 while SCR_EL3.FGTEn is clear.
+Fault
+faultFgtCtrlRegs(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (!HaveExt(tc, ArmExtension::FEAT_FGT)) {
+        return inst.undefined();
+    }
+    const SCR scr_el3 = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (ArmSystem::haveEL(tc, EL3) && !scr_el3.fgten) {
+        return inst.generateTrap(EL3);
+    }
+    return NoFault;
+}
+
 Fault
 faultIdst(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -3528,18 +3916,18 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_MDCCINT_EL1)
       .fault(EL1, faultMdccsrEL1)
       .fault(EL2, faultMdccsrEL2)
-      .allPrivileges();
+      .allPrivileges().exceptUserMode();
     InitReg(MISCREG_OSDTRRX_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGDTRRXext);
     InitReg(MISCREG_MDSCR_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGDSCRext);
     InitReg(MISCREG_OSDTRTX_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGDTRTXext);
     InitReg(MISCREG_OSECCR_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGOSECCR);
     InitReg(MISCREG_DBGBVR0_EL1)
       .allPrivileges().exceptUserMode()
@@ -3878,28 +4266,28 @@ ISA::initializeMiscRegMetadata()
       .fault(EL2, faultDebugEL2)
       .mapsTo(MISCREG_DBGVCR);
     InitReg(MISCREG_MDRAR_EL1)
-      .allPrivileges().monSecureWrite(0).monNonSecureWrite(0)
+      .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_DBGDRAR);
     InitReg(MISCREG_OSLAR_EL1)
-      .allPrivileges().monSecureRead(0).monNonSecureRead(0)
+      .allPrivileges().exceptUserMode().reads(0)
       .mapsTo(MISCREG_DBGOSLAR);
     InitReg(MISCREG_OSLSR_EL1)
-      .allPrivileges().monSecureWrite(0).monNonSecureWrite(0)
+      .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_DBGOSLSR);
     InitReg(MISCREG_OSDLR_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGOSDLR);
     InitReg(MISCREG_DBGPRCR_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGPRCR);
     InitReg(MISCREG_DBGCLAIMSET_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGCLAIMSET);
     InitReg(MISCREG_DBGCLAIMCLR_EL1)
-      .allPrivileges()
+      .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_DBGCLAIMCLR);
     InitReg(MISCREG_DBGAUTHSTATUS_EL1)
-      .allPrivileges().monSecureWrite(0).monNonSecureWrite(0)
+      .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_DBGAUTHSTATUS);
     InitReg(MISCREG_TEECR32_EL1);
     InitReg(MISCREG_TEEHBR32_EL1);
@@ -3908,108 +4296,110 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_MIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::midrEL1>)
       .mapsTo(MISCREG_MIDR);
     InitReg(MISCREG_MPIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::mpidrEL1>)
       .mapsTo(MISCREG_MPIDR);
     InitReg(MISCREG_REVIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid1))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::tid1, &HFGTR::revidrEL1>)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_PFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_PFR0);
     InitReg(MISCREG_ID_PFR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_PFR1);
     InitReg(MISCREG_ID_DFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_DFR0);
     InitReg(MISCREG_ID_AFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_AFR0);
     InitReg(MISCREG_ID_MMFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_MMFR0);
     InitReg(MISCREG_ID_MMFR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_MMFR1);
     InitReg(MISCREG_ID_MMFR2_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_MMFR2);
     InitReg(MISCREG_ID_MMFR3_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_MMFR3);
     InitReg(MISCREG_ID_MMFR4_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_MMFR4);
     InitReg(MISCREG_ID_ISAR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR0);
     InitReg(MISCREG_ID_ISAR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR1);
     InitReg(MISCREG_ID_ISAR2_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR2);
     InitReg(MISCREG_ID_ISAR3_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR3);
     InitReg(MISCREG_ID_ISAR4_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR4);
     InitReg(MISCREG_ID_ISAR5_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR5);
     InitReg(MISCREG_ID_ISAR6_EL1)
       .allPrivileges().exceptUserMode().writes(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .mapsTo(MISCREG_ID_ISAR6);
     InitReg(MISCREG_MVFR0_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_MVFR0);
     InitReg(MISCREG_MVFR1_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_MVFR1);
     InitReg(MISCREG_MVFR2_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64PFR0_EL1)
       .reset([this,release=release,tc=tc](){
@@ -4025,14 +4415,14 @@ ISA::initializeMiscRegMetadata()
       }())
       .unserialize(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64PFR1_EL1)
       .reset(release->has(ArmExtension::FEAT_SME) ?
           0x1 << 24 : 0)
       .unserialize(0)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64DFR0_EL1)
       .reset([p](){
@@ -4041,22 +4431,22 @@ ISA::initializeMiscRegMetadata()
           return dfr0_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64DFR1_EL1)
       .reset(p.id_aa64dfr1_el1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64AFR0_EL1)
       .reset(p.id_aa64afr0_el1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64AFR1_EL1)
       .reset(p.id_aa64afr1_el1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64ISAR0_EL1)
       .reset([p,release=release](){
@@ -4076,7 +4466,9 @@ ISA::initializeMiscRegMetadata()
           isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
           isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
           isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
-          isar0_el1.tlb = release->has(ArmExtension::FEAT_TLBIOS) ? 0x1 : 0x0;
+          isar0_el1.tlb = release->has(ArmExtension::FEAT_TLBIRANGE) ?
+              0x2 : release->has(ArmExtension::FEAT_TLBIOS) ?
+                  0x1 : 0x0;
           isar0_el1.ts = release->has(ArmExtension::FEAT_FLAGM2) ?
               0x2 : release->has(ArmExtension::FEAT_FLAGM) ?
                   0x1 : 0x0;
@@ -4084,7 +4476,7 @@ ISA::initializeMiscRegMetadata()
           return isar0_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64ISAR1_EL1)
       .reset([p,release=release](){
@@ -4097,7 +4489,7 @@ ISA::initializeMiscRegMetadata()
           return isar1_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR0_EL1)
       .reset([p,asidbits=haveLargeAsid64,parange=physAddrRange](){
@@ -4107,12 +4499,13 @@ ISA::initializeMiscRegMetadata()
           return mmfr0_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR1_EL1)
       .reset([p,release=release](){
           AA64MMFR1 mmfr1_el1 = p.id_aa64mmfr1_el1;
-          mmfr1_el1.vmidbits = release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0;
+          mmfr1_el1.vmidbits =
+            release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0;
           mmfr1_el1.vh = release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0;
           mmfr1_el1.hpds = release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0;
           mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
@@ -4120,7 +4513,7 @@ ISA::initializeMiscRegMetadata()
           return mmfr1_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR2_EL1)
       .reset([p,release=release](){
@@ -4132,73 +4525,97 @@ ISA::initializeMiscRegMetadata()
           return mmfr2_el1;
       }())
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid3))
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
+      .allPrivileges().writes(0);
+    InitReg(MISCREG_ID_AA64MMFR3_EL1) // SCTLRX/TCRX track the release
+      .reset([release=release](){
+          AA64MMFR3 mmfr3_el1 = 0;
+          mmfr3_el1.sctlrx =
+            release->has(ArmExtension::FEAT_SCTLR2) ? 0x1 : 0x0;
+          mmfr3_el1.tcrx = release->has(ArmExtension::FEAT_TCR2) ? 0x1 : 0x0;
+          return mmfr3_el1;
+      }())
+      .faultRead(EL0, faultIdst)
+      .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
       .allPrivileges().writes(0);
 
     InitReg(MISCREG_APDAKeyHi_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apdaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apdaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APDAKeyLo_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apdaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apdaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APDBKeyHi_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apdbKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apdbKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APDBKeyLo_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apdbKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apdbKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APGAKeyHi_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apgaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apgaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APGAKeyLo_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apgaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apgaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APIAKeyHi_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apiaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apiaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APIAKeyLo_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apiaKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apiaKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APIBKeyHi_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apibKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apibKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_APIBKeyLo_EL1)
-      .fault(EL1, faultPauthEL1)
+      .faultRead(EL1, faultPauthEL1<true, &HFGTR::apibKey>)
+      .faultWrite(EL1, faultPauthEL1<false, &HFGTR::apibKey>)
       .fault(EL2, faultPauthEL2)
       .allPrivileges().exceptUserMode();
 
     InitReg(MISCREG_CCSIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, faultCacheEL1)
+      .faultRead(EL1, faultCacheEL1<true, &HFGTR::ccsidrEL1>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_CLIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, faultCacheEL1)
+      .faultRead(EL1, faultCacheEL1<true, &HFGTR::clidrEL1>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_AIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid1))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::tid1, &HFGTR::aidrEL1>)
       .allPrivileges().writes(0);
     InitReg(MISCREG_CSSELR_EL1)
       .allPrivileges().exceptUserMode()
-      .fault(EL1, faultCacheEL1)
+      .faultRead(EL1, faultCacheEL1<true, &HFGTR::csselrEL1>)
+      .faultWrite(EL1, faultCacheEL1<false, &HFGTR::csselrEL1>)
       .mapsTo(MISCREG_CSSELR_NS);
     InitReg(MISCREG_CTR_EL0)
       .faultRead(EL0, faultCtrEL0)
-      .faultRead(EL1, HCR_TRAP(tid2))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::tid2, &HFGTR::ctrEL0>)
       .reads(1)
       .mapsTo(MISCREG_CTR);
     InitReg(MISCREG_DCZID_EL0)
       .reset(0x04) // DC ZVA clear 64-byte chunks
+      .faultRead(EL0, faultFgtEL0<true, &HFGTR::dczidEL0>)
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::dczidEL0>)
       .reads(1);
     InitReg(MISCREG_VPIDR_EL2)
       .hyp().mon()
@@ -4210,8 +4627,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_VMPIDR);
     InitReg(MISCREG_SCTLR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::sctlrEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::sctlrEL1>)
       .res0( 0x20440 | (EnDB   ? 0 :     0x2000)
                      | (IESB   ? 0 :   0x200000)
                      | (EnDA   ? 0 :  0x8000000)
@@ -4233,13 +4650,23 @@ ISA::initializeMiscRegMetadata()
                      | (nTLSMD ? 0 :  0x8000000)
                      | (LSMAOE ? 0 : 0x10000000))
       .mapsTo(MISCREG_SCTLR_EL1);
+    InitReg(MISCREG_SCTLR2_EL1)
+      .allPrivileges().exceptUserMode()
+      .faultRead(EL1, faultSctlr2EL1<true, &HCR::trvm>)
+      .faultWrite(EL1, faultSctlr2EL1<false, &HCR::tvm>)
+      .fault(EL2,faultSctlr2EL2);
+    InitReg(MISCREG_SCTLR2_EL12)
+      .fault(EL2, faultSctlr2VheEL2)
+      .fault(EL3, defaultFaultE2H_EL3)
+      .mapsTo(MISCREG_SCTLR2_EL1);
     InitReg(MISCREG_ACTLR_EL1)
       .allPrivileges().exceptUserMode()
-      .fault(EL1, HCR_TRAP(tacr))
+      .fault(EL1, faultHcrEL1<&HCR::tacr>)
       .mapsTo(MISCREG_ACTLR_NS);
     InitReg(MISCREG_CPACR_EL1)
       .allPrivileges().exceptUserMode()
-      .fault(EL1, faultCpacrEL1)
+      .faultRead(EL1, faultCpacrEL1<true, &HFGTR::cpacrEL1>)
+      .faultWrite(EL1, faultCpacrEL1<false, &HFGTR::cpacrEL1>)
       .fault(EL2, faultCpacrEL2)
       .mapsTo(MISCREG_CPACR);
     InitReg(MISCREG_CPACR_EL12)
@@ -4255,6 +4682,9 @@ ISA::initializeMiscRegMetadata()
                        | (EnIA   ? 0 : 0x80000000))
       .res1(0x30c50830)
       .mapsTo(MISCREG_HSCTLR);
+    InitReg(MISCREG_SCTLR2_EL2)
+      .hyp().mon()
+      .fault(EL2, faultSctlr2EL2);
     InitReg(MISCREG_ACTLR_EL2)
       .hyp().mon()
       .mapsTo(MISCREG_HACTLR);
@@ -4287,6 +4717,8 @@ ISA::initializeMiscRegMetadata()
                        | (EnIB   ? 0 : 0x40000000)
                        | (EnIA   ? 0 : 0x80000000))
       .res1(0x30c50830);
+    InitReg(MISCREG_SCTLR2_EL3)
+      .mon();
     InitReg(MISCREG_ACTLR_EL3)
       .mon();
     InitReg(MISCREG_SCR_EL3)
@@ -4302,8 +4734,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_SDCR);
     InitReg(MISCREG_TTBR0_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::ttbr0EL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::ttbr0EL1>)
       .mapsTo(MISCREG_TTBR0_NS);
     InitReg(MISCREG_TTBR0_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4311,8 +4743,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_TTBR0_EL1);
     InitReg(MISCREG_TTBR1_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::ttbr1EL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::ttbr1EL1>)
       .mapsTo(MISCREG_TTBR1_NS);
     InitReg(MISCREG_TTBR1_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4320,13 +4752,22 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_TTBR1_EL1);
     InitReg(MISCREG_TCR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::tcrEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::tcrEL1>)
       .mapsTo(MISCREG_TTBCR_NS);
     InitReg(MISCREG_TCR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
       .fault(EL3, defaultFaultE2H_EL3)
       .mapsTo(MISCREG_TTBCR_NS);
+    InitReg(MISCREG_TCR2_EL1)
+      .allPrivileges().exceptUserMode()
+      .faultRead(EL1, faultTcr2EL1<true, &HCR::trvm>)
+      .faultWrite(EL1, faultTcr2EL1<false, &HCR::tvm>)
+      .fault(EL2, faultTcr2EL2);
+    InitReg(MISCREG_TCR2_EL12)
+      .fault(EL2, faultTcr2VheEL2)
+      .fault(EL3, faultTcr2VheEL3)
+      .mapsTo(MISCREG_TCR2_EL1);
     InitReg(MISCREG_TTBR0_EL2)
       .hyp().mon()
       .mapsTo(MISCREG_HTTBR);
@@ -4335,6 +4776,9 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_TCR_EL2)
       .hyp().mon()
       .mapsTo(MISCREG_HTCR);
+    InitReg(MISCREG_TCR2_EL2)
+      .hyp().mon()
+      .fault(EL2, faultTcr2EL2);
     InitReg(MISCREG_VTTBR_EL2)
       .hyp().mon()
       .mapsTo(MISCREG_VTTBR);
@@ -4424,8 +4868,8 @@ ISA::initializeMiscRegMetadata()
       .mon();
     InitReg(MISCREG_AFSR0_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::afsr0EL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::afsr0EL1>)
       .mapsTo(MISCREG_ADFSR_NS);
     InitReg(MISCREG_AFSR0_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4433,16 +4877,16 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_ADFSR_NS);
     InitReg(MISCREG_AFSR1_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::afsr1EL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::afsr1EL1>)
       .mapsTo(MISCREG_AIFSR_NS);
     InitReg(MISCREG_AFSR1_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
       .fault(EL3, defaultFaultE2H_EL3)
       .mapsTo(MISCREG_AIFSR_NS);
     InitReg(MISCREG_ESR_EL1)
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::esrEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::esrEL1>)
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_ESR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4472,8 +4916,8 @@ ISA::initializeMiscRegMetadata()
       .mon();
     InitReg(MISCREG_FAR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::farEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::farEL1>)
       .mapsTo(MISCREG_DFAR_NS, MISCREG_IFAR_NS);
     InitReg(MISCREG_FAR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4489,61 +4933,61 @@ ISA::initializeMiscRegMetadata()
       .mon();
     InitReg(MISCREG_IC_IALLUIS)
       .warnNotFail()
-      .faultWrite(EL1, faultPouIsEL1)
+      .faultWrite(EL1, faultPouIsEL1<&HFGITR::icialluis>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_PAR_EL1)
       .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_PAR_NS);
     InitReg(MISCREG_IC_IALLU)
       .warnNotFail()
-      .faultWrite(EL1, faultPouEL1)
+      .faultWrite(EL1, faultPouEL1<&HFGITR::iciallu>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_DC_IVAC_Xt)
-      .faultWrite(EL1, HCR_TRAP(tpc))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tpc, &HFGITR::dcivac>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_DC_ISW_Xt)
       .warnNotFail()
-      .faultWrite(EL1, HCR_TRAP(tsw))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tsw, &HFGITR::dcisw>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_AT_S1E1R_Xt)
-      .faultWrite(EL1, HCR_TRAP(at))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::at, &HFGITR::ats1e1r>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_AT_S1E1W_Xt)
-      .faultWrite(EL1, HCR_TRAP(at))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::at, &HFGITR::ats1e1w>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_AT_S1E0R_Xt)
-      .faultWrite(EL1, HCR_TRAP(at))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::at, &HFGITR::ats1e0r>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_AT_S1E0W_Xt)
-      .faultWrite(EL1, HCR_TRAP(at))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::at, &HFGITR::ats1e0w>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_DC_CSW_Xt)
       .warnNotFail()
-      .faultWrite(EL1, HCR_TRAP(tsw))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tsw, &HFGITR::dccsw>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_DC_CISW_Xt)
       .warnNotFail()
-      .faultWrite(EL1, HCR_TRAP(tsw))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tsw, &HFGITR::dccisw>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_DC_ZVA_Xt)
       .writes(1)
       .faultWrite(EL0, faultDczvaEL0)
-      .faultWrite(EL1, HCR_TRAP(tdz));
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tdz, &HFGITR::dczva>);
     InitReg(MISCREG_IC_IVAU_Xt)
       .faultWrite(EL0, faultPouEL0)
-      .faultWrite(EL1, faultPouEL1)
+      .faultWrite(EL1, faultPouEL1<&HFGITR::icivau>)
       .writes(1);
     InitReg(MISCREG_DC_CVAC_Xt)
       .faultWrite(EL0, faultCvacEL0)
-      .faultWrite(EL1, HCR_TRAP(tpc))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tpc, &HFGITR::dccvac>)
       .writes(1);
     InitReg(MISCREG_DC_CVAU_Xt)
       .faultWrite(EL0, faultPouEL0)
-      .faultWrite(EL1, faultPouEL1)
+      .faultWrite(EL1, faultPouEL1<&HFGITR::dccvau>)
       .writes(1);
     InitReg(MISCREG_DC_CIVAC_Xt)
       .faultWrite(EL0, faultCvacEL0)
-      .faultWrite(EL1, HCR_TRAP(tpc))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::tpc, &HFGITR::dccivac>)
       .writes(1);
     InitReg(MISCREG_AT_S1E2R_Xt)
       .monNonSecureWrite().hypWrite();
@@ -4562,58 +5006,58 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_AT_S1E3W_Xt)
       .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_VMALLE1OS)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbivmalle1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAE1OS_Xt)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbivae1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_ASIDE1OS_Xt)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbiaside1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAAE1OS_Xt)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbivaae1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VALE1OS_Xt)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbivale1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAALE1OS_Xt)
-      .faultWrite(EL1, faultTlbiOsEL1)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbivaale1os>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VMALLE1IS)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbivmalle1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAE1IS_Xt)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbivae1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_ASIDE1IS_Xt)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbiaside1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAAE1IS_Xt)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbivaae1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VALE1IS_Xt)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbivale1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAALE1IS_Xt)
-      .faultWrite(EL1, faultTlbiIsEL1)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbivaale1is>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VMALLE1)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbivmalle1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAE1_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbivae1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_ASIDE1_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbiaside1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAAE1_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbivaae1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VALE1_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbivale1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAALE1_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbivaale1>)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_IPAS2E1OS_Xt)
       .hypWrite().monSecureWrite().monNonSecureWrite();
@@ -4675,6 +5119,79 @@ ISA::initializeMiscRegMetadata()
       .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_VALE3_Xt)
       .monSecureWrite().monNonSecureWrite();
+
+    InitReg(MISCREG_TLBI_RVAE1_Xt)
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbirvae1>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAAE1_Xt)
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbirvaae1>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVALE1_Xt)
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbirvale1>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAALE1_Xt)
+      .faultWrite(EL1, faultHcrFgtInstEL1<&HCR::ttlb, &HFGITR::tlbirvaale1>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RIPAS2E1_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RIPAS2LE1_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE2_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVALE2_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE3_Xt)
+      .monWrite();
+    InitReg(MISCREG_TLBI_RVALE3_Xt)
+      .monWrite();
+    InitReg(MISCREG_TLBI_RVAE1IS_Xt)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbirvae1is>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAAE1IS_Xt)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbirvaae1is>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVALE1IS_Xt)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbirvale1is>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAALE1IS_Xt)
+      .faultWrite(EL1, faultTlbiIsEL1<&HFGITR::tlbirvaale1is>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RIPAS2E1IS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RIPAS2LE1IS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE2IS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVALE2IS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE3IS_Xt)
+      .monWrite();
+    InitReg(MISCREG_TLBI_RVALE3IS_Xt)
+      .monWrite();
+    InitReg(MISCREG_TLBI_RVAE1OS_Xt)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbirvae1os>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAAE1OS_Xt)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbirvaae1os>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVALE1OS_Xt)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbirvale1os>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RVAALE1OS_Xt)
+      .faultWrite(EL1, faultTlbiOsEL1<&HFGITR::tlbirvaale1os>)
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_RIPAS2E1OS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RIPAS2LE1OS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE2OS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVALE2OS_Xt)
+      .hypWrite().monWrite();
+    InitReg(MISCREG_TLBI_RVAE3OS_Xt)
+      .monWrite();
+    InitReg(MISCREG_TLBI_RVALE3OS_Xt)
+      .monWrite();
     InitReg(MISCREG_PMINTENSET_EL1)
       .allPrivileges().exceptUserMode()
       .mapsTo(MISCREG_PMINTENSET);
@@ -4724,8 +5241,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_PMOVSSET);
     InitReg(MISCREG_MAIR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::mairEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::mairEL1>)
       .mapsTo(MISCREG_PRRR_NS, MISCREG_NMRR_NS);
     InitReg(MISCREG_MAIR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4733,8 +5250,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_PRRR_NS, MISCREG_NMRR_NS);
     InitReg(MISCREG_AMAIR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::amairEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::amairEL1>)
       .mapsTo(MISCREG_AMAIR0_NS, MISCREG_AMAIR1_NS);
     InitReg(MISCREG_AMAIR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4756,6 +5273,8 @@ ISA::initializeMiscRegMetadata()
       .allPrivileges().exceptUserMode();
     InitReg(MISCREG_VBAR_EL1)
       .allPrivileges().exceptUserMode()
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::vbarEL1>)
+      .faultWrite(EL1, faultFgtEL1<false, &HFGTR::vbarEL1>)
       .mapsTo(MISCREG_VBAR_NS);
     InitReg(MISCREG_VBAR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4785,8 +5304,8 @@ ISA::initializeMiscRegMetadata()
       .mon();
     InitReg(MISCREG_CONTEXTIDR_EL1)
       .allPrivileges().exceptUserMode()
-      .faultRead(EL1, HCR_TRAP(trvm))
-      .faultWrite(EL1, HCR_TRAP(tvm))
+      .faultRead(EL1, faultHcrFgtEL1<true, &HCR::trvm, &HFGTR::contextidrEL1>)
+      .faultWrite(EL1, faultHcrFgtEL1<false, &HCR::tvm, &HFGTR::contextidrEL1>)
       .mapsTo(MISCREG_CONTEXTIDR_NS);
     InitReg(MISCREG_CONTEXTIDR_EL12)
       .fault(EL2, defaultFaultE2H_EL2)
@@ -4794,12 +5313,21 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_CONTEXTIDR_NS);
     InitReg(MISCREG_TPIDR_EL1)
       .allPrivileges().exceptUserMode()
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::tpidrEL1>)
+      .faultWrite(EL1, faultFgtEL1<false, &HFGTR::tpidrEL1>)
       .mapsTo(MISCREG_TPIDRPRW_NS);
     InitReg(MISCREG_TPIDR_EL0)
       .allPrivileges()
+      .faultRead(EL0, faultFgtEL0<true, &HFGTR::tpidrEL0>)
+      .faultWrite(EL0, faultFgtEL0<false, &HFGTR::tpidrEL0>)
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::tpidrEL0>)
+      .faultWrite(EL1, faultFgtEL1<false, &HFGTR::tpidrEL0>)
       .mapsTo(MISCREG_TPIDRURW_NS);
     InitReg(MISCREG_TPIDRRO_EL0)
       .allPrivileges().userNonSecureWrite(0).userSecureWrite(0)
+      .faultRead(EL0, faultFgtEL0<true, &HFGTR::tpidrroEL0>)
+      .faultRead(EL1, faultFgtEL1<true, &HFGTR::tpidrroEL0>)
+      .faultWrite(EL1, faultFgtEL1<false, &HFGTR::tpidrroEL0>)
       .mapsTo(MISCREG_TPIDRURO_NS);
     InitReg(MISCREG_TPIDR_EL2)
       .hyp().mon()
@@ -5187,9 +5715,13 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ICC_IGRPEN0_EL1)
         .res0(0xFFFFFFFE) // [31:1]
         .allPrivileges().exceptUserMode()
+        .faultRead(EL1, faultFgtEL1<true, &HFGTR::iccIgrpEnEL1>)
+        .faultWrite(EL1, faultFgtEL1<false, &HFGTR::iccIgrpEnEL1>)
         .mapsTo(MISCREG_ICC_IGRPEN0);
     InitReg(MISCREG_ICC_IGRPEN1_EL1)
         .banked64()
+        .faultRead(EL1, faultFgtEL1<true, &HFGTR::iccIgrpEnEL1>)
+        .faultWrite(EL1, faultFgtEL1<false, &HFGTR::iccIgrpEnEL1>)
         .mapsTo(MISCREG_ICC_IGRPEN1);
     InitReg(MISCREG_ICC_IGRPEN1_EL1_NS)
         .bankedChild()
@@ -5502,7 +6034,7 @@ ISA::initializeMiscRegMetadata()
             return zfr0_el1;
         }())
         .faultRead(EL0, faultIdst)
-        .faultRead(EL1, HCR_TRAP(tid3))
+        .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ZCR_EL3)
         .reset(sveVL - 1)
@@ -5542,7 +6074,7 @@ ISA::initializeMiscRegMetadata()
             return smfr0_el1;
         }())
         .faultRead(EL0, faultIdst)
-        .faultRead(EL1, HCR_TRAP(tid3))
+        .faultRead(EL1, faultHcrEL1<&HCR::tid3>)
         .allPrivileges().writes(0);
     InitReg(MISCREG_SVCR)
         .res0([](){
@@ -5565,7 +6097,7 @@ ISA::initializeMiscRegMetadata()
             return smidr_el1;
         }())
         .faultRead(EL0, faultIdst)
-        .faultRead(EL1, HCR_TRAP(tid1))
+        .faultRead(EL1, faultHcrEL1<&HCR::tid1>)
         .allPrivileges().writes(0);
     InitReg(MISCREG_SMPRI_EL1)
         .res0(mask(63, 4))
@@ -5644,6 +6176,17 @@ ISA::initializeMiscRegMetadata()
         .unverifiable()
         .allPrivileges().writes(0);
 
+    // FEAT_FGT extension
+    InitReg(MISCREG_HFGRTR_EL2)
+      .fault(EL2, faultFgtCtrlRegs)
+      .hyp().mon(release->has(ArmExtension::FEAT_FGT));
+    InitReg(MISCREG_HFGWTR_EL2)
+      .fault(EL2, faultFgtCtrlRegs)
+      .hyp().mon(release->has(ArmExtension::FEAT_FGT));
+    InitReg(MISCREG_HFGITR_EL2)
+      .fault(EL2, faultFgtCtrlRegs)
+      .hyp().mon(release->has(ArmExtension::FEAT_FGT));
+
     // Dummy registers
     InitReg(MISCREG_NOP)
       .allPrivileges();
@@ -5691,14 +6234,6 @@ ISA::initializeMiscRegMetadata()
       .warnNotFail()
       .fault(faultUnimplemented);
 
-    // FGT extension (unimplemented)
-    InitReg(MISCREG_HFGRTR_EL2)
-      .unimplemented()
-      .warnNotFail();
-    InitReg(MISCREG_HFGWTR_EL2)
-      .unimplemented()
-      .warnNotFail();
-
     // Register mappings for some unimplemented registers:
     // ESR_EL1 -> DFSR
     // RMR_EL1 -> RMR
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index cb03841848..065e5439c2 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -96,7 +96,7 @@ namespace ArmISA
         MISCREG_SEV_MAILBOX,
         MISCREG_TLBINEEDSYNC,
 
-        // AArch32 CP14 registers (debug/trace/ThumbEE/Jazelle control)
+        // AArch32 CP14 registers (debug/trace control)
         MISCREG_DBGDIDR,
         MISCREG_DBGDSCRint,
         MISCREG_DBGDCCINT,
@@ -583,10 +583,13 @@ namespace ArmISA
         MISCREG_VMPIDR_EL2,
         MISCREG_SCTLR_EL1,
         MISCREG_SCTLR_EL12,
+        MISCREG_SCTLR2_EL1,
+        MISCREG_SCTLR2_EL12,
         MISCREG_ACTLR_EL1,
         MISCREG_CPACR_EL1,
         MISCREG_CPACR_EL12,
         MISCREG_SCTLR_EL2,
+        MISCREG_SCTLR2_EL2,
         MISCREG_ACTLR_EL2,
         MISCREG_HCR_EL2,
         MISCREG_HCRX_EL2,
@@ -595,6 +598,7 @@ namespace ArmISA
         MISCREG_HSTR_EL2,
         MISCREG_HACR_EL2,
         MISCREG_SCTLR_EL3,
+        MISCREG_SCTLR2_EL3,
         MISCREG_ACTLR_EL3,
         MISCREG_SCR_EL3,
         MISCREG_SDER32_EL3,
@@ -606,8 +610,11 @@ namespace ArmISA
         MISCREG_TTBR1_EL12,
         MISCREG_TCR_EL1,
         MISCREG_TCR_EL12,
+        MISCREG_TCR2_EL1,
+        MISCREG_TCR2_EL12,
         MISCREG_TTBR0_EL2,
         MISCREG_TCR_EL2,
+        MISCREG_TCR2_EL2,
         MISCREG_VTTBR_EL2,
         MISCREG_VTCR_EL2,
         MISCREG_VSTTBR_EL2,
@@ -729,6 +736,36 @@ namespace ArmISA
         MISCREG_TLBI_ALLE3,
         MISCREG_TLBI_VAE3_Xt,
         MISCREG_TLBI_VALE3_Xt,
+        MISCREG_TLBI_RVAE1_Xt,
+        MISCREG_TLBI_RVAAE1_Xt,
+        MISCREG_TLBI_RVALE1_Xt,
+        MISCREG_TLBI_RVAALE1_Xt,
+        MISCREG_TLBI_RIPAS2E1_Xt,
+        MISCREG_TLBI_RIPAS2LE1_Xt,
+        MISCREG_TLBI_RVAE2_Xt,
+        MISCREG_TLBI_RVALE2_Xt,
+        MISCREG_TLBI_RVAE3_Xt,
+        MISCREG_TLBI_RVALE3_Xt,
+        MISCREG_TLBI_RVAE1IS_Xt,
+        MISCREG_TLBI_RVAAE1IS_Xt,
+        MISCREG_TLBI_RVALE1IS_Xt,
+        MISCREG_TLBI_RVAALE1IS_Xt,
+        MISCREG_TLBI_RIPAS2E1IS_Xt,
+        MISCREG_TLBI_RIPAS2LE1IS_Xt,
+        MISCREG_TLBI_RVAE2IS_Xt,
+        MISCREG_TLBI_RVALE2IS_Xt,
+        MISCREG_TLBI_RVAE3IS_Xt,
+        MISCREG_TLBI_RVALE3IS_Xt,
+        MISCREG_TLBI_RVAE1OS_Xt,
+        MISCREG_TLBI_RVAAE1OS_Xt,
+        MISCREG_TLBI_RVALE1OS_Xt,
+        MISCREG_TLBI_RVAALE1OS_Xt,
+        MISCREG_TLBI_RIPAS2E1OS_Xt,
+        MISCREG_TLBI_RIPAS2LE1OS_Xt,
+        MISCREG_TLBI_RVAE2OS_Xt,
+        MISCREG_TLBI_RVALE2OS_Xt,
+        MISCREG_TLBI_RVAE3OS_Xt,
+        MISCREG_TLBI_RVALE3OS_Xt,
         MISCREG_PMINTENSET_EL1,
         MISCREG_PMINTENCLR_EL1,
         MISCREG_PMCR_EL0,
@@ -842,6 +879,7 @@ namespace ArmISA
         MISCREG_TTBR1_EL2,
 
         MISCREG_ID_AA64MMFR2_EL1,
+        MISCREG_ID_AA64MMFR3_EL1,
 
         //PAuth Key Regsiters
         MISCREG_APDAKeyHi_EL1,
@@ -1096,6 +1134,11 @@ namespace ArmISA
         MISCREG_RNDR,
         MISCREG_RNDRRS,
 
+        // FEAT_FGT
+        MISCREG_HFGITR_EL2,
+        MISCREG_HFGRTR_EL2,
+        MISCREG_HFGWTR_EL2,
+
         // NUM_PHYS_MISCREGS specifies the number of actual physical
         // registers, not considering the following pseudo-registers
         // (dummy registers), like MISCREG_UNKNOWN, MISCREG_IMPDEF_UNIMPL.
@@ -1126,10 +1169,6 @@ namespace ArmISA
         MISCREG_VSESR_EL2,
         MISCREG_VDISR_EL2,
 
-        // FGT extension (unimplemented)
-        MISCREG_HFGRTR_EL2,
-        MISCREG_HFGWTR_EL2,
-
         // PSTATE
         MISCREG_PAN,
         MISCREG_UAO,
@@ -1502,6 +1541,13 @@ namespace ArmISA
             return *this;
         }
         chain
+        monWrite(bool v = true) const // v -> both monitor write setters
+        {
+            monSecureWrite(v); // secure-state monitor write
+            monNonSecureWrite(v); // non-secure-state monitor write
+            return *this; // hand back the chain for further calls
+        }
+        chain
         monSecure(bool v = true) const
         {
             monSecureRead(v);
@@ -2264,10 +2310,13 @@ namespace ArmISA
         "vmpidr_el2",
         "sctlr_el1",
         "sctlr_el12",
+        "sctlr2_el1",
+        "sctlr2_el12",
         "actlr_el1",
         "cpacr_el1",
         "cpacr_el12",
         "sctlr_el2",
+        "sctlr2_el2",
         "actlr_el2",
         "hcr_el2",
         "hcrx_el2",
@@ -2276,6 +2325,7 @@ namespace ArmISA
         "hstr_el2",
         "hacr_el2",
         "sctlr_el3",
+        "sctlr2_el3",
         "actlr_el3",
         "scr_el3",
         "sder32_el3",
@@ -2287,8 +2337,11 @@ namespace ArmISA
         "ttbr1_el12",
         "tcr_el1",
         "tcr_el12",
+        "tcr2_el1",
+        "tcr2_el12",
         "ttbr0_el2",
         "tcr_el2",
+        "tcr2_el2",
         "vttbr_el2",
         "vtcr_el2",
         "vsttbr_el2",
@@ -2410,6 +2463,36 @@ namespace ArmISA
         "tlbi_alle3",
         "tlbi_vae3_xt",
         "tlbi_vale3_xt",
+        "tlbi_rvae1_xt",
+        "tlbi_rvaae1_xt",
+        "tlbi_rvale1_xt",
+        "tlbi_rvaale1_xt",
+        "tlbi_ripas2e1_xt",
+        "tlbi_ripas2le1_xt",
+        "tlbi_rvae2_xt",
+        "tlbi_rvale2_xt",
+        "tlbi_rvae3_xt",
+        "tlbi_rvale3_xt",
+        "tlbi_rvae1is_xt",
+        "tlbi_rvaae1is_xt",
+        "tlbi_rvale1is_xt",
+        "tlbi_rvaale1is_xt",
+        "tlbi_ripas2e1is_xt",
+        "tlbi_ripas2le1is_xt",
+        "tlbi_rvae2is_xt",
+        "tlbi_rvale2is_xt",
+        "tlbi_rvae3is_xt",
+        "tlbi_rvale3is_xt",
+        "tlbi_rvae1os_xt",
+        "tlbi_rvaae1os_xt",
+        "tlbi_rvale1os_xt",
+        "tlbi_rvaale1os_xt",
+        "tlbi_ripas2e1os_xt",
+        "tlbi_ripas2le1os_xt",
+        "tlbi_rvae2os_xt",
+        "tlbi_rvale2os_xt",
+        "tlbi_rvae3os_xt",
+        "tlbi_rvale3os_xt",
         "pmintenset_el1",
         "pmintenclr_el1",
         "pmcr_el0",
@@ -2517,6 +2600,7 @@ namespace ArmISA
 
         "ttbr1_el2",
         "id_aa64mmfr2_el1",
+        "id_aa64mmfr3_el1",
 
         "apdakeyhi_el1",
         "apdakeylo_el1",
@@ -2766,6 +2850,10 @@ namespace ArmISA
         "rndr",
         "rndrrs",
 
+        "hfgitr_el2",
+        "hfgrtr_el2",
+        "hfgwtr_el2",
+
         "num_phys_regs",
 
         // Dummy registers
@@ -2784,8 +2872,6 @@ namespace ArmISA
         "disr_el1",
         "vsesr_el2",
         "vdisr_el2",
-        "hfgrtr_el2",
-        "hfgwtr_el2",
 
         // PSTATE
         "pan",
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 00640dd339..0e6bdc8fe3 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2022 Arm Limited
+ * Copyright (c) 2010-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -54,7 +54,7 @@ namespace ArmISA
         Bitfield<28> v;
         Bitfield<27> q;
         Bitfield<26, 25> it1;
-        Bitfield<24> j;
+        Bitfield<24> dit;       // AArch64
         Bitfield<23> uao;       // AArch64
         Bitfield<22> pan;
         Bitfield<21> ss;        // AArch64
@@ -187,6 +187,21 @@ namespace ArmISA
         Bitfield<3, 0> cnp;
     EndBitUnion(AA64MMFR2)
 
+    BitUnion64(AA64MMFR3) // presumably ID_AA64MMFR3_EL1; 4-bit ID fields
+        Bitfield<47, 44> anerr;
+        Bitfield<43, 40> snerr;
+        Bitfield<39, 36> d128_2;
+        Bitfield<35, 32> d128;
+        Bitfield<31, 28> mec;
+        Bitfield<27, 24> aie;
+        Bitfield<23, 20> s2poe;
+        Bitfield<19, 16> s1poe;
+        Bitfield<15, 12> s2pie;
+        Bitfield<11, 8> s1pie;
+        Bitfield<7, 4> sctlrx;
+        Bitfield<3, 0> tcrx;
+    EndBitUnion(AA64MMFR3) // bits [63:48] are left without named fields
+
     BitUnion64(AA64PFR0)
         Bitfield<63, 60> csv3;
         Bitfield<59, 56> csv2;
@@ -361,8 +376,11 @@ namespace ArmISA
     EndBitUnion(NSACR)
 
     BitUnion64(SCR)
+        Bitfield<44> sctlr2En;
+        Bitfield<43> tcr2En;
         Bitfield<40> trndr;
         Bitfield<38> hxen;
+        Bitfield<27> fgten;
         Bitfield<21> fien;
         Bitfield<20> nmea;
         Bitfield<19> ease;
@@ -439,8 +457,6 @@ namespace ArmISA
         Bitfield<7>    itd;     // IT disable
                                 // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only)
         Bitfield<6, 3> rao4;    // Read as one
-        Bitfield<6>    thee;    // ThumbEE enable
-                                // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only)
         Bitfield<5>    cp15ben; // CP15 barrier enable
                                 // (AArch32 and AArch64 SCTLR_EL1 only)
         Bitfield<4>    sa0;     // Stack Alignment Check Enable for EL0
@@ -931,6 +947,122 @@ namespace ArmISA
         Bitfield<3,0>   pcsample;
    EndBitUnion(DEVID)
 
+    BitUnion64(HFGITR) // single-bit fields; presumably HFGITR_EL2 (FEAT_FGT)
+        Bitfield<54> dccvac;
+        Bitfield<53> svcEL1;
+        Bitfield<52> svcEL0;
+        Bitfield<51> eret;
+        Bitfield<47> tlbivaale1;
+        Bitfield<46> tlbivale1;
+        Bitfield<45> tlbivaae1;
+        Bitfield<44> tlbiaside1;
+        Bitfield<43> tlbivae1;
+        Bitfield<42> tlbivmalle1;
+        Bitfield<41> tlbirvaale1;
+        Bitfield<40> tlbirvale1;
+        Bitfield<39> tlbirvaae1;
+        Bitfield<38> tlbirvae1;
+        Bitfield<37> tlbirvaale1is;
+        Bitfield<36> tlbirvale1is;
+        Bitfield<35> tlbirvaae1is;
+        Bitfield<34> tlbirvae1is;
+        Bitfield<33> tlbivaale1is;
+        Bitfield<32> tlbivale1is;
+        Bitfield<31> tlbivaae1is;
+        Bitfield<30> tlbiaside1is;
+        Bitfield<29> tlbivae1is;
+        Bitfield<28> tlbivmalle1is;
+        Bitfield<27> tlbirvaale1os;
+        Bitfield<26> tlbirvale1os;
+        Bitfield<25> tlbirvaae1os;
+        Bitfield<24> tlbirvae1os;
+        Bitfield<23> tlbivaale1os;
+        Bitfield<22> tlbivale1os;
+        Bitfield<21> tlbivaae1os;
+        Bitfield<20> tlbiaside1os;
+        Bitfield<19> tlbivae1os;
+        Bitfield<18> tlbivmalle1os;
+        Bitfield<17> ats1e1wp;
+        Bitfield<16> ats1e1rp;
+        Bitfield<15> ats1e0w;
+        Bitfield<14> ats1e0r;
+        Bitfield<13> ats1e1w;
+        Bitfield<12> ats1e1r;
+        Bitfield<11> dczva;
+        Bitfield<10> dccivac;
+        Bitfield<9> dccvapd; // NOTE(review): Arm ARM spells this DCCVADP
+        Bitfield<8> dccvap;
+        Bitfield<7> dccvau;
+        Bitfield<6> dccisw;
+        Bitfield<5> dccsw;
+        Bitfield<4> dcisw;
+        Bitfield<3> dcivac;
+        Bitfield<2> icivau;
+        Bitfield<1> iciallu;
+        Bitfield<0> icialluis;
+    EndBitUnion(HFGITR) // bits 48-50 and 55-63 have no named field here
+
+    // Shared layout for HFGRTR and HFGWTR. Fields tagged
+    // "RES0 for HFGWTR" are for HFGRTR only (RO registers)
+    BitUnion64(HFGTR)
+        Bitfield<50> nAccdataEL1;
+        Bitfield<49> erxaddrEL1;
+        Bitfield<48> erxpfgcdnEL1;
+        Bitfield<47> erxpfgctlEL1;
+        Bitfield<46> erxpfgfEL1; // RES0 for HFGWTR
+        Bitfield<45> erxmiscNEL1;
+        Bitfield<44> erxstatusEL1;
+        Bitfield<43> erxctlrEL1;
+        Bitfield<42> erxfrEL1; // NOTE(review): RO reg; RES0 for HFGWTR too?
+        Bitfield<41> errselrEL1;
+        Bitfield<40> erridrEL1; // RES0 for HFGWTR
+        Bitfield<39> iccIgrpEnEL1; // used for both ICC_IGRPEN0/1_EL1
+        Bitfield<38> vbarEL1;
+        Bitfield<37> ttbr1EL1;
+        Bitfield<36> ttbr0EL1;
+        Bitfield<35> tpidrEL0;
+        Bitfield<34> tpidrroEL0;
+        Bitfield<33> tpidrEL1;
+        Bitfield<32> tcrEL1;
+        Bitfield<31> scxtnumEL0;
+        Bitfield<30> scxtnumEL1;
+        Bitfield<29> sctlrEL1;
+        Bitfield<28> revidrEL1; // RES0 for HFGWTR
+        Bitfield<27> parEL1;
+        Bitfield<26> mpidrEL1; // RES0 for HFGWTR
+        Bitfield<25> midrEL1; // RES0 for HFGWTR
+        Bitfield<24> mairEL1;
+        Bitfield<23> lorsaEL1;
+        Bitfield<22> lornEL1;
+        Bitfield<21> loridEL1; // RES0 for HFGWTR
+        Bitfield<20> loreaEL1;
+        Bitfield<19> lorcEL1;
+        Bitfield<18> isrEL1; // RES0 for HFGWTR
+        Bitfield<17> farEL1;
+        Bitfield<16> esrEL1;
+        Bitfield<15> dczidEL0; // RES0 for HFGWTR
+        Bitfield<14> ctrEL0; // RES0 for HFGWTR
+        Bitfield<13> csselrEL1;
+        Bitfield<12> cpacrEL1;
+        Bitfield<11> contextidrEL1;
+        Bitfield<10> clidrEL1; // RES0 for HFGWTR
+        Bitfield<9> ccsidrEL1; // RES0 for HFGWTR
+        Bitfield<8> apibKey;
+        Bitfield<7> apiaKey;
+        Bitfield<6> apgaKey;
+        Bitfield<5> apdbKey;
+        Bitfield<4> apdaKey;
+        Bitfield<3> amairEL1;
+        Bitfield<2> aidrEL1; // RES0 for HFGWTR
+        Bitfield<1> afsr1EL1;
+        Bitfield<0> afsr0EL1;
+    EndBitUnion(HFGTR) // bits 51-63 have no named field here
+
+    BitUnion64(HCRX) // presumably HCRX_EL2; only two bits are named
+        Bitfield<15> sctlr2En; // same field name as SCR bit 44
+        Bitfield<14> tcr2En; // same field name as SCR bit 43
+    EndBitUnion(HCRX)
+
 } // namespace ArmISA
 } // namespace gem5
 
diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index 60f9e3f76e..5938755d86 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2019, 2021-2022 Arm Limited
+ * Copyright (c) 2010, 2012-2019, 2021-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -2305,6 +2305,7 @@ TableWalker::insertPartialTableEntry(LongDescriptor &descriptor)
     te.asid           = currState->asid;
     te.vmid           = currState->vmid;
     te.N              = descriptor.offsetBits();
+    te.tg             = descriptor.grainSize;
     te.vpn            = currState->vaddr >> te.N;
     te.size           = (1ULL << te.N) - 1;
     te.pfn            = descriptor.nextTableAddr();
@@ -2378,6 +2379,7 @@ TableWalker::insertTableEntry(DescriptorBase &descriptor, bool long_descriptor)
         LongDescriptor l_descriptor =
             dynamic_cast<LongDescriptor &>(descriptor);
 
+        te.tg = l_descriptor.grainSize;
         te.xn |= currState->xnTable;
         te.pxn = currState->pxnTable || l_descriptor.pxn();
         if (isStage2) {
diff --git a/src/arch/arm/tlbi_op.cc b/src/arch/arm/tlbi_op.cc
index e89f411384..b49139bf3e 100644
--- a/src/arch/arm/tlbi_op.cc
+++ b/src/arch/arm/tlbi_op.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022 Arm Limited
+ * Copyright (c) 2018-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -209,6 +209,22 @@ TLBIALLN::match(TlbEntry* te, vmid_t vmid) const
         te->checkELMatch(targetEL, false);
 }
 
+TlbEntry::Lookup
+TLBIMVAA::lookupGen(vmid_t vmid) const
+{
+    TlbEntry::Lookup lookup_data;
+    lookup_data.va = sext<56>(addr);
+    lookup_data.ignoreAsn = true;
+    lookup_data.vmid = vmid;
+    lookup_data.hyp = targetEL == EL2;
+    lookup_data.secure = secureLookup;
+    lookup_data.functional = true;
+    lookup_data.targetEL = targetEL;
+    lookup_data.inHost = inHost;
+    lookup_data.mode = BaseMMU::Read;
+    return lookup_data;
+}
+
 void
 TLBIMVAA::operator()(ThreadContext* tc)
 {
@@ -224,10 +240,19 @@ TLBIMVAA::operator()(ThreadContext* tc)
 
 bool
 TLBIMVAA::match(TlbEntry* te, vmid_t vmid) const
+{
+    TlbEntry::Lookup lookup_data = lookupGen(vmid);
+
+    return te->match(lookup_data) && (!lastLevel || !te->partial);
+}
+
+TlbEntry::Lookup
+TLBIMVA::lookupGen(vmid_t vmid) const
 {
     TlbEntry::Lookup lookup_data;
     lookup_data.va = sext<56>(addr);
-    lookup_data.ignoreAsn = true;
+    lookup_data.asn = asid;
+    lookup_data.ignoreAsn = false;
     lookup_data.vmid = vmid;
     lookup_data.hyp = targetEL == EL2;
     lookup_data.secure = secureLookup;
@@ -236,7 +261,7 @@ TLBIMVAA::match(TlbEntry* te, vmid_t vmid) const
     lookup_data.inHost = inHost;
     lookup_data.mode = BaseMMU::Read;
 
-    return te->match(lookup_data) && (!lastLevel || !te->partial);
+    return lookup_data;
 }
 
 void
@@ -255,17 +280,7 @@ TLBIMVA::operator()(ThreadContext* tc)
 bool
 TLBIMVA::match(TlbEntry* te, vmid_t vmid) const
 {
-    TlbEntry::Lookup lookup_data;
-    lookup_data.va = sext<56>(addr);
-    lookup_data.asn = asid;
-    lookup_data.ignoreAsn = false;
-    lookup_data.vmid = vmid;
-    lookup_data.hyp = targetEL == EL2;
-    lookup_data.secure = secureLookup;
-    lookup_data.functional = true;
-    lookup_data.targetEL = targetEL;
-    lookup_data.inHost = inHost;
-    lookup_data.mode = BaseMMU::Read;
+    TlbEntry::Lookup lookup_data = lookupGen(vmid);
 
     return te->match(lookup_data) && (!lastLevel || !te->partial);
 }
@@ -305,5 +320,37 @@ TLBIIPA::operator()(ThreadContext* tc)
     }
 }
 
+bool
+TLBIRMVA::match(TlbEntry* te, vmid_t vmid) const
+{
+    // Start from the by-VA lookup and attach the size of the TLBI range
+    TlbEntry::Lookup range_lookup = lookupGen(vmid);
+    range_lookup.size = rangeSize();
+
+    if (!te->match(range_lookup) || (lastLevel && te->partial))
+        return false;
+
+    // Granule must match; level too, unless resTLBIttl() waives the check
+    return tgMap[rangeData.tg] == te->tg &&
+        (resTLBIttl(rangeData.tg, rangeData.ttl) ||
+         rangeData.ttl == te->lookupLevel);
+}
+
+bool
+TLBIRMVAA::match(TlbEntry* te, vmid_t vmid) const
+{
+    // Start from the by-VA lookup and attach the size of the TLBI range
+    TlbEntry::Lookup range_lookup = lookupGen(vmid);
+    range_lookup.size = rangeSize();
+
+    if (!te->match(range_lookup) || (lastLevel && te->partial))
+        return false;
+
+    // Granule must match; level too, unless resTLBIttl() waives the check
+    return tgMap[rangeData.tg] == te->tg &&
+        (resTLBIttl(rangeData.tg, rangeData.ttl) ||
+         rangeData.ttl == te->lookupLevel);
+}
+
 } // namespace ArmISA
 } // namespace gem5
diff --git a/src/arch/arm/tlbi_op.hh b/src/arch/arm/tlbi_op.hh
index 4f4ea09ec8..38e8252869 100644
--- a/src/arch/arm/tlbi_op.hh
+++ b/src/arch/arm/tlbi_op.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, 2022 Arm Limited
+ * Copyright (c) 2018-2020, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -300,6 +300,8 @@ class TLBIALLN : public TLBIOp
 /** TLB Invalidate by VA, All ASID */
 class TLBIMVAA : public TLBIOp
 {
+  protected:
+    TlbEntry::Lookup lookupGen(vmid_t vmid) const;
   public:
     TLBIMVAA(ExceptionLevel _targetEL, bool _secure,
              Addr _addr, bool last_level)
@@ -319,6 +321,9 @@ class TLBIMVAA : public TLBIOp
 /** TLB Invalidate by VA */
 class TLBIMVA : public TLBIOp
 {
+  protected:
+    TlbEntry::Lookup lookupGen(vmid_t vmid) const;
+
   public:
     TLBIMVA(ExceptionLevel _targetEL, bool _secure,
             Addr _addr, uint16_t _asid, bool last_level)
@@ -368,6 +373,61 @@ class DTLBIMVA : public TLBIMVA
     bool match(TlbEntry *entry, vmid_t curr_vmid) const override;
 };
 
+class TLBIRange
+{
+  public:
+    /**
+     * Is the range valid? This mainly depends on the specified
+     * translation granule.
+     */
+    bool valid() const { return granule != ReservedGrain; }
+
+  protected:
+    BitUnion64(RangeData)
+        Bitfield<47, 46> tg;
+        Bitfield<45, 44> scale;
+        Bitfield<43, 39> num;
+        Bitfield<38, 37> ttl;
+        Bitfield<36, 0> baseAddr;
+    EndBitUnion(RangeData)
+
+    static constexpr std::array<GrainSize, 4> tgMap = {
+        ReservedGrain,
+        Grain4KB,
+        Grain16KB,
+        Grain64KB
+    };
+
+    TLBIRange(RegVal val)
+      : rangeData(val), granule(tgMap[rangeData.tg])
+    {}
+
+    Addr
+    startAddress() const
+    {
+        return sext<37>(rangeData.baseAddr) << granule;
+    }
+
+    Addr
+    rangeSize() const
+    {
+        return (rangeData.num + 1) << (5 * rangeData.scale + 1 + granule);
+    }
+
+    bool
+    resTLBIttl(uint8_t tg, uint8_t ttl) const
+    {
+        switch (ttl) {
+          case 0: return true;
+          case 1: return tgMap[tg] == Grain16KB;
+          default: return false;
+        }
+    }
+
+    RangeData rangeData;
+    GrainSize granule;
+};
+
 /** TLB Invalidate by Intermediate Physical Address */
 class TLBIIPA : public TLBIOp
 {
@@ -392,7 +452,7 @@ class TLBIIPA : public TLBIOp
     }
 
     /** TLBIIPA is basically a TLBIMVAA for stage2 TLBs */
-    TLBIMVAA
+    virtual TLBIMVAA
     makeStage2() const
     {
         return TLBIMVAA(EL1, secureLookup, addr, lastLevel);
@@ -402,6 +462,49 @@ class TLBIIPA : public TLBIOp
     bool lastLevel;
 };
 
+/** TLB Range Invalidate by VA */
+class TLBIRMVA : public TLBIRange, public TLBIMVA
+{
+  public:
+    TLBIRMVA(ExceptionLevel _targetEL, bool _secure,
+             RegVal val, uint16_t _asid, bool last_level)
+      : TLBIRange(val),
+        TLBIMVA(_targetEL, _secure, startAddress(), _asid, last_level)
+    {}
+
+    bool match(TlbEntry *entry, vmid_t curr_vmid) const override;
+};
+
+/** TLB Range Invalidate by VA, All ASIDs */
+class TLBIRMVAA : public TLBIRange, public TLBIMVAA
+{
+  public:
+    TLBIRMVAA(ExceptionLevel _targetEL, bool _secure,
+              RegVal val, bool last_level)
+      : TLBIRange(val),
+        TLBIMVAA(_targetEL, _secure, startAddress(), last_level)
+    {}
+
+    bool match(TlbEntry *entry, vmid_t curr_vmid) const override;
+};
+
+/** TLB Range Invalidate by Intermediate Physical Address */
+class TLBIRIPA : public TLBIRange, public TLBIIPA
+{
+  public:
+    TLBIRIPA(ExceptionLevel _targetEL, bool _secure,
+             RegVal val, bool last_level)
+      : TLBIRange(val),
+        TLBIIPA(_targetEL, _secure, startAddress(), last_level)
+    {}
+
+    virtual TLBIMVAA
+    makeStage2() const
+    {
+        return TLBIRMVAA(EL1, secureLookup, rangeData, lastLevel);
+    }
+};
+
 } // namespace ArmISA
 } // namespace gem5
 
diff --git a/tests/configs/o3-timing-mp.py b/src/arch/arm/tracers/ArmCapstone.py
similarity index 85%
rename from tests/configs/o3-timing-mp.py
rename to src/arch/arm/tracers/ArmCapstone.py
index 9b58c9d416..7f1b6a9e8a 100644
--- a/tests/configs/o3-timing-mp.py
+++ b/src/arch/arm/tracers/ArmCapstone.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2013 ARM Limited
+# Copyright (c) 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -10,9 +10,6 @@
 # unmodified and in its entirety in all distributions of the software,
 # modified or unmodified, in source code or in binary form.
 #
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -36,13 +33,12 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.objects import *
-from base_config import *
+from m5.SimObject import SimObject
+from m5.params import *
+from m5.objects.Capstone import CapstoneDisassembler
+
 
-nb_cores = 4
-root = BaseSESystem(
-    mem_mode="timing",
-    mem_class=DDR3_1600_8x8,
-    cpu_class=DerivO3CPU,
-    num_cpus=nb_cores,
-).create_root()
+class ArmCapstoneDisassembler(CapstoneDisassembler):
+    type = "ArmCapstoneDisassembler"
+    cxx_class = "gem5::trace::ArmCapstoneDisassembler"
+    cxx_header = "arch/arm/tracers/capstone.hh"
diff --git a/src/arch/arm/tracers/SConscript b/src/arch/arm/tracers/SConscript
index 15945a4ac4..ca012c5c2e 100644
--- a/src/arch/arm/tracers/SConscript
+++ b/src/arch/arm/tracers/SConscript
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018, 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -42,3 +42,8 @@ Source('tarmac_parser.cc', tags='arm isa')
 Source('tarmac_tracer.cc', tags='arm isa')
 Source('tarmac_record.cc', tags='arm isa')
 Source('tarmac_record_v8.cc', tags='arm isa')
+
+if env['CONF']['HAVE_CAPSTONE']:
+    SimObject('ArmCapstone.py', sim_objects=['ArmCapstoneDisassembler'],
+              tags=['capstone', 'arm isa'])
+    Source('capstone.cc', tags=['capstone', 'arm isa'])
diff --git a/src/arch/arm/tracers/capstone.cc b/src/arch/arm/tracers/capstone.cc
new file mode 100644
index 0000000000..469dc46568
--- /dev/null
+++ b/src/arch/arm/tracers/capstone.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/arm/tracers/capstone.hh"
+
+#include "arch/arm/insts/static_inst.hh"
+#include "base/output.hh"
+
+namespace gem5
+{
+
+namespace trace
+{
+
+using namespace ArmISA;
+
+ArmCapstoneDisassembler::ArmCapstoneDisassembler(const Params &p)
+  : CapstoneDisassembler(p)
+{
+    if (cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &arm64Handle) != CS_ERR_OK)
+        panic("Unable to open capstone for arm64 disassembly");
+
+    if (cs_open(CS_ARCH_ARM, CS_MODE_ARM, &armHandle) != CS_ERR_OK)
+        panic("Unable to open capstone for arm disassembly");
+}
+
+const csh*
+ArmCapstoneDisassembler::currHandle(const PCStateBase &_pc) const
+{
+    auto pc = _pc.as<ArmISA::PCState>();
+    if (pc.aarch64()) {
+        return &arm64Handle;
+    } else {
+        auto mode = pc.thumb() ? CS_MODE_THUMB : CS_MODE_ARM;
+        cs_option(armHandle, CS_OPT_MODE, mode);
+        return &armHandle;
+    }
+}
+
+} // namespace trace
+} // namespace gem5
diff --git a/src/arch/arm/tracers/capstone.hh b/src/arch/arm/tracers/capstone.hh
new file mode 100644
index 0000000000..929fbad6f5
--- /dev/null
+++ b/src/arch/arm/tracers/capstone.hh
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_TRACERS_CAPSTONE_HH__
+#define __ARCH_ARM_TRACERS_CAPSTONE_HH__
+
+#include "cpu/capstone.hh"
+#include "params/ArmCapstoneDisassembler.hh"
+
+namespace gem5
+{
+
+class ThreadContext;
+
+namespace trace
+{
+
+class ArmCapstoneDisassembler : public CapstoneDisassembler
+{
+  public:
+    PARAMS(ArmCapstoneDisassembler);
+    ArmCapstoneDisassembler(const Params &p);
+
+  protected:
+    const csh* currHandle(const PCStateBase &pc) const override;
+
+  protected:
+    csh arm64Handle;
+    csh armHandle;
+};
+
+} // namespace trace
+} // namespace gem5
+
+#endif // __ARCH_ARM_TRACERS_CAPSTONE_HH__
diff --git a/src/arch/arm/tracers/tarmac_base.cc b/src/arch/arm/tracers/tarmac_base.cc
index 99ed3bb0f1..01add3037a 100644
--- a/src/arch/arm/tracers/tarmac_base.cc
+++ b/src/arch/arm/tracers/tarmac_base.cc
@@ -68,7 +68,6 @@ TarmacBaseRecord::InstEntry::InstEntry(
         : taken(predicate) ,
           addr(pc.instAddr()) ,
           opcode(staticInst->getEMI() & 0xffffffff),
-          disassemble(staticInst->disassemble(addr)),
           isetstate(pcToISetState(pc)),
           mode(MODE_USER)
 {
@@ -76,11 +75,6 @@ TarmacBaseRecord::InstEntry::InstEntry(
     // Operating mode gained by reading the architectural register (CPSR)
     const CPSR cpsr = thread->readMiscRegNoEffect(MISCREG_CPSR);
     mode = (OperatingMode) (uint8_t)cpsr.mode;
-
-    // In Tarmac, instruction names are printed in capital
-    // letters.
-    std::for_each(disassemble.begin(), disassemble.end(),
-                  [](char& c) { c = toupper(c); });
 }
 
 TarmacBaseRecord::RegEntry::RegEntry(const PCStateBase &pc)
@@ -107,12 +101,12 @@ TarmacBaseRecord::pcToISetState(const PCStateBase &pc)
 
     if (apc.aarch64())
         isetstate = TarmacBaseRecord::ISET_A64;
-    else if (!apc.thumb() && !apc.jazelle())
+    else if (!apc.thumb())
         isetstate = TarmacBaseRecord::ISET_ARM;
-    else if (apc.thumb() && !apc.jazelle())
+    else if (apc.thumb())
         isetstate = TarmacBaseRecord::ISET_THUMB;
     else
-        // No Jazelle state in TARMAC
+        // Unsupported state in TARMAC
         isetstate = TarmacBaseRecord::ISET_UNSUPPORTED;
 
     return isetstate;
diff --git a/src/arch/arm/tracers/tarmac_base.hh b/src/arch/arm/tracers/tarmac_base.hh
index 501eb1b008..9e80f6d1f1 100644
--- a/src/arch/arm/tracers/tarmac_base.hh
+++ b/src/arch/arm/tracers/tarmac_base.hh
@@ -93,7 +93,6 @@ class TarmacBaseRecord : public InstRecord
         bool taken;
         Addr addr;
         ArmISA::MachInst opcode;
-        std::string disassemble;
         ISetState isetstate;
         ArmISA::OperatingMode mode;
     };
diff --git a/src/arch/arm/tracers/tarmac_record.cc b/src/arch/arm/tracers/tarmac_record.cc
index 59d6a18b39..5aa1f7e957 100644
--- a/src/arch/arm/tracers/tarmac_record.cc
+++ b/src/arch/arm/tracers/tarmac_record.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited
+ * Copyright (c) 2017-2019, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -123,7 +123,8 @@ TarmacTracerRecord::TarmacTracerRecord(Tick _when, ThreadContext *_thread,
 TarmacTracerRecord::TraceInstEntry::TraceInstEntry(
     const TarmacContext& tarmCtx,
     bool predicate)
-      : InstEntry(tarmCtx.thread, *tarmCtx.pc, tarmCtx.staticInst, predicate)
+      : InstEntry(tarmCtx.thread, *tarmCtx.pc, tarmCtx.staticInst, predicate),
+        disassemble(tarmCtx.tracer.disassemble(tarmCtx.staticInst, *tarmCtx.pc))
 {
     secureMode = isSecure(tarmCtx.thread);
 
@@ -140,6 +141,11 @@ TarmacTracerRecord::TraceInstEntry::TraceInstEntry(
     // for 16bit (Thumb) instruction.
     opcode = arm_inst->encoding();
 
+    // In Tarmac, instruction names are printed in capital
+    // letters.
+    std::for_each(disassemble.begin(), disassemble.end(),
+                  [](char& c) { c = toupper(c); });
+
     // Update the instruction count: number of executed
     // instructions.
     instCount++;
@@ -332,6 +338,7 @@ TarmacTracerRecord::dump()
     auto &regQueue = tracer.regQueue;
 
     const TarmacContext tarmCtx(
+        tracer,
         thread,
         staticInst->isMicroop()? macroStaticInst : staticInst,
         *pc
diff --git a/src/arch/arm/tracers/tarmac_record.hh b/src/arch/arm/tracers/tarmac_record.hh
index 009df5db29..d80121b1b9 100644
--- a/src/arch/arm/tracers/tarmac_record.hh
+++ b/src/arch/arm/tracers/tarmac_record.hh
@@ -115,6 +115,9 @@ class TarmacTracerRecord : public TarmacBaseRecord
          * 32 otherwise (ARM and BigThumb)
          */
         uint8_t instSize;
+
+        /** Instruction disassembly */
+        std::string disassemble;
     };
 
     /** Register Entry */
diff --git a/src/arch/arm/tracers/tarmac_tracer.hh b/src/arch/arm/tracers/tarmac_tracer.hh
index f8c7b5ca53..71207b3860 100644
--- a/src/arch/arm/tracers/tarmac_tracer.hh
+++ b/src/arch/arm/tracers/tarmac_tracer.hh
@@ -58,6 +58,8 @@ class OutputStream;
 
 namespace trace {
 
+class TarmacTracer;
+
 /**
  * This object type is encapsulating the informations needed by
  * a Tarmac record to generate it's own entries.
@@ -65,15 +67,18 @@ namespace trace {
 class TarmacContext
 {
   public:
-    TarmacContext(ThreadContext* _thread,
+    TarmacContext(const TarmacTracer &_tracer,
+                  ThreadContext* _thread,
                   const StaticInstPtr _staticInst,
                   const PCStateBase &_pc)
-      : thread(_thread), staticInst(_staticInst), pc(_pc.clone())
+      : tracer(_tracer), thread(_thread), staticInst(_staticInst),
+        pc(_pc.clone())
     {}
 
     std::string tarmacCpuName() const;
 
   public:
+    const TarmacTracer &tracer;
     ThreadContext* thread;
     const StaticInstPtr staticInst;
     std::unique_ptr<PCStateBase> pc;
diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh
index 2251d57c0b..f7b6cbf86b 100644
--- a/src/arch/arm/types.hh
+++ b/src/arch/arm/types.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2013, 2017-2018, 2022 Arm Limited
+ * Copyright (c) 2010, 2012-2013, 2017-2018, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -323,6 +323,7 @@ namespace ArmISA
         SMC_64                  = 0x17,
         TRAPPED_MSR_MRS_64      = 0x18,
         TRAPPED_SVE             = 0x19,
+        TRAPPED_ERET            = 0x1A,
         TRAPPED_SME             = 0x1D,
         PREFETCH_ABORT_TO_HYP   = 0x20,
         PREFETCH_ABORT_LOWER_EL = 0x20,  // AArch64 alias
diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 05d1cab06c..926a7e3343 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009-2014, 2016-2020, 2022 Arm Limited
+ * Copyright (c) 2009-2014, 2016-2020, 2022-2023 Arm Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -1347,5 +1347,24 @@ syncVecElemsToRegs(ThreadContext *tc)
     }
 }
 
+bool
+fgtEnabled(ThreadContext *tc)
+{
+    return EL2Enabled(tc) && HaveExt(tc, ArmExtension::FEAT_FGT) &&
+        (!ArmSystem::haveEL(tc, EL3) ||
+            static_cast<SCR>(tc->readMiscReg(MISCREG_SCR_EL3)).fgten);
+}
+
+bool
+isHcrxEL2Enabled(ThreadContext *tc)
+{
+    if (!ArmSystem::has(ArmExtension::FEAT_HCX, tc))
+        return false;
+    if (ArmSystem::haveEL(tc, EL3) &&
+        !static_cast<SCR>(tc->readMiscReg(MISCREG_SCR_EL3)).hxen)
+        return false;
+    return EL2Enabled(tc);
+}
+
 } // namespace ArmISA
 } // namespace gem5
diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh
index b5a5dd72dd..8ccb251fa5 100644
--- a/src/arch/arm/utility.hh
+++ b/src/arch/arm/utility.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2013, 2016-2020, 2022 Arm Limited
+ * Copyright (c) 2010, 2012-2013, 2016-2020, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -364,6 +364,15 @@ bool isUnpriviledgeAccess(ThreadContext *tc);
 void syncVecRegsToElems(ThreadContext *tc);
 void syncVecElemsToRegs(ThreadContext *tc);
 
+bool fgtEnabled(ThreadContext *tc);
+bool isHcrxEL2Enabled(ThreadContext *tc);
+
+static inline bool
+useVMID(ExceptionLevel el, bool in_host)
+{
+    return el == EL1 || (el == EL0 && !in_host);
+}
+
 } // namespace ArmISA
 } // namespace gem5
 
diff --git a/src/arch/generic/isa.hh b/src/arch/generic/isa.hh
index 58f66fc99b..e9e4d95d7b 100644
--- a/src/arch/generic/isa.hh
+++ b/src/arch/generic/isa.hh
@@ -70,7 +70,6 @@ class BaseISA : public SimObject
   public:
     virtual PCStateBase *newPCState(Addr new_inst_addr=0) const = 0;
     virtual void clear() {}
-    virtual void clearLoadReservation(ContextID cid) {}
 
     virtual RegVal readMiscRegNoEffect(RegIndex idx) const = 0;
     virtual RegVal readMiscReg(RegIndex idx) = 0;
diff --git a/src/arch/generic/memhelpers.hh b/src/arch/generic/memhelpers.hh
index d5684a6af9..9cdd2a56eb 100644
--- a/src/arch/generic/memhelpers.hh
+++ b/src/arch/generic/memhelpers.hh
@@ -124,6 +124,24 @@ readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
     return fault;
 }
 
+/// Read `size` bytes from memory in atomic mode (explicit-size overload).
+template <ByteOrder Order, class XC, class MemT>
+Fault
+readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
+              size_t size, Request::Flags flags)
+{
+    memset(&mem, 0, size);
+    const std::vector<bool> byte_enable(size, true); // rebuilt: size varies
+    Fault fault = readMemAtomic(xc, addr, (uint8_t*)&mem,
+                                size, flags, byte_enable);
+    if (fault == NoFault) {
+        mem = gtoh(mem, Order);
+        if (traceData)
+            traceData->setData(mem);
+    }
+    return fault;
+}
+
 template <class XC, class MemT>
 Fault
 readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
@@ -133,6 +151,16 @@ readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
             xc, traceData, addr, mem, flags);
 }
 
+template <class XC, class MemT>
+Fault
+readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
+                size_t size, Request::Flags flags)
+{
+    return readMemAtomic<ByteOrder::little>(
+            xc, traceData, addr, mem, size, flags);
+}
+
+
 template <class XC, class MemT>
 Fault
 readMemAtomicBE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem,
@@ -165,6 +193,20 @@ writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
                           sizeof(MemT), flags, res, byte_enable);
 }
 
+template <ByteOrder Order, class XC, class MemT>
+Fault
+writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
+               size_t size, Request::Flags flags, uint64_t *res)
+{
+    if (traceData) {
+        traceData->setData(mem);
+    }
+    mem = htog(mem, Order);
+    const std::vector<bool> byte_enable(size, true); // rebuilt: size varies
+    return writeMemTiming(xc, (uint8_t*)&mem, addr,
+                          size, flags, res, byte_enable);
+}
+
 template <class XC, class MemT>
 Fault
 writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
@@ -174,6 +216,15 @@ writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
             xc, traceData, mem, addr, flags, res);
 }
 
+template <class XC, class MemT>
+Fault
+writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
+               size_t size, Request::Flags flags, uint64_t *res)
+{
+    return writeMemTiming<ByteOrder::little>(
+            xc, traceData, mem, addr, size, flags, res);
+}
+
 template <class XC, class MemT>
 Fault
 writeMemTimingBE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr,
@@ -214,6 +265,27 @@ writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem,
     return fault;
 }
 
+template <ByteOrder Order, class XC, class MemT>
+Fault
+writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem,
+               Addr addr, size_t size, Request::Flags flags, uint64_t *res)
+{
+    if (traceData) {
+        traceData->setData(mem);
+    }
+    MemT host_mem = htog(mem, Order);
+    const std::vector<bool> byte_enable(size, true); // rebuilt: size varies
+    Fault fault = writeMemAtomic(xc, (uint8_t*)&host_mem,
+                                 addr, size, flags, res, byte_enable);
+    if (fault == NoFault && res != NULL) {
+        if (flags & Request::MEM_SWAP || flags & Request::MEM_SWAP_COND)
+            *(MemT *)res = gtoh(*(MemT *)res, Order);
+        else
+            *res = gtoh(*res, Order);
+    }
+    return fault;
+}
+
 template <class XC, class MemT>
 Fault
 writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
@@ -223,6 +295,15 @@ writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
             xc, traceData, mem, addr, flags, res);
 }
 
+template <class XC, class MemT>
+Fault
+writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
+                 size_t size, Addr addr, Request::Flags flags, uint64_t *res)
+{
+    return writeMemAtomic<ByteOrder::little>(
+            xc, traceData, mem, addr, size, flags, res);
+}
+
 template <class XC, class MemT>
 Fault
 writeMemAtomicBE(XC *xc, trace::InstRecord *traceData, const MemT &mem,
diff --git a/src/arch/generic/pcstate.hh b/src/arch/generic/pcstate.hh
index f1df6e7c39..25b3af69ea 100644
--- a/src/arch/generic/pcstate.hh
+++ b/src/arch/generic/pcstate.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020 ARM Limited
+ * Copyright (c) 2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -126,6 +127,13 @@ class PCStateBase : public Serializable
         _upc = 0;
     }
 
+    virtual void
+    set(Addr val)
+    {
+        _pc = val;
+        _upc = 0;
+    }
+
     virtual void advance() = 0;
     virtual bool branching() const = 0;
 
@@ -309,6 +317,14 @@ class PCStateWithNext : public PCStateBase
             _npc == ps._npc && _nupc == ps._nupc;
     }
 
+    void
+    set(Addr val) override
+    {
+        PCStateBase::set(val);
+        _npc = 0;
+        _nupc = 1;
+    }
+
     void
     serialize(CheckpointOut &cp) const override
     {
@@ -359,9 +375,9 @@ class SimplePCState : public PCStateWithNext
      * @param val The value to set the PC to.
      */
     void
-    set(Addr val)
+    set(Addr val) override
     {
-        this->pc(val);
+        Base::set(val);
         this->npc(val + InstWidth);
     };
 
@@ -402,7 +418,7 @@ class UPCState : public SimplePCState<InstWidth>
     }
 
     void
-    set(Addr val)
+    set(Addr val) override
     {
         Base::set(val);
         this->upc(0);
@@ -473,7 +489,7 @@ class DelaySlotPCState : public SimplePCState<InstWidth>
     void nnpc(Addr val) { _nnpc = val; }
 
     void
-    set(Addr val)
+    set(Addr val) override
     {
         Base::set(val);
         nnpc(val + 2 * InstWidth);
@@ -547,7 +563,7 @@ class DelaySlotUPCState : public DelaySlotPCState<InstWidth>
     }
 
     void
-    set(Addr val)
+    set(Addr val) override
     {
         Base::set(val);
         this->upc(0);
diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py
index 0f29840c3b..b0b2485cac 100755
--- a/src/arch/isa_parser/isa_parser.py
+++ b/src/arch/isa_parser/isa_parser.py
@@ -61,7 +61,7 @@
 labelRE = re.compile(r"(?<!%)%\(([^\)]+)\)[sd]")
 
 
-class Template(object):
+class Template:
     def __init__(self, parser, t):
         self.parser = parser
         self.template = t
@@ -95,12 +95,10 @@ def subst(self, d):
                 l for l in labelRE.findall(template) if l in d.snippets
             ]
 
-            snippets = dict(
-                [
-                    (s, self.parser.mungeSnippet(d.snippets[s]))
-                    for s in snippetLabels
-                ]
-            )
+            snippets = {
+                s: self.parser.mungeSnippet(d.snippets[s])
+                for s in snippetLabels
+            }
 
             myDict.update(snippets)
 
@@ -199,7 +197,7 @@ def __str__(self):
 # definition.
 
 
-class Format(object):
+class Format:
     def __init__(self, id, params, code):
         self.id = id
         self.params = params
@@ -242,7 +240,7 @@ def defineInst(self, parser, name, args, lineno):
 
 # Special null format to catch an implicit-format instruction
 # definition outside of any format block.
-class NoFormat(object):
+class NoFormat:
     def __init__(self):
         self.defaultInst = ""
 
@@ -265,7 +263,7 @@ def defineInst(self, parser, name, args, lineno):
 # to allow explicit default clauses to override default default clauses.
 
 
-class GenCode(object):
+class GenCode:
     # Constructor.
     def __init__(
         self,
@@ -355,7 +353,7 @@ def substBitOps(code):
             if here < 0:
                 sys.exit("Didn't find '('!")
         exprStart = here + 1
-        newExpr = r"bits(%s, %s, %s)" % (
+        newExpr = r"bits({}, {}, {})".format(
             code[exprStart : exprEnd + 1],
             match.group(1),
             match.group(2),
@@ -374,6 +372,7 @@ def substBitOps(code):
 #
 #####################################################################
 
+
 # Force the argument to be a list.  Useful for flags, where a caller
 # can specify a singleton flag or a list of flags.  Also usful for
 # converting tuples to lists so they can be modified.
@@ -412,7 +411,7 @@ def makeFlagConstructor(flag_list):
 opClassRE = re.compile(r".*Op|No_OpClass")
 
 
-class InstObjParams(object):
+class InstObjParams:
     def __init__(
         self, parser, mnem, class_name, base_class="", snippets={}, opt_args=[]
     ):
@@ -512,8 +511,9 @@ def padDestRegIdx(self, padding):
 
 
 class ISAParser(Grammar):
-    def __init__(self, output_dir):
+    def __init__(self, output_dir, decoder_name="Decoder"):
         super().__init__()
+        self.lex_kwargs["reflags"] = int(re.MULTILINE)
         self.output_dir = output_dir
 
         self.filename = None  # for output file watermarking/scaremongering
@@ -541,6 +541,9 @@ def __init__(self, output_dir):
         self.isa_name = None
         self.namespace = None
 
+        # decoder_name is the class name of the CPU decoder.
+        self.decoder_name = decoder_name
+
         # The format stack.
         self.formatStack = Stack(NoFormat())
 
@@ -554,7 +557,7 @@ def __init__(self, output_dir):
         self.fileNameStack = Stack()
 
         symbols = ("makeList", "re")
-        self.exportContext = dict([(s, eval(s)) for s in symbols])
+        self.exportContext = {s: eval(s) for s in symbols}
         self.exportContext.update(
             {
                 "overrideInOperand": overrideInOperand,
@@ -589,7 +592,7 @@ def __getitem__(self, i):  # Allow object (self) to be
     # Change the file suffix of a base filename:
     #   (e.g.) decoder.cc -> decoder-g.cc.inc for 'global' outputs
     def suffixize(self, s, sec):
-        extn = re.compile("(\.[^\.]+)$")  # isolate extension
+        extn = re.compile(r"(\.[^\.]+)$")  # isolate extension
         if self.namespace:
             return extn.sub(r"-ns\1.inc", s)  # insert some text on either side
         else:
@@ -681,7 +684,7 @@ def write_top_level_files(self):
             # is guaranteed to have been written for parse to complete
             f.write(f'#include "{fn}"\n')
 
-        extn = re.compile("(\.[^\.]+)$")
+        extn = re.compile(r"(\.[^\.]+)$")
 
         # instruction constructors
         splits = self.splits[self.get_file("decoder")]
@@ -851,7 +854,7 @@ def t_INTLIT(self, t):
     # String literal.  Note that these use only single quotes, and
     # can span multiple lines.
     def t_STRLIT(self, t):
-        r"(?m)'([^'])+'"
+        r"'([^'])+'"
         # strip off quotes
         t.value = t.value[1:-1]
         t.lexer.lineno += t.value.count("\n")
@@ -860,19 +863,19 @@ def t_STRLIT(self, t):
     # "Code literal"... like a string literal, but delimiters are
     # '{{' and '}}' so they get formatted nicely under emacs c-mode
     def t_CODELIT(self, t):
-        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
+        r"\{\{([^\}]|}(?!\}))+\}\}"
         # strip off {{ & }}
         t.value = t.value[2:-2]
         t.lexer.lineno += t.value.count("\n")
         return t
 
     def t_CPPDIRECTIVE(self, t):
-        r"^\#[^\#].*\n"
+        r"^\#[^\#][^\n]*\n"
         t.lexer.lineno += t.value.count("\n")
         return t
 
     def t_NEWFILE(self, t):
-        r'^\#\#newfile\s+"[^"]*"\n'
+        r'^\#\#newfile\s+"[^"\n]*"\n'
         self.fileNameStack.push(t.lexer.lineno)
         t.lexer.lineno = LineTracker(t.value[11:-2])
 
@@ -892,7 +895,7 @@ def t_NEWLINE(self, t):
 
     # Comments
     def t_comment(self, t):
-        r"//.*"
+        r"//[^\n]*\n"
 
     # Completely ignored characters
     t_ignore = " \t\x0c"
@@ -1230,7 +1233,7 @@ def p_top_level_decode_block(self, t):
             """
 using namespace gem5;
 StaticInstPtr
-%(isa_name)s::Decoder::decodeInst(%(isa_name)s::ExtMachInst machInst)
+%(isa_name)s::%(decoder_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
 {
     using namespace %(namespace)s;
 """
@@ -1559,9 +1562,9 @@ def buildOperandREs(self):
 
         operandsREString = r"""
         (?<!\w|:)     # neg. lookbehind assertion: prevent partial matches
-        ((%s)(?:_(%s))?)   # match: operand with optional '_' then suffix
+        (({})(?:_({}))?)   # match: operand with optional '_' then suffix
         (?!\w)       # neg. lookahead assertion: prevent partial matches
-        """ % (
+        """.format(
             "|".join(operands),
             "|".join(extensions),
         )
@@ -1573,7 +1576,7 @@ def buildOperandREs(self):
         # Same as operandsREString, but extension is mandatory, and only two
         # groups are returned (base and ext, not full name as above).
         # Used for subtituting '_' for '.' to make C++ identifiers.
-        operandsWithExtREString = r"(?<!\w)(%s)_(%s)(?!\w)" % (
+        operandsWithExtREString = r"(?<!\w)({})_({})(?!\w)".format(
             "|".join(operands),
             "|".join(extensions),
         )
@@ -1625,7 +1628,7 @@ def replace_include(self, matchobj, dirname):
 
         fname = matchobj.group("filename")
         full_fname = os.path.normpath(os.path.join(dirname, fname))
-        contents = '##newfile "%s"\n%s\n##endfile\n' % (
+        contents = '##newfile "{}"\n{}\n##endfile\n'.format(
             full_fname,
             self.read_and_flatten(full_fname),
         )
@@ -1637,7 +1640,7 @@ def read_and_flatten(self, filename):
         current_dir = os.path.dirname(filename)
         try:
             contents = open(filename).read()
-        except IOError:
+        except OSError:
             error(f'Error including file "{filename}"')
 
         self.fileNameStack.push(LineTracker(filename))
diff --git a/src/arch/isa_parser/operand_list.py b/src/arch/isa_parser/operand_list.py
index 5741a52324..29062893ec 100755
--- a/src/arch/isa_parser/operand_list.py
+++ b/src/arch/isa_parser/operand_list.py
@@ -41,7 +41,7 @@
 from .util import error
 
 
-class OperandList(object):
+class OperandList:
     """Find all the operands in the given code block.  Returns an operand
     descriptor list (instance of class OperandList)."""
 
diff --git a/src/arch/isa_parser/operand_types.py b/src/arch/isa_parser/operand_types.py
index 174a54cd4c..0caaa012f4 100755
--- a/src/arch/isa_parser/operand_types.py
+++ b/src/arch/isa_parser/operand_types.py
@@ -46,11 +46,10 @@ def overrideInOperand(func):
 overrideInOperand.overrides = dict()
 
 
-class OperandDesc(object):
+class OperandDesc:
     def __init__(
         self, base_cls, dflt_ext, reg_spec, flags=None, sort_pri=None
     ):
-
         from .isa_parser import makeList
 
         # Canonical flag structure is a triple of lists, where each list
@@ -112,7 +111,7 @@ def setName(self, name):
         self.attrs["base_name"] = name
 
 
-class Operand(object):
+class Operand:
     """Base class for operand descriptors.  An instance of this class
     (or actually a class derived from this one) represents a specific
     operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
diff --git a/src/arch/isa_parser/util.py b/src/arch/isa_parser/util.py
index 2cf0d82a7a..9e330adc1f 100755
--- a/src/arch/isa_parser/util.py
+++ b/src/arch/isa_parser/util.py
@@ -42,6 +42,7 @@
 ###################
 # Utility functions
 
+
 #
 # Indent every line in string 's' by two spaces
 # (except preprocessor directives).
@@ -155,7 +156,7 @@ def backtrace(filename_stack):
 #
 
 
-class LineTracker(object):
+class LineTracker:
     def __init__(self, filename, lineno=1):
         self.filename = filename
         self.lineno = lineno
diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py
index 1c2183c07a..ec890cbe6d 100644
--- a/src/arch/micro_asm.py
+++ b/src/arch/micro_asm.py
@@ -88,18 +88,18 @@ def __init__(self, name):
 ##########################################################################
 
 
-class Label(object):
+class Label:
     def __init__(self):
         self.extern = False
         self.name = ""
 
 
-class Block(object):
+class Block:
     def __init__(self):
         self.statements = []
 
 
-class Statement(object):
+class Statement:
     def __init__(self):
         self.is_microop = False
         self.is_directive = False
@@ -187,6 +187,7 @@ def handle_statement(parser, container, statement):
 #
 ##########################################################################
 
+
 # Error handler.  Just call exit.  Output formatted to work under
 # Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
 # prints a Python stack backtrace too (can be handy when trying to
@@ -231,6 +232,7 @@ def error(lineno, string, print_traceback=False):
 for r in reserved:
     reserved_map[r.lower()] = r
 
+
 # Ignore comments
 def t_ANY_COMMENT(t):
     r"\#[^\n]*(?=\n)"
@@ -360,6 +362,7 @@ def t_ANY_error(t):
 #
 ##########################################################################
 
+
 # Start symbol for a file which may have more than one macroop or rom
 # specification.
 def p_file(t):
@@ -567,7 +570,7 @@ def p_error(t):
         error(0, "unknown syntax error", True)
 
 
-class MicroAssembler(object):
+class MicroAssembler:
     def __init__(self, macro_type, microops, rom=None, rom_macroop_type=None):
         self.lexer = lex.lex()
         self.parser = yacc.yacc(write_tables=False)
diff --git a/src/arch/micro_asm_test.py b/src/arch/micro_asm_test.py
index 609b8a4021..b6b8918b2d 100755
--- a/src/arch/micro_asm_test.py
+++ b/src/arch/micro_asm_test.py
@@ -27,17 +27,17 @@
 from micro_asm import MicroAssembler, CombinationalMacroop, RomMacroop, Rom
 
 
-class Bah(object):
+class Bah:
     def __init__(self):
         self.mnemonic = "bah"
 
 
-class Bah_Tweaked(object):
+class Bah_Tweaked:
     def __init__(self):
         self.mnemonic = "bah_tweaked"
 
 
-class Hoop(object):
+class Hoop:
     def __init__(self, first_param, second_param):
         self.mnemonic = f"hoop_{first_param}_{second_param}"
 
@@ -45,7 +45,7 @@ def __str__(self):
         return f"{self.mnemonic}"
 
 
-class Dah(object):
+class Dah:
     def __init__(self):
         self.mnemonic = "dah"
 
diff --git a/src/arch/power/faults.cc b/src/arch/power/faults.cc
index 0d8f2ddd68..77fc8cba96 100644
--- a/src/arch/power/faults.cc
+++ b/src/arch/power/faults.cc
@@ -42,24 +42,28 @@ namespace PowerISA
 void
 UnimplementedOpcodeFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::ILL, tc->contextId()),
-             "Unimplemented opcode encountered at virtual address %#x\n",
-             tc->pcState().instAddr());
+    if (! tc->getSystemPtr()->trapToGdb(GDBSignal::ILL, tc->contextId()) ) {
+        panic("Unimplemented opcode encountered at virtual address %#x\n",
+              tc->pcState().instAddr());
+    }
 }
 
 void
 AlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(!tc->getSystemPtr()->trapToGdb(GDBSignal::BUS, tc->contextId()),
-             "Alignment fault when accessing virtual address %#x\n", vaddr);
+    if (! tc->getSystemPtr()->trapToGdb(GDBSignal::BUS, tc->contextId()) ) {
+        panic("Alignment fault when accessing virtual address %#x\n",
+              vaddr);
+    }
 }
 
 void
 TrapFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::TRAP, tc->contextId()),
-             "Trap encountered at virtual address %#x\n",
-             tc->pcState().instAddr());
+    if (! tc->getSystemPtr()->trapToGdb(GDBSignal::TRAP, tc->contextId()) ) {
+        panic("Trap encountered at virtual address %#x\n",
+              tc->pcState().instAddr());
+    }
 }
 
 } // namespace PowerISA
diff --git a/src/arch/riscv/RiscvDecoder.py b/src/arch/riscv/RiscvDecoder.py
index 30c1077662..4100a3c5b3 100644
--- a/src/arch/riscv/RiscvDecoder.py
+++ b/src/arch/riscv/RiscvDecoder.py
@@ -24,6 +24,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from m5.objects.InstDecoder import InstDecoder
+from m5.params import *
 
 
 class RiscvDecoder(InstDecoder):
diff --git a/src/arch/riscv/RiscvFsWorkload.py b/src/arch/riscv/RiscvFsWorkload.py
index 9e158811da..a71dc1acaf 100644
--- a/src/arch/riscv/RiscvFsWorkload.py
+++ b/src/arch/riscv/RiscvFsWorkload.py
@@ -52,3 +52,37 @@ class RiscvLinux(KernelWorkload):
         "", "File that contains the Device Tree Blob. Don't use DTB if empty."
     )
     dtb_addr = Param.Addr(0x87E00000, "DTB address")
+
+
+class RiscvBootloaderKernelWorkload(Workload):
+    type = "RiscvBootloaderKernelWorkload"
+    cxx_class = "gem5::RiscvISA::BootloaderKernelWorkload"
+    cxx_header = "arch/riscv/linux/fs_workload.hh"
+
+    bootloader_filename = Param.String(
+        "", "File that contains the bootloader. Don't use bootloader if empty."
+    )
+    bootloader_addr = Param.Addr(
+        0x0, "Where to place the bootloader in memory."
+    )
+    kernel_filename = Param.String(
+        "", "vmlinux file. Don't use kernel if empty."
+    )
+    kernel_addr = Param.Addr(
+        0x80200000,
+        "Where to place the kernel in memory. Typically, after the first "
+        "stage of booting is done, the bootloader will jump to where the "
+        "`start` symbol of the kernel is.",
+    )
+    entry_point = Param.Addr(
+        0x80000000, "Where to find the first instruction to execute."
+    )
+    dtb_filename = Param.String(
+        "", "File that contains the Device Tree Blob. Don't use DTB if empty."
+    )
+    dtb_addr = Param.Addr(0x87E00000, "Where to place the DTB in memory.")
+
+    # booting parameters
+    boot_args = Param.String(
+        "", "Booting arguments, to be passed to the kernel"
+    )
diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py
index e2381fd158..bce7f2497f 100644
--- a/src/arch/riscv/RiscvISA.py
+++ b/src/arch/riscv/RiscvISA.py
@@ -13,6 +13,7 @@
 #
 # Copyright (c) 2016 RISC-V Foundation
 # Copyright (c) 2016 The University of Virginia
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -38,11 +39,37 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.params import Enum
+from m5.params import Enum, UInt32
 from m5.params import Param
 from m5.objects.BaseISA import BaseISA
 
 
+class RiscvVectorLength(UInt32):
+    min = 8
+    max = 65536
+
+    def _check(self):
+        super()._check()
+
+        # VLEN needs to be a whole power of 2. We already know value is
+        # not zero. Hence:
+        if self.value & (self.value - 1) != 0:
+            raise TypeError("VLEN is not a power of 2: %d" % self.value)
+
+
+class RiscvVectorElementLength(UInt32):
+    min = 8
+    max = 64
+
+    def _check(self):
+        super()._check()
+
+        # ELEN needs to be a whole power of 2. We already know value is
+        # not zero. Hence:
+        if self.value & (self.value - 1) != 0:
+            raise TypeError("ELEN is not a power of 2: %d" % self.value)
+
+
 class RiscvType(Enum):
     vals = ["RV32", "RV64"]
 
@@ -53,6 +80,49 @@ class RiscvISA(BaseISA):
     cxx_header = "arch/riscv/isa.hh"
 
     check_alignment = Param.Bool(
-        False, "whether to check memory access alignment"
+        True, "whether to check memory access alignment"
     )
     riscv_type = Param.RiscvType("RV64", "RV32 or RV64")
+
+    enable_rvv = Param.Bool(True, "Enable vector extension")
+    vlen = Param.RiscvVectorLength(
+        256,
+        "Length of each vector register in bits. \
+        VLEN in Ch. 2 of RISC-V vector spec",
+    )
+    elen = Param.RiscvVectorElementLength(
+        64,
+        "Length of each vector element in bits. \
+        ELEN in Ch. 2 of RISC-V vector spec",
+    )
+
+    enable_Zicbom_fs = Param.Bool(True, "Enable Zicbom extension in FS mode")
+    enable_Zicboz_fs = Param.Bool(True, "Enable Zicboz extension in FS mode")
+
+    def get_isa_string(self):
+        isa_extensions = []
+        # check for the base ISA type
+        if self.riscv_type.value == "RV32":
+            isa_extensions.append("rv32")
+        elif self.riscv_type.value == "RV64":
+            isa_extensions.append("rv64")
+        # use imafdc by default
+        isa_extensions.extend(["i", "m", "a", "f", "d", "c"])
+        # check for the vector extension
+        if self.enable_rvv.value == True:
+            isa_extensions.append("v")
+        isa_string = "".join(isa_extensions)
+
+        if self.enable_Zicbom_fs.value:
+            isa_string += "_Zicbom"  # Cache-block Management Instructions
+        if self.enable_Zicboz_fs.value:
+            isa_string += "_Zicboz"  # Cache-block Zero Instruction
+        isa_string += "_Zicntr"  # Performance Counter Spec
+        isa_string += "_Zicsr"  # RMW CSR Instructions (Privileged Spec)
+        isa_string += "_Zifencei"  # FENCE.I Instruction (Unprivileged Spec)
+        isa_string += "_Zihpm"  # Performance Counter Spec
+        isa_string += "_Zba"  # Address Generation
+        isa_string += "_Zbb"  # Basic Bit Manipulation
+        isa_string += "_Zbs"  # Single-bit Instructions
+
+        return isa_string
diff --git a/src/arch/riscv/SConscript b/src/arch/riscv/SConscript
index 924bba5915..bf40b6eccd 100644
--- a/src/arch/riscv/SConscript
+++ b/src/arch/riscv/SConscript
@@ -66,7 +66,9 @@ Source('bare_metal/fs_workload.cc', tags='riscv isa')
 SimObject('PMAChecker.py', sim_objects=['PMAChecker'], tags='riscv isa')
 SimObject('PMP.py', sim_objects=['PMP'], tags='riscv isa')
 SimObject('RiscvDecoder.py', sim_objects=['RiscvDecoder'], tags='riscv isa')
-SimObject('RiscvFsWorkload.py', sim_objects=['RiscvBareMetal', 'RiscvLinux'],
+SimObject('RiscvFsWorkload.py',
+    sim_objects=['RiscvBareMetal', 'RiscvLinux',
+                 'RiscvBootloaderKernelWorkload'],
     tags='riscv isa')
 SimObject('RiscvInterrupts.py', sim_objects=['RiscvInterrupts'],
     tags='riscv isa')
diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc
index 7faa310b1e..ee5d313587 100644
--- a/src/arch/riscv/decoder.cc
+++ b/src/arch/riscv/decoder.cc
@@ -28,6 +28,7 @@
  */
 
 #include "arch/riscv/decoder.hh"
+#include "arch/riscv/isa.hh"
 #include "arch/riscv/types.hh"
 #include "base/bitfield.hh"
 #include "debug/Decode.hh"
@@ -38,6 +39,14 @@ namespace gem5
 namespace RiscvISA
 {
 
+Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
+{
+    ISA *isa = dynamic_cast<ISA*>(p.isa);
+    vlen = isa->getVecLenInBits();
+    elen = isa->getVecElemLenInBits();
+    reset();
+}
+
 void Decoder::reset()
 {
     aligned = true;
@@ -90,6 +99,8 @@ Decoder::decode(ExtMachInst mach_inst, Addr addr)
     if (!si)
         si = decodeInst(mach_inst);
 
+    si->size(compressed(mach_inst) ? 2 : 4);
+
     DPRINTF(Decode, "Decode: Decoded %s instruction: %#x\n",
             si->getName(), mach_inst);
     return si;
@@ -112,7 +123,11 @@ Decoder::decode(PCStateBase &_next_pc)
         next_pc.compressed(false);
     }
 
+    emi.vl      = next_pc.vl();
+    emi.vtype8  = next_pc.vtype() & 0xff;
+    emi.vill    = next_pc.vtype().vill;
     emi.rv_type = static_cast<int>(next_pc.rvType());
+
     return decode(emi, next_pc.instAddr());
 }
 
diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh
index 15cbefe39c..bf863fda22 100644
--- a/src/arch/riscv/decoder.hh
+++ b/src/arch/riscv/decoder.hh
@@ -32,6 +32,7 @@
 
 #include "arch/generic/decode_cache.hh"
 #include "arch/generic/decoder.hh"
+#include "arch/riscv/insts/vector.hh"
 #include "arch/riscv/types.hh"
 #include "base/logging.hh"
 #include "base/types.hh"
@@ -59,7 +60,10 @@ class Decoder : public InstDecoder
     ExtMachInst emi;
     uint32_t machInst;
 
-    StaticInstPtr decodeInst(ExtMachInst mach_inst);
+    uint32_t vlen;
+    uint32_t elen;
+
+    virtual StaticInstPtr decodeInst(ExtMachInst mach_inst);
 
     /// Decode a machine instruction.
     /// @param mach_inst The binary instruction to decode.
@@ -67,14 +71,11 @@ class Decoder : public InstDecoder
     StaticInstPtr decode(ExtMachInst mach_inst, Addr addr);
 
   public:
-    Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
-    {
-        reset();
-    }
+    Decoder(const RiscvDecoderParams &p);
 
     void reset() override;
 
-    inline bool compressed(ExtMachInst inst) { return (inst & 0x3) < 0x3; }
+    inline bool compressed(ExtMachInst inst) { return inst.quadRant < 0x3; }
 
     //Use this to give data to the decoder. This should be used
     //when there is control flow.
diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc
index 8fb8f81261..7d4e9f90b6 100644
--- a/src/arch/riscv/faults.cc
+++ b/src/arch/riscv/faults.cc
@@ -154,17 +154,14 @@ RiscvFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
         }
 
         // Clear load reservation address
-        tc->getIsaPtr()->clearLoadReservation(tc->contextId());
+        auto isa = static_cast<RiscvISA::ISA*>(tc->getIsaPtr());
+        isa->clearLoadReservation(tc->contextId());
 
         // Set PC to fault handler address
-        Addr addr = mbits(tc->readMiscReg(tvec), 63, 2);
-        if (isInterrupt() && bits(tc->readMiscReg(tvec), 1, 0) == 1)
-            addr += 4 * _code;
+        Addr addr = isa->getFaultHandlerAddr(tvec, _code, isInterrupt());
         pc_state.set(addr);
         tc->pcState(pc_state);
     } else {
-        inst->advancePC(pc_state);
-        tc->pcState(pc_state);
         invokeSE(tc, inst);
     }
 }
@@ -184,6 +181,10 @@ Reset::invoke(ThreadContext *tc, const StaticInstPtr &inst)
     std::unique_ptr<PCState> new_pc(dynamic_cast<PCState *>(
         tc->getIsaPtr()->newPCState(workload->getEntry())));
     panic_if(!new_pc, "Failed create new PCState from ISA pointer");
+    VTYPE vtype = 0;
+    vtype.vill = 1;
+    new_pc->vtype(vtype);
+    new_pc->vl(0);
     tc->pcState(*new_pc);
 
     // Reset PMP Cfg
@@ -206,9 +207,11 @@ UnknownInstFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
 void
 IllegalInstFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    auto *rsi = static_cast<RiscvStaticInst *>(inst.get());
-    panic("Illegal instruction 0x%08x at pc %s: %s", rsi->machInst,
-        tc->pcState(), reason.c_str());
+    if (! tc->getSystemPtr()->trapToGdb(GDBSignal::ILL, tc->contextId()) ) {
+        auto *rsi = static_cast<RiscvStaticInst *>(inst.get());
+        panic("Illegal instruction 0x%08x at pc %s: %s", rsi->machInst,
+            tc->pcState(), reason.c_str());
+    }
 }
 
 void
@@ -227,12 +230,20 @@ IllegalFrmFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
 void
 BreakpointFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    schedRelBreak(0);
+    if (! tc->getSystemPtr()->trapToGdb(GDBSignal::TRAP, tc->contextId()) ) {
+        schedRelBreak(0);
+    }
 }
 
 void
 SyscallFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
 {
+    /* Advance the PC to the next instruction so that, once the (simulated)
+       syscall has executed, execution continues from there. */
+    auto pc_state = tc->pcState().as<PCState>();
+    inst->advancePC(pc_state);
+    tc->pcState(pc_state);
+
     tc->getSystemPtr()->workload->syscall(tc);
 }
 
diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh
index f687fd6f20..fa67e3b34c 100644
--- a/src/arch/riscv/faults.hh
+++ b/src/arch/riscv/faults.hh
@@ -173,7 +173,7 @@ class InstFault : public RiscvFault
         : RiscvFault(n, FaultType::OTHERS, INST_ILLEGAL), _inst(inst)
     {}
 
-    RegVal trap_value() const override { return bits(_inst, 31, 0); }
+    RegVal trap_value() const override { return _inst.instBits; }
 };
 
 class UnknownInstFault : public InstFault
diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript
index 704152c040..2822cf86b4 100644
--- a/src/arch/riscv/insts/SConscript
+++ b/src/arch/riscv/insts/SConscript
@@ -33,3 +33,4 @@ Source('compressed.cc', tags='riscv isa')
 Source('mem.cc', tags='riscv isa')
 Source('standard.cc', tags='riscv isa')
 Source('static_inst.cc', tags='riscv isa')
+Source('vector.cc', tags='riscv isa')
diff --git a/src/arch/riscv/insts/mem.cc b/src/arch/riscv/insts/mem.cc
index 5f58a68a57..8ebda7406d 100644
--- a/src/arch/riscv/insts/mem.cc
+++ b/src/arch/riscv/insts/mem.cc
@@ -55,8 +55,13 @@ std::string
 Store::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
-    ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " <<
-        offset << '(' << registerName(srcRegIdx(0)) << ')';
+    if (_numSrcRegs == 1) {
+        ss << mnemonic << ' ' << offset << '(' << registerName(srcRegIdx(0))
+           << ")";
+    } else {
+        ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " <<
+            offset << '(' << registerName(srcRegIdx(0)) << ')';
+    }
     return ss.str();
 }
 
diff --git a/src/arch/riscv/insts/static_inst.hh b/src/arch/riscv/insts/static_inst.hh
index f835713505..2e4d94864a 100644
--- a/src/arch/riscv/insts/static_inst.hh
+++ b/src/arch/riscv/insts/static_inst.hh
@@ -33,6 +33,7 @@
 #include <string>
 
 #include "arch/riscv/pcstate.hh"
+#include "arch/riscv/regs/misc.hh"
 #include "arch/riscv/types.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/static_inst.hh"
@@ -145,6 +146,15 @@ class RiscvMacroInst : public RiscvStaticInst
     {
         panic("Tried to execute a macroop directly!\n");
     }
+
+    void size(size_t newSize) override
+    {
+        for (int i = 0; i < microops.size(); i++) {
+            microops[i]->size(newSize);
+        }
+        _size = newSize;
+    }
+
 };
 
 /**
diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc
new file mode 100644
index 0000000000..7f17bb055e
--- /dev/null
+++ b/src/arch/riscv/insts/vector.cc
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2022 PLCT Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/riscv/insts/vector.hh"
+
+#include <sstream>
+#include <string>
+
+#include "arch/riscv/insts/static_inst.hh"
+#include "arch/riscv/isa.hh"
+#include "arch/riscv/regs/misc.hh"
+#include "arch/riscv/regs/vector.hh"
+#include "arch/riscv/utility.hh"
+#include "cpu/static_inst.hh"
+
+namespace gem5
+{
+
+namespace RiscvISA
+{
+
+/**
+ * This function translates the 3-bit value of vlmul bits to the corresponding
+ * lmul value as specified in RVV 1.0 spec p11-12 chapter 3.4.2.
+ *
+ * I.e.,
+ * vlmul = -3 -> LMUL = 1/8
+ * vlmul = -2 -> LMUL = 1/4
+ * vlmul = -1 -> LMUL = 1/2
+ * vlmul = 0 -> LMUL = 1
+ * vlmul = 1 -> LMUL = 2
+ * vlmul = 2 -> LMUL = 4
+ * vlmul = 3 -> LMUL = 8
+ *
+**/
+float
+getVflmul(uint32_t vlmul_encoding)
+{
+    // Sign-extend the low three bits: the exponent lies in [-4, 3].
+    const int exponent = sext<3>(vlmul_encoding & 7);
+    return exponent < 0 ? 1.0 / (1 << -exponent) : 1 << exponent;
+}
+
+uint32_t
+getVlmax(VTYPE vtype, uint32_t vlen)
+{
+    // VLMAX = (VLEN / SEW) * LMUL, see RVV 1.0 spec chapter 3.4.2. The
+    // integer quotient is scaled in floating point and then truncated.
+    const uint32_t elems_per_reg = vlen / getSew(vtype.vsew);
+    return elems_per_reg * getVflmul(vtype.vlmul);
+}
+
+std::string
+VConfOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
+    if (!bit31) {  // register source followed by the decoded zimm field
+        os << registerName(srcRegIdx(0)) << ", " << generateZimmDisassembly();
+    } else if (bit30) {  // unsigned immediate followed by the decoded zimm
+        os << uimm << ", " << generateZimmDisassembly();
+    } else {  // bit31 set, bit30 clear: two register sources
+        os << registerName(srcRegIdx(0)) << ", " << registerName(srcRegIdx(1));
+    }
+    return os.str();
+}
+
+std::string
+VConfOp::generateZimmDisassembly() const
+{
+    // VSETIVLI uses ZIMM10 and VSETVLI uses ZIMM11
+    const uint64_t zimm = (bit31 && bit30) ? zimm10 : zimm11;
+
+    // Fields as decoded below:
+    //   zimm[1:0]  LMUL selector
+    //   zimm[2]    set for a fractional LMUL
+    //   zimm[5:3]  SEW selector, SEW = 8 << zimm[5:3]
+    //   zimm[6]    "ta" when set, "tu" when clear
+    //   zimm[7]    "ma" when set, "mu" when clear
+    const int lmul_sel = bits(zimm, 1, 0);
+    const bool fractional = bits(zimm, 2);
+    const int sew = 1 << (bits(zimm, 5, 3) + 3);
+    const char *tail_policy = bits(zimm, 6) == 1 ? "ta" : "tu";
+    const char *mask_policy = bits(zimm, 7) == 1 ? "ma" : "mu";
+
+    std::stringstream os;
+    os << "e" << sew << ", m";
+    if (!fractional) {
+        os << (1 << lmul_sel);
+    } else if (lmul_sel == 3) {
+        os << "f2";
+    } else if (lmul_sel == 2) {
+        os << "f4";
+    } else if (lmul_sel == 1) {
+        os << "f8";
+    } else {
+        // Fractional bit set with selector 0: no LMUL string exists.
+        panic("Unsupport fractional LMUL");
+    }
+
+    os << ", " << tail_policy << ", " << mask_policy;
+    return os.str();
+}
+
+std::string
+VectorNonSplitInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <dest0>, <src0>", plus ", v0.t" when the vm bit is clear.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
+       << registerName(srcRegIdx(0)) << (machInst.vm == 0 ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VectorArithMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
+    if (machInst.funct3 != 0x3) {
+        // Non-OPIVI encodings print two register sources, src1 first.
+        os << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0));
+    } else {
+        os << registerName(srcRegIdx(0)) << ", " << machInst.vecimm;
+    }
+    os << (machInst.vm == 0 ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VectorArithMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
+    if (machInst.funct3 != 0x3) {
+        // Non-OPIVI encodings print two register sources, src1 first.
+        os << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0));
+    } else {
+        os << registerName(srcRegIdx(0)) << ", " << machInst.vecimm;
+    }
+    os << (machInst.vm == 0 ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VectorVMUNARY0MicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    const char *mask_suffix = machInst.vm == 0 ? ", v0.t" : "";  // masked op
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << mask_suffix;
+    return os.str();
+}
+
+std::string VectorVMUNARY0MacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    const char *mask_suffix = machInst.vm == 0 ? ", v0.t" : "";  // masked op
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << mask_suffix;
+    return os.str();
+}
+
+std::string VectorSlideMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
+    if (machInst.funct3 != 0x3) {  // two register sources, src1 first
+        os << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0));
+    } else {  // funct3 == 0x3: src0 followed by the vecimm field
+        os << registerName(srcRegIdx(0)) << ", " << machInst.vecimm;
+    }
+    os << (machInst.vm == 0 ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VectorSlideMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
+    if (machInst.funct3 != 0x3) {  // two register sources, src1 first
+        os << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0));
+    } else {  // funct3 == 0x3: src0 followed by the vecimm field
+        os << registerName(srcRegIdx(0)) << ", " << machInst.vecimm;
+    }
+    os << (machInst.vm == 0 ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VleMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // Displayed offset: microIdx registers of vlen / 8 bytes each.
+    const unsigned byte_offset = (vlen >> 3) * microIdx;
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << byte_offset
+       << '(' << registerName(srcRegIdx(0)) << "), "
+       << registerName(srcRegIdx(1)) << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VlWholeMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    const unsigned byte_offset = (vlen >> 3) * microIdx;  // vlen/8 per reg
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << byte_offset
+       << '(' << registerName(srcRegIdx(0)) << ')';
+    return os.str();
+}
+
+std::string VseMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    const unsigned byte_offset = (vlen >> 3) * microIdx;  // vlen/8 per reg
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << byte_offset
+       << '(' << registerName(srcRegIdx(0)) << ')'
+       << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsWholeMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    const unsigned byte_offset = (vlen >> 3) * microIdx;  // vlen/8 per reg
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << byte_offset
+       << '(' << registerName(srcRegIdx(0)) << ')';
+    return os.str();
+}
+
+std::string VleMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <dest0>, (<src0>)", plus ", v0.t" when vm is clear.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ("
+       << registerName(srcRegIdx(0)) << ')' << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VlWholeMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;  // "<mnemonic> <dest0>, (<src0>)"
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ("
+       << registerName(srcRegIdx(0)) << ')';
+    return os.str();
+}
+
+std::string VseMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <src1>, (<src0>)", plus ", v0.t" when vm is clear.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", ("
+       << registerName(srcRegIdx(0)) << ')' << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsWholeMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;  // "<mnemonic> <src1>, (<src0>)"
+    os << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", ("
+       << registerName(srcRegIdx(0)) << ')';
+    return os.str();
+}
+
+std::string VlStrideMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <dest0>, (<src0>), <src1>", plus ", v0.t" when vm is clear.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ("
+       << registerName(srcRegIdx(0)) << "), " << registerName(srcRegIdx(1));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VlStrideMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ("
+       << registerName(srcRegIdx(0)) << "), " << registerName(srcRegIdx(1));
+    const bool agnostic = machInst.vtype8.vma != 0 && machInst.vtype8.vta != 0;
+    if (microIdx != 0 || !agnostic)  // src2 hidden only for agnostic uop 0
+        os << ", " << registerName(srcRegIdx(2));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsStrideMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <src2>, (<src0>), <src1>", plus ", v0.t" when vm is clear.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", ("
+       << registerName(srcRegIdx(0)) << "), " << registerName(srcRegIdx(1));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsStrideMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // Format: "<mnemonic> <src2>, (<src0>), <src1>[, v0.t]".
+    // src2 already leads the operand list, so it is not appended a second
+    // time; no other store disassembler in this file appends one either.
+    std::stringstream ss;
+    ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", ("
+       << registerName(srcRegIdx(0)) << "), " << registerName(srcRegIdx(1));
+    if (!machInst.vm) ss << ", v0.t";
+    return ss.str();
+}
+
+std::string VlIndexMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <dest0>, (<src0>),<src1>": no blank after the second comma.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ("
+       << registerName(srcRegIdx(0)) << ")," << registerName(srcRegIdx(1));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VlIndexMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    const uint16_t vd_elem = vdElemIdx, vs2_elem = vs2ElemIdx;  // numeric
+    const bool agnostic = machInst.vtype8.vma != 0 && machInst.vtype8.vta != 0;
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0)) << "[" << vd_elem
+       << "], (" << registerName(srcRegIdx(0)) << "), "
+       << registerName(srcRegIdx(1)) << "[" << vs2_elem << "]";
+    if (microIdx != 0 || !agnostic) os << ", " << registerName(srcRegIdx(2));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsIndexMacroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <src2>, (<src0>),<src1>": no blank after the second comma.
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", ("
+       << registerName(srcRegIdx(0)) << ")," << registerName(srcRegIdx(1));
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string VsIndexMicroInst::generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const
+{
+    const uint16_t vs3_elem = vs3ElemIdx, vs2_elem = vs2ElemIdx;  // numeric
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(srcRegIdx(2)) << "[" << vs3_elem
+       << "], (" << registerName(srcRegIdx(0)) << "), "
+       << registerName(srcRegIdx(1)) << "[" << vs2_elem << "]";
+    os << (!machInst.vm ? ", v0.t" : "");
+    return os.str();
+}
+
+std::string
+VMvWholeMacroInst::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;  // "<mnemonic> <dest0>, <src1>"
+    os << mnemonic << ' ' << registerName(destRegIdx(0));
+    os << ", " << registerName(srcRegIdx(1));
+    return os.str();
+}
+
+std::string
+VMvWholeMicroInst::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;  // "<mnemonic> <dest0>, <src1>"
+    os << mnemonic << ' ' << registerName(destRegIdx(0));
+    os << ", " << registerName(srcRegIdx(1));
+    return os.str();
+}
+
+VMaskMergeMicroInst::VMaskMergeMicroInst(ExtMachInst extMachInst,
+    uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize)
+    : VectorArithMicroInst("vmask_mv_micro", extMachInst,
+                            VectorIntegerArithOp, 0, 0),
+      vlen(_vlen), elemSize(_elemSize)
+{
+    // srcRegIdxArr has NumVecInternalRegs slots; refuse to write past it.
+    panic_if(_numSrcs > NumVecInternalRegs,
+             "vmask_mv_micro: %d sources exceed srcRegIdxArr", int(_numSrcs));
+    setRegIdxArrays(
+        reinterpret_cast<RegIdArrayPtr>(
+            &std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
+        reinterpret_cast<RegIdArrayPtr>(
+            &std::remove_pointer_t<decltype(this)>::destRegIdxArr));
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    // One vector destination, then _numSrcs consecutive internal registers.
+    setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]);
+    _numTypedDestRegs[VecRegClass]++;
+    for (uint8_t i = 0; i < _numSrcs; i++)
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]);
+}
+
+Fault
+VMaskMergeMicroInst::execute(ExecContext* xc,
+    trace::InstRecord* traceData) const
+{
+    // Merge the mask chunks held by the source registers into dest 0.
+    vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
+    auto Vd = tmp_d0.as<uint8_t>();
+    // Read vlenb in place; cloning the PC state would leak a heap copy.
+    const uint32_t vlenb = xc->tcBase()->pcState().as<PCState>().vlenb();
+    const uint32_t elems_per_vreg = vlenb / elemSize;
+    size_t bit_cnt = elems_per_vreg;
+    vreg_t tmp_s;
+    xc->getRegOperand(this, 0, &tmp_s);
+    auto s = tmp_s.as<uint8_t>();
+    // Start from a full copy of source 0 (first result and tail).
+    memcpy(Vd, s, vlenb);
+    for (uint8_t i = 1; i < this->_numSrcRegs; i++) {
+        xc->getRegOperand(this, i, &tmp_s);
+        s = tmp_s.as<uint8_t>();
+        if (elems_per_vreg < 8) {
+            const uint32_t m = (1 << elems_per_vreg) - 1;
+            const uint32_t mask = m << (i * elems_per_vreg % 8);
+            // Clear the chunk's bits in Vd, then take them from source i.
+            Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask;
+            Vd[bit_cnt/8] |= s[bit_cnt/8] & mask;
+            bit_cnt += elems_per_vreg;
+        } else {
+            const uint32_t byte_offset = elems_per_vreg / 8;
+            memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset);
+        }
+    }
+    if (traceData) traceData->setData(vecRegClass, &tmp_d0);
+    return NoFault;
+}
+
+std::string
+VMaskMergeMicroInst::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    // "<mnemonic> <dest0>, <src0>, ..., offset:<(vlen / 8) / elemSize>"
+    std::stringstream os;
+    os << mnemonic << ' ' << registerName(destRegIdx(0));
+    for (int src = 0; src < this->_numSrcRegs; ++src) {
+        os << ", " << registerName(srcRegIdx(src));
+    }
+    os << ", offset:" << (vlen >> 3) / elemSize;
+    return os.str();
+}
+
+Fault
+VxsatMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
+{
+    xc->setMiscReg(MISCREG_VXSAT, *vxsat);
+    const auto vcsr_old = xc->readMiscReg(MISCREG_VCSR);
+    xc->setMiscReg(MISCREG_VCSR, (vcsr_old & ~1) | *vxsat);  // bit 0 := vxsat
+    return NoFault;
+}
+
+std::string
+VxsatMicroInst::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    std::stringstream os;  // "<mnemonic> VXSAT, 0x1" or "<mnemonic> VXSAT, 0x0"
+    os << mnemonic << " VXSAT, " << (*vxsat ? "0x1" : "0x0");
+    return os.str();
+}
+
+} // namespace RiscvISA
+} // namespace gem5
diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh
new file mode 100644
index 0000000000..4127060e4a
--- /dev/null
+++ b/src/arch/riscv/insts/vector.hh
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 2022 PLCT Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_RISCV_INSTS_VECTOR_HH__
+#define __ARCH_RISCV_INSTS_VECTOR_HH__
+
+#include <string>
+
+#include "arch/riscv/insts/static_inst.hh"
+#include "arch/riscv/isa.hh"
+#include "arch/riscv/regs/misc.hh"
+#include "arch/riscv/utility.hh"
+#include "cpu/exec_context.hh"
+#include "cpu/static_inst.hh"
+
+namespace gem5
+{
+
+namespace RiscvISA
+{
+
+float
+getVflmul(uint32_t vlmul_encoding);
+
+inline uint32_t
+getSew(uint32_t vsew)
+{
+    assert(vsew <= 3);  // only selectors 0..3 (SEW 8..64) are accepted
+    return uint32_t(8 << vsew);
+}
+
+uint32_t
+getVlmax(VTYPE vtype, uint32_t vlen);
+
+/**
+ * Base class for Vector Config operations
+ */
+class VConfOp : public RiscvStaticInst
+{
+  protected:
+    // Fields copied out of the machine instruction by the constructor.
+    uint64_t bit30;
+    uint64_t bit31;
+    uint64_t zimm10;    // from zimm_vsetivli
+    uint64_t zimm11;    // from zimm_vsetvli
+    uint64_t uimm;      // from uimm_vsetivli
+    uint32_t elen;
+    VConfOp(const char *mnem, ExtMachInst _extMachInst,
+            uint32_t _elen, OpClass __opClass)
+        : RiscvStaticInst(mnem, _extMachInst, __opClass),
+          bit30(_extMachInst.bit30), bit31(_extMachInst.bit31),
+          zimm10(_extMachInst.zimm_vsetivli),
+          zimm11(_extMachInst.zimm_vsetvli),
+          uimm(_extMachInst.uimm_vsetivli), elen(_elen)
+    {
+        flags[IsVector] = true;
+    }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+    /** Render the selected zimm field, e.g. "e32, m1, ta, ma". */
+    std::string generateZimmDisassembly() const;
+};
+
+inline uint8_t checked_vtype(bool vill, uint8_t vtype) {
+    // Validate vill, vsew (< 4) and vlmul (!= 0b100); return vtype as is.
+    panic_if(vill, "vill has been set");
+    const uint8_t vsew = bits(vtype, 5, 3), vlmul = bits(vtype, 2, 0);
+    panic_if(vsew >= 0b100, "vsew: %#x not supported", vsew);
+    panic_if(vlmul == 0b100, "vlmul: %#x not supported", vlmul);
+    return vtype;
+}
+
+// Vector instruction base built directly on RiscvStaticInst.
+class VectorNonSplitInst : public RiscvStaticInst
+{
+  protected:
+    uint32_t vl;    // _machInst.vl at construction
+    uint8_t vtype;  // _machInst.vtype8 at construction
+
+    VectorNonSplitInst(const char* mnem, ExtMachInst _machInst,
+                       OpClass __opClass)
+        : RiscvStaticInst(mnem, _machInst, __opClass),
+          vl(_machInst.vl), vtype(_machInst.vtype8)
+    {
+        flags[IsVector] = true;
+    }
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Common base of the vector macro-ops declared in this header. It records
+// vl, vtype8 and vlen and marks the instruction IsVector.
+class VectorMacroInst : public RiscvMacroInst
+{
+  protected:
+    uint32_t vl;    // _machInst.vl at construction
+    uint8_t vtype;  // _machInst.vtype8 at construction
+    uint32_t vlen;  // constructor argument, 256 unless overridden
+
+    VectorMacroInst(const char* mnem, ExtMachInst _machInst,
+                    OpClass __opClass, uint32_t _vlen = 256)
+        : RiscvMacroInst(mnem, _machInst, __opClass),
+          vl(_machInst.vl), vtype(_machInst.vtype8), vlen(_vlen)
+    {
+        flags[IsVector] = true;
+    }
+};
+
+// Common base of the vector micro-ops declared in this header.
+class VectorMicroInst : public RiscvMicroInst
+{
+  protected:
+    uint32_t vlen;     // constructor argument, 256 unless overridden
+    uint32_t microVl;
+    uint8_t microIdx;
+    uint8_t vtype;     // _machInst.vtype8 at construction
+    VectorMicroInst(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, uint32_t _microVl,
+                    uint8_t _microIdx, uint32_t _vlen = 256)
+        : RiscvMicroInst(mnem, _machInst, __opClass),
+          vlen(_vlen), microVl(_microVl), microIdx(_microIdx),
+          vtype(_machInst.vtype8)
+    {
+        flags[IsVector] = true;
+    }
+};
+
+/** Micro-op that does nothing: execute() only returns NoFault. */
+class VectorNopMicroInst : public RiscvMicroInst
+{
+public:
+    VectorNopMicroInst(ExtMachInst _machInst)
+        : RiscvMicroInst("vnop", _machInst, No_OpClass) {}
+
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)
+        const override
+    {
+        return NoFault;
+    }
+
+    std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab)
+      const override
+    {
+        // The disassembly is the bare mnemonic. Returning it directly
+        // avoids std::stringstream, whose header this file never includes.
+        return mnemonic;
+    }
+};
+
+// Micro-op base that forwards to VectorMicroInst without passing a vlen,
+// so the base-class default vlen applies.
+class VectorArithMicroInst : public VectorMicroInst
+{
+  protected:
+    VectorArithMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base; its generateDisassembly() is defined out of line.
+class VectorArithMacroInst : public VectorMacroInst
+{
+  protected:
+    VectorArithMacroInst(const char* mnem, ExtMachInst _machInst,
+                         OpClass __opClass, uint32_t _vlen = 256)
+        : VectorMacroInst(mnem, _machInst, __opClass, _vlen)
+    { flags[IsVector] = true; }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that forwards to VectorMicroInst without passing a vlen,
+// so the base-class default vlen applies.
+class VectorVMUNARY0MicroInst : public VectorMicroInst
+{
+  protected:
+    VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base; unlike some sibling macro-op bases its constructor has
+// no default for _vlen.
+class VectorVMUNARY0MacroInst : public VectorMacroInst
+{
+  protected:
+    VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst,
+                            OpClass __opClass, uint32_t _vlen)
+        : VectorMacroInst(mnem, _machInst, __opClass, _vlen)
+    { flags[IsVector] = true; }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base for the slide instructions; _vlen defaults to 256 when
+// the caller does not supply it.
+class VectorSlideMacroInst : public VectorMacroInst
+{
+  protected:
+    VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst,
+                         OpClass __opClass, uint32_t _vlen = 256)
+        : VectorMacroInst(mnem, _machInst, __opClass, _vlen)
+    { flags[IsVector] = true; }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Slide micro-op base; also records the vd and vs2 indices it is given.
+class VectorSlideMicroInst : public VectorMicroInst
+{
+  protected:
+    uint8_t vdIdx;
+    uint8_t vs2Idx;
+    VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
+            uint8_t _vdIdx, uint8_t _vs2Idx)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx),
+          vdIdx(_vdIdx), vs2Idx(_vs2Idx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Base for memory micro-ops that carry an explicit offset.
+class VectorMemMicroInst : public VectorMicroInst
+{
+  protected:
+    uint32_t offset; // Used to calculate EA.
+    Request::Flags memAccessFlags;  // constructed from 0
+
+    VectorMemMicroInst(const char* mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
+            uint32_t _offset)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx),
+          offset(_offset), memAccessFlags(0)
+    {}
+};
+
+// Base for the memory macro-ops; only forwards its arguments.
+class VectorMemMacroInst : public VectorMacroInst
+{
+  protected:
+    VectorMemMacroInst(const char* mnem, ExtMachInst _machInst,
+                       OpClass __opClass, uint32_t _vlen = 256)
+        : VectorMacroInst(mnem, _machInst, __opClass, _vlen) {}
+};
+
+// Macro-op base that adds only a disassembly override.
+class VleMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VleMacroInst(const char* mnem, ExtMachInst _machInst,
+                 OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VseMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VseMacroInst(const char* mnem, ExtMachInst _machInst,
+                 OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that sets IsLoad in addition to what VectorMicroInst sets.
+// It derives from VectorMicroInst directly and keeps its own access flags.
+class VleMicroInst : public VectorMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+
+    VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                 uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
+                          _microIdx, _vlen)
+    { flags[IsLoad] = true; }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that sets IsStore in addition to what VectorMicroInst sets.
+// It derives from VectorMicroInst directly and keeps its own access flags.
+class VseMicroInst : public VectorMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+
+    VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+                 uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
+                          _microIdx, _vlen)
+    { flags[IsStore] = true; }
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VlWholeMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VlWholeMacroInst(const char *mnem, ExtMachInst _machInst,
+                     OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base with its own access flags. Unlike VleMicroInst, this
+// constructor sets no IsLoad flag.
+class VlWholeMicroInst : public VectorMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+
+    VlWholeMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
+            uint32_t _vlen)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
+                          _microIdx, _vlen) {}
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VsWholeMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VsWholeMacroInst(const char *mnem, ExtMachInst _machInst,
+                     OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base with its own access flags. Unlike VseMicroInst, this
+// constructor sets no IsStore flag.
+class VsWholeMicroInst : public VectorMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+
+    VsWholeMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx,
+            uint32_t _vlen)
+        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
+                          _microIdx, _vlen) {}
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VlStrideMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VlStrideMacroInst(const char* mnem, ExtMachInst _machInst,
+                      OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that passes a zero offset to VectorMemMicroInst.
+class VlStrideMicroInst : public VectorMemMicroInst
+{
+  protected:
+    uint8_t regIdx;
+    VlStrideMicroInst(const char *mnem, ExtMachInst _machInst,
+                      OpClass __opClass, uint8_t _regIdx,
+                      uint8_t _microIdx, uint32_t _microVl)
+        : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl,
+                             _microIdx, 0),
+          regIdx(_regIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VsStrideMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VsStrideMacroInst(const char* mnem, ExtMachInst _machInst,
+                      OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that passes a zero offset to VectorMemMicroInst.
+class VsStrideMicroInst : public VectorMemMicroInst
+{
+  protected:
+    uint8_t regIdx;
+    VsStrideMicroInst(const char *mnem, ExtMachInst _machInst,
+                      OpClass __opClass, uint8_t _regIdx,
+                      uint8_t _microIdx, uint32_t _microVl)
+        : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl,
+                             _microIdx, 0),
+          regIdx(_regIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VlIndexMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VlIndexMacroInst(const char* mnem, ExtMachInst _machInst,
+                     OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that forwards microVl = 1, microIdx = 0 and offset = 0 to
+// VectorMemMicroInst and records the register/element indices it is given.
+class VlIndexMicroInst : public VectorMemMicroInst
+{
+  protected:
+    uint8_t vdRegIdx;
+    uint8_t vdElemIdx;
+    uint8_t vs2RegIdx;
+    uint8_t vs2ElemIdx;
+    VlIndexMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint8_t _vdRegIdx, uint8_t _vdElemIdx,
+            uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
+        : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0),
+          vdRegIdx(_vdRegIdx), vdElemIdx(_vdElemIdx),
+          vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Macro-op base that adds only a disassembly override.
+class VsIndexMacroInst : public VectorMemMacroInst
+{
+  protected:
+    VsIndexMacroInst(const char* mnem, ExtMachInst _machInst,
+                     OpClass __opClass, uint32_t _vlen)
+        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that forwards microVl = 1, microIdx = 0 and offset = 0.
+class VsIndexMicroInst : public VectorMemMicroInst
+{
+  protected:
+    uint8_t vs3RegIdx;
+    uint8_t vs3ElemIdx;
+    uint8_t vs2RegIdx;
+    uint8_t vs2ElemIdx;
+    VsIndexMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx,
+            uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
+        : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0),
+          vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx),
+          vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Passes no vlen, so VectorArithMacroInst's default of 256 is used.
+class VMvWholeMacroInst : public VectorArithMacroInst
+{
+  protected:
+    VMvWholeMacroInst(const char* mnem, ExtMachInst _machInst,
+                      OpClass __opClass)
+        : VectorArithMacroInst(mnem, _machInst, __opClass) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+// Micro-op base that overrides the disassembly inherited from
+// VectorArithMicroInst; the constructor only forwards its arguments.
+class VMvWholeMicroInst : public VectorArithMicroInst
+{
+  protected:
+    VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst,
+            OpClass __opClass, uint32_t _microVl, uint8_t _microIdx)
+        : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+
+// Micro-op with up to NumVecInternalRegs sources and one destination.
+class VMaskMergeMicroInst : public VectorArithMicroInst
+{
+  private:
+    RegId srcRegIdxArr[NumVecInternalRegs];
+    RegId destRegIdxArr[1];
+  public:
+    uint32_t vlen;  // NOTE(review): hides VectorMicroInst::vlen
+    size_t elemSize;
+    VMaskMergeMicroInst(ExtMachInst extMachInst,
+        uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize);
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    std::string generateDisassembly(Addr,
+        const loader::SymbolTable *) const override;
+};
+
+// Micro-op constructed with a pointer to a saturation flag; execute() and
+// generateDisassembly() are defined out of line.
+class VxsatMicroInst : public VectorArithMicroInst
+{
+  private:
+    bool* vxsat;  // borrowed pointer; this class never frees it
+  public:
+    VxsatMicroInst(bool* Vxsat, ExtMachInst extMachInst)
+        : VectorArithMicroInst("vxsat_micro", extMachInst,
+                               VectorIntegerArithOp, 0, 0),
+          vxsat(Vxsat) {}
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    std::string generateDisassembly(Addr, const loader::SymbolTable *)
+        const override;
+};
+
+} // namespace RiscvISA
+} // namespace gem5
+
+
+#endif // __ARCH_RISCV_INSTS_VECTOR_HH__
diff --git a/src/arch/riscv/interrupts.hh b/src/arch/riscv/interrupts.hh
index a1ee396cd4..d6fa374a14 100644
--- a/src/arch/riscv/interrupts.hh
+++ b/src/arch/riscv/interrupts.hh
@@ -112,13 +112,13 @@ class Interrupts : public BaseInterrupts
     }
 
     bool checkInterrupt(int num) const { return ip[num] && ie[num]; }
-    bool checkInterrupts() const
+    bool checkInterrupts() const override
     {
         return checkNonMaskableInterrupt() || (ip & ie & globalMask()).any();
     }
 
     Fault
-    getInterrupt()
+    getInterrupt() override
     {
         assert(checkInterrupts());
         if (checkNonMaskableInterrupt())
@@ -135,10 +135,10 @@ class Interrupts : public BaseInterrupts
         return NoFault;
     }
 
-    void updateIntrInfo() {}
+    void updateIntrInfo() override {}
 
     void
-    post(int int_num, int index)
+    post(int int_num, int index) override
     {
         DPRINTF(Interrupt, "Interrupt %d:%d posted\n", int_num, index);
         if (int_num != INT_NMI) {
@@ -149,7 +149,7 @@ class Interrupts : public BaseInterrupts
     }
 
     void
-    clear(int int_num, int index)
+    clear(int int_num, int index) override
     {
         DPRINTF(Interrupt, "Interrupt %d:%d cleared\n", int_num, index);
         if (int_num != INT_NMI) {
@@ -163,7 +163,7 @@ class Interrupts : public BaseInterrupts
     void clearNMI() { tc->setMiscReg(MISCREG_NMIP, 0); }
 
     void
-    clearAll()
+    clearAll() override
     {
         DPRINTF(Interrupt, "All interrupts cleared\n");
         ip = 0;
@@ -176,7 +176,7 @@ class Interrupts : public BaseInterrupts
     void setIE(const uint64_t& val) { ie = val; }
 
     void
-    serialize(CheckpointOut &cp) const
+    serialize(CheckpointOut &cp) const override
     {
         unsigned long ip_ulong = ip.to_ulong();
         unsigned long ie_ulong = ie.to_ulong();
@@ -185,7 +185,7 @@ class Interrupts : public BaseInterrupts
     }
 
     void
-    unserialize(CheckpointIn &cp)
+    unserialize(CheckpointIn &cp) override
     {
         unsigned long ip_ulong;
         unsigned long ie_ulong;
diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index 94a8239bac..0ba6d15b6c 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -36,13 +36,16 @@
 #include <sstream>
 
 #include "arch/riscv/faults.hh"
+#include "arch/riscv/insts/static_inst.hh"
 #include "arch/riscv/interrupts.hh"
 #include "arch/riscv/mmu.hh"
 #include "arch/riscv/pagetable.hh"
 #include "arch/riscv/pmp.hh"
+#include "arch/riscv/pcstate.hh"
 #include "arch/riscv/regs/float.hh"
 #include "arch/riscv/regs/int.hh"
 #include "arch/riscv/regs/misc.hh"
+#include "arch/riscv/regs/vector.hh"
 #include "base/bitfield.hh"
 #include "base/compiler.hh"
 #include "base/logging.hh"
@@ -52,6 +55,7 @@
 #include "debug/LLSC.hh"
 #include "debug/MatRegs.hh"
 #include "debug/RiscvMisc.hh"
+#include "debug/VecRegs.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
 #include "params/RiscvISA.hh"
@@ -189,6 +193,14 @@ namespace RiscvISA
     [MISCREG_FFLAGS]        = "FFLAGS",
     [MISCREG_FRM]           = "FRM",
 
+    [MISCREG_VSTART]        = "VSTART",
+    [MISCREG_VXSAT]         = "VXSAT",
+    [MISCREG_VXRM]          = "VXRM",
+    [MISCREG_VCSR]          = "VCSR",
+    [MISCREG_VL]            = "VL",
+    [MISCREG_VTYPE]         = "VTYPE",
+    [MISCREG_VLENB]         = "VLENB",
+
     [MISCREG_NMIVEC]        = "NMIVEC",
     [MISCREG_NMIE]          = "NMIE",
     [MISCREG_NMIP]          = "NMIP",
@@ -234,17 +246,17 @@ namespace
 {
 
 /* Not applicable to RISCV */
-RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
-RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
-RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
+RegClass vecElemClass(VecElemClass, VecElemClassName, 0, debug::IntRegs);
+RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 0,
         debug::IntRegs);
-RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs);
 RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 
 } // anonymous namespace
 
-ISA::ISA(const Params &p) :
-    BaseISA(p), rv_type(p.riscv_type), checkAlignment(p.check_alignment)
+ISA::ISA(const Params &p) :BaseISA(p),
+    _rvType(p.riscv_type), checkAlignment(p.check_alignment),
+    enableRvv(p.enable_rvv),vlen(p.vlen),elen(p.elen)
 {
     _regClasses.push_back(&intRegClass);
     _regClasses.push_back(&floatRegClass);
@@ -255,6 +267,14 @@ ISA::ISA(const Params &p) :
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
+    fatal_if(p.vlen < p.elen, // reject a register narrower than ELEN
+        "VLEN should be greater than or equal to ELEN. "
+        "Ch. 2 RISC-V vector spec.");
+
+    if (p.enable_rvv) { // only announce RVV when it is actually enabled
+        inform("RVV enabled, VLEN = %d bits, ELEN = %d bits", p.vlen, p.elen);
+    }
+
     miscRegFile.resize(NUM_MISCREGS);
     clear();
 }
@@ -275,6 +295,17 @@ ISA::copyRegsFrom(ThreadContext *src)
     for (auto &id: floatRegClass)
         tc->setReg(id, src->getReg(id));
 
+    // Third loop through the vector registers.
+    RiscvISA::VecRegContainer vc;
+    for (auto &id: vecRegClass) {
+        src->getReg(id, &vc);
+        tc->setReg(id, &vc);
+    }
+
+    // Copying Misc Regs
+    for (int i = 0; i < NUM_MISCREGS; i++)
+        tc->setMiscRegNoEffect(i, src->readMiscRegNoEffect(i));
+
     // Lastly copy PC/NPC
     tc->pcState(src->pcState());
 }
@@ -299,17 +330,21 @@ void ISA::clear()
     // mark FS is initial
     status.fs = INITIAL;
 
-    // rv_type dependent init.
-    switch (rv_type) {
+    // _rvType dependent init.
+    switch (_rvType) {
         case RV32:
           misa.rv32_mxl = 1;
           break;
         case RV64:
           misa.rv64_mxl = 2;
           status.uxl = status.sxl = 2;
+          if (getEnableRvv()) {
+              status.vs = VPUStatus::INITIAL;
+              misa.rvv = 1;
+          }
           break;
         default:
-          panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+          panic("%s: Unknown _rvType: %d", name(), (int)_rvType);
     }
 
     miscRegFile[MISCREG_ISA] = misa;
@@ -465,7 +500,7 @@ ISA::readMiscReg(RegIndex idx)
                 (status.xs == 3) || (status.fs == 3) || (status.vs == 3);
             // For RV32, the SD bit is at index 31
             // For RV64, the SD bit is at index 63.
-            switch (rv_type) {
+            switch (_rvType) {
                 case RV32:
                     status.rv32_sd = sd_bit;
                     break;
@@ -473,12 +508,33 @@ ISA::readMiscReg(RegIndex idx)
                     status.rv64_sd = sd_bit;
                     break;
                 default:
-                    panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+                    panic("%s: Unknown _rvType: %d", name(), (int)_rvType);
             }
             setMiscRegNoEffect(idx, status);
 
             return readMiscRegNoEffect(idx);
         }
+      case MISCREG_VLENB:
+        {
+            auto rpc = tc->pcState().as<PCState>();
+            return rpc.vlenb();
+        }
+      case MISCREG_VTYPE:
+        {
+            auto rpc = tc->pcState().as<PCState>();
+            return rpc.vtype();
+        }
+      case MISCREG_VL:
+        {
+            auto rpc = tc->pcState().as<PCState>();
+            return (RegVal)rpc.vl();
+        }
+      case MISCREG_VCSR:
+        {
+            // Compose vcsr as written by setMiscReg: vxsat bit 0, vxrm 2:1.
+            return readMiscRegNoEffect(MISCREG_VXSAT) |
+                  (readMiscRegNoEffect(MISCREG_VXRM) << 1);
+        }
       default:
         // Try reading HPM counters
         // As a placeholder, all HPM counters are just cycle counters
@@ -541,7 +597,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 assert(readMiscRegNoEffect(MISCREG_PRV) == PRV_M);
 
                 int regSize = 0;
-                switch (rv_type) {
+                switch (_rvType) {
                     case RV32:
                         regSize = 4;
                     break;
@@ -549,7 +605,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                         regSize = 8;
                     break;
                     default:
-                        panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+                        panic("%s: Unknown _rvType: %d", name(), (int)_rvType);
                 }
 
                 // Specs do not seem to mention what should be
@@ -638,20 +694,43 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                             2, 0) != 0) {
                     new_misa.rvc = new_misa.rvc | cur_misa.rvc;
                 }
+                if (!getEnableRvv()) {
+                    new_misa.rvv = 0;
+                }
                 setMiscRegNoEffect(idx, new_misa);
             }
             break;
           case MISCREG_STATUS:
             {
-                if (rv_type != RV32) {
+                if (_rvType != RV32) {
                     // SXL and UXL are hard-wired to 64 bit
                     auto cur = readMiscRegNoEffect(idx);
                     val &= ~(STATUS_SXL_MASK | STATUS_UXL_MASK);
                     val |= cur & (STATUS_SXL_MASK | STATUS_UXL_MASK);
                 }
+                if (!getEnableRvv()) {
+                    // Always OFF if rvv is disabled.
+                    val &= ~STATUS_VS_MASK;
+                }
                 setMiscRegNoEffect(idx, val);
             }
             break;
+          case MISCREG_VXSAT:
+            {
+                setMiscRegNoEffect(idx, val & 0x1);
+            }
+            break;
+          case MISCREG_VXRM:
+            {
+                setMiscRegNoEffect(idx, val & 0x3);
+            }
+            break;
+          case MISCREG_VCSR:
+            {
+                setMiscRegNoEffect(MISCREG_VXSAT, val & 0x1);
+                setMiscRegNoEffect(MISCREG_VXRM, (val & 0x6) >> 1);
+            }
+            break;
           default:
             setMiscRegNoEffect(idx, val);
         }
@@ -758,6 +837,16 @@ ISA::resetThread()
     Reset().invoke(tc);
 }
 
+// Compute a handler address from CSR `idx`: base is mbits(csr, 63, 2), and
+// interrupts add 4 * cause when the CSR's low two bits equal 1.
+Addr
+ISA::getFaultHandlerAddr(RegIndex idx, uint64_t cause, bool intr) const
+{
+    const auto tvec = tc->readMiscRegNoEffect(idx);
+    const bool vectored = intr && bits(tvec, 1, 0) == 1;
+    return mbits(tvec, 63, 2) + (vectored ? 4 * cause : 0);
+}
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 7ef5c526f5..66cba0f7fa 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -67,12 +67,15 @@ enum FPUStatus
     DIRTY = 3,
 };
 
+using VPUStatus = FPUStatus;
+
 class ISA : public BaseISA
 {
   protected:
-    RiscvType rv_type;
+    RiscvType _rvType;
     std::vector<RegVal> miscRegFile;
     bool checkAlignment;
+    bool enableRvv;
 
     bool hpmCounterEnabled(int counter) const;
 
@@ -81,6 +84,16 @@ class ISA : public BaseISA
     const Addr INVALID_RESERVATION_ADDR = (Addr)-1;
     std::unordered_map<int, Addr> load_reservation_addrs;
 
+    /** Length of each vector register in bits.
+     *  VLEN in Ch. 2 of RISC-V vector spec
+     */
+    unsigned vlen;
+
+    /** Maximum length of a vector element in bits.
+     *  ELEN in Ch. 2 of RISC-V vector spec
+     */
+    unsigned elen;
+
   public:
     using Params = RiscvISAParams;
 
@@ -89,14 +102,8 @@ class ISA : public BaseISA
     PCStateBase*
     newPCState(Addr new_inst_addr=0) const override
     {
-        return new PCState(new_inst_addr, rv_type);
-    }
-
-    void
-    clearLoadReservation(ContextID cid) override
-    {
-        Addr& load_reservation_addr = load_reservation_addrs[cid];
-        load_reservation_addr = INVALID_RESERVATION_ADDR;
+        unsigned vlenb = vlen >> 3;
+        return new PCState(new_inst_addr, _rvType, vlenb);
     }
 
   public:
@@ -117,7 +124,7 @@ class ISA : public BaseISA
     virtual const std::unordered_map<int, RegVal>&
     getCSRMaskMap() const
     {
-        return CSRMasks[rv_type];
+        return CSRMasks[_rvType];
     }
 
     bool alignmentCheckEnabled() const { return checkAlignment; }
@@ -141,7 +148,24 @@ class ISA : public BaseISA
 
     void resetThread() override;
 
-    RiscvType rvType() const { return rv_type; }
+    RiscvType rvType() const { return _rvType; }
+
+    bool getEnableRvv() const { return enableRvv; }
+
+    void
+    clearLoadReservation(ContextID cid)
+    {
+        Addr& load_reservation_addr = load_reservation_addrs[cid];
+        load_reservation_addr = INVALID_RESERVATION_ADDR;
+    }
+
+    /** Const accessors for VLEN (bits), VLENB (bytes) and ELEN (bits). */
+    unsigned getVecLenInBits() const { return vlen; }
+    unsigned getVecLenInBytes() const { return vlen >> 3; }
+    unsigned getVecElemLenInBits() const { return elen; }
+
+    virtual Addr getFaultHandlerAddr(
+        RegIndex idx, uint64_t cause, bool intr) const;
 };
 
 } // namespace RiscvISA
diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa
index 8589269949..66ce74afe3 100644
--- a/src/arch/riscv/isa/bitfields.isa
+++ b/src/arch/riscv/isa/bitfields.isa
@@ -98,7 +98,9 @@ def bitfield RL <25>;
 
 // Compressed
 def bitfield COPCODE <15:13>;
+def bitfield CFUNCT6LOW3 <12:10>;
 def bitfield CFUNCT1 <12>;
+def bitfield CFUNCT1BIT6 <6>;
 def bitfield CFUNCT2HIGH <11:10>;
 def bitfield CFUNCT2LOW <6:5>;
 def bitfield RC1 <11:7>;
@@ -133,3 +135,27 @@ def bitfield BIT25         <25>;
 def bitfield RNUM       <23:20>;
 def bitfield KFUNCT5    <29:25>;
 def bitfield BS         <31:30>;
+
+// Vector instructions
+def bitfield VFUNCT6    vfunct6;
+def bitfield VFUNCT5    vfunct5;
+def bitfield VFUNCT3    vfunct3;
+def bitfield VFUNCT2    vfunct2;
+
+def bitfield VS3        vs3;
+def bitfield VS2        vs2;
+def bitfield VS1        vs1;
+def bitfield VD         vd;
+
+def bitfield NF         nf;
+def bitfield MEW        mew;
+def bitfield MOP        mop;
+def bitfield VM         vm;
+def bitfield LUMOP      lumop;
+def bitfield SUMOP      sumop;
+def bitfield WIDTH      width;
+
+def bitfield BIT31      bit31;
+def bitfield BIT30      bit30;
+def bitfield SIMM5      uimm_vsetivli;
+def bitfield SIMM3      simm3;
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index c7eefbc79c..3d1d396165 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -61,6 +61,11 @@ decode QUADRANT default Unknown::unknown() {
                     return std::make_shared<IllegalInstFault>("FPU is off",
                                                                machInst);
 
+                // Mutating any floating point register changes the FS bit
+                // of the STATUS CSR.
+                status.fs = FPUStatus::DIRTY;
+                xc->setMiscReg(MISCREG_STATUS, status);
+
                 Fp2_bits = Mem;
             }}, {{
                 EA = rvZext(Rp1 + offset);
@@ -85,6 +90,9 @@ decode QUADRANT default Unknown::unknown() {
                         return std::make_shared<IllegalInstFault>("FPU is off",
                                                                    machInst);
 
+                    status.fs = FPUStatus::DIRTY;
+                    xc->setMiscReg(MISCREG_STATUS, status);
+
                     freg_t fd = freg(f32(Mem_uw));
                     Fp2_bits = fd.v;
                 }}, {{
@@ -99,6 +107,49 @@ decode QUADRANT default Unknown::unknown() {
                 }});
             }
         }
+        0x4: decode CFUNCT6LOW3 {
+            format CompressedLoad {
+                0x0: c_lbu({{
+                    offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
+                }}, {{
+                    Rp2 = Mem_ub;
+                }}, {{
+                    EA = rvZext(Rp1 + offset);
+                }});
+                0x1: decode CFUNCT1BIT6 {
+                    0x0: c_lhu({{
+                        offset = CIMM2<0:0> << 1;
+                    }}, {{
+                        Rp2 = Mem_uh;
+                    }}, {{
+                        EA = rvZext(Rp1 + offset);
+                    }});
+                    0x1: c_lh({{
+                        offset = CIMM2<0:0> << 1;
+                    }}, {{
+                        Rp2_sd = Mem_sh;
+                    }}, {{
+                        EA = rvZext(Rp1 + offset);
+                    }});
+                }
+            }
+            format CompressedStore {
+                0x2: c_sb({{
+                    offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
+                }}, {{
+                    Mem_ub = Rp2_ub;
+                }}, ea_code={{
+                    EA = rvZext(Rp1 + offset);
+                }});
+                0x3: c_sh({{
+                    offset = (CIMM2<0:0> << 1);
+                }}, {{
+                    Mem_uh = Rp2_uh;
+                }}, ea_code={{
+                    EA = rvZext(Rp1 + offset);
+                }});
+            }
+        }
         format CompressedStore {
             0x5: c_fsd({{
                 offset = CIMM3 << 3 | CIMM2 << 6;
@@ -256,15 +307,42 @@ decode QUADRANT default Unknown::unknown() {
                             Rp1 = rvSext(Rp1 & Rp2);
                         }});
                     }
-                    0x1: decode RVTYPE {
-                        0x1: decode CFUNCT2LOW {
-                            0x0: c_subw({{
+                    0x1: decode CFUNCT2LOW {
+                        0x0: decode RVTYPE {
+                            0x1: c_subw({{
                                 Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
                             }});
+                        }
+                        0x1: decode RVTYPE {
                             0x1: c_addw({{
                                 Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
                             }});
                         }
+                        0x2: c_mul({{
+                            Rp1_sd = rvSext(Rp1_sd * Rp2_sd);
+                        }}, IntMultOp);
+                        0x3: decode RP2 {
+                            0x0: c_zext_b({{
+                                Rp1 = Rp1 & 0xFFULL;
+                            }});
+                            0x1: c_sext_b({{
+                                Rp1 = sext<8>(Rp1 & 0xFFULL);
+                            }});
+                            0x2: c_zext_h({{
+                                Rp1 = Rp1 & 0xFFFFULL;
+                            }});
+                            0x3: c_sext_h({{
+                                Rp1 = sext<16>(Rp1 & 0xFFFFULL);
+                            }});
+                            0x4: decode RVTYPE {
+                                0x1: c_zext_w({{
+                                    Rp1 = bits(Rp1, 31, 0);
+                                }});
+                            }
+                            0x5: c_not({{
+                                Rp1 = ~Rp1;
+                            }});
+                        }
                     }
                 }
             }
@@ -307,6 +385,14 @@ decode QUADRANT default Unknown::unknown() {
                          CIMM1 << 5 |
                          CIMM5<2:0> << 6;
             }}, {{
+                STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                if (status.fs == FPUStatus::OFF)
+                    return std::make_shared<IllegalInstFault>("FPU is off",
+                                                               machInst);
+
+                status.fs = FPUStatus::DIRTY;
+                xc->setMiscReg(MISCREG_STATUS, status);
+
                 Fc1_bits = Mem;
             }}, {{
                 EA = rvZext(sp + offset);
@@ -330,6 +416,14 @@ decode QUADRANT default Unknown::unknown() {
                              CIMM1 << 5 |
                              CIMM5<1:0> << 6;
                 }}, {{
+                    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                    if (status.fs == FPUStatus::OFF)
+                        return std::make_shared<IllegalInstFault>("FPU is off",
+                                                                   machInst);
+
+                    status.fs = FPUStatus::DIRTY;
+                    xc->setMiscReg(MISCREG_STATUS, status);
+
                     freg_t fd;
                     fd = freg(f32(Mem_uw));
                     Fd_bits = fd.v;
@@ -387,6 +481,11 @@ decode QUADRANT default Unknown::unknown() {
                 offset = CIMM6<5:3> << 3 |
                          CIMM6<2:0> << 6;
             }}, {{
+                STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                if (status.fs == FPUStatus::OFF)
+                    return std::make_shared<IllegalInstFault>("FPU is off",
+                                                               machInst);
+
                 Mem_ud = Fc2_bits;
             }}, {{
                 EA = rvZext(sp + offset);
@@ -404,6 +503,11 @@ decode QUADRANT default Unknown::unknown() {
                     offset = CIMM6<5:2> << 2 |
                              CIMM6<1:0> << 6;
                 }}, {{
+                    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                    if (status.fs == FPUStatus::OFF)
+                        return std::make_shared<IllegalInstFault>("FPU is off",
+                                                                   machInst);
+
                     Mem_uw = unboxF32(boxF32(Fs2_bits));
                 }}, {{
                     EA = (uint32_t)(sp_uw + offset);
@@ -457,6 +561,10 @@ decode QUADRANT default Unknown::unknown() {
                     if (status.fs == FPUStatus::OFF)
                         return std::make_shared<IllegalInstFault>(
                                     "FPU is off", machInst);
+
+                    status.fs = FPUStatus::DIRTY;
+                    xc->setMiscReg(MISCREG_STATUS, status);
+
                     freg_t fd;
                     fd = freg(f16(Mem_uh));
                     Fd_bits = fd.v;
@@ -466,6 +574,10 @@ decode QUADRANT default Unknown::unknown() {
                     if (status.fs == FPUStatus::OFF)
                         return std::make_shared<IllegalInstFault>(
                                     "FPU is off", machInst);
+
+                    status.fs = FPUStatus::DIRTY;
+                    xc->setMiscReg(MISCREG_STATUS, status);
+
                     freg_t fd;
                     fd = freg(f32(Mem_uw));
                     Fd_bits = fd.v;
@@ -475,11 +587,183 @@ decode QUADRANT default Unknown::unknown() {
                     if (status.fs == FPUStatus::OFF)
                         return std::make_shared<IllegalInstFault>(
                                     "FPU is off", machInst);
+
+                    status.fs = FPUStatus::DIRTY;
+                    xc->setMiscReg(MISCREG_STATUS, status);
+
                     freg_t fd;
                     fd = freg(f64(Mem));
                     Fd_bits = fd.v;
                 }}, inst_flags=FloatMemReadOp);
             }
+
+            0x0: decode MOP {
+                0x0: decode LUMOP {
+                    0x00: VleOp::vle8_v({{
+                        if ((machInst.vm || elem_mask(v0, ei)) &&
+                            i < this->microVl) {
+                            Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                        } else {
+                            Vd_ub[i] = Vs2_ub[i];
+                        }
+                    }}, inst_flags=VectorUnitStrideLoadOp);
+                    0x08: decode NF {
+                        format VlWholeOp {
+                            0x0: vl1re8_v({{
+                                Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x1: vl2re8_v({{
+                                Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x3: vl4re8_v({{
+                                Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x7: vl8re8_v({{
+                                Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                        }
+                    }
+                    0x0b: VlmOp::vlm_v({{
+                        Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
+                    }}, inst_flags=VectorUnitStrideMaskLoadOp);
+                }
+                0x1: VlIndexOp::vluxei8_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_ub[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+                0x2: VlStrideOp::vlse8_v({{
+                    Vd_ub[microIdx] = Mem_vc.as<uint8_t>()[0];
+                }}, inst_flags=VectorStridedLoadOp);
+                0x3: VlIndexOp::vloxei8_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_ub[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+            }
+            0x5: decode MOP {
+                0x0: decode LUMOP {
+                    0x00: VleOp::vle16_v({{
+                        if ((machInst.vm || elem_mask(v0, ei)) &&
+                            i < this->microVl) {
+                            Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
+                        } else {
+                            Vd_uh[i] = Vs2_uh[i];
+                        }
+                    }}, inst_flags=VectorUnitStrideLoadOp);
+                    0x08: decode NF {
+                        format VlWholeOp {
+                            0x0: vl1re16_v({{
+                                Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x1: vl2re16_v({{
+                                Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x3: vl4re16_v({{
+                                Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x7: vl8re16_v({{
+                                Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                        }
+                    }
+                }
+                0x1: VlIndexOp::vluxei16_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_uh[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+                0x2: VlStrideOp::vlse16_v({{
+                    Vd_uh[microIdx] = Mem_vc.as<uint16_t>()[0];
+                }}, inst_flags=VectorStridedLoadOp);
+                0x3: VlIndexOp::vloxei16_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_uh[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+            }
+            0x6: decode MOP {
+                0x0: decode LUMOP {
+                    0x00: VleOp::vle32_v({{
+                        if ((machInst.vm || elem_mask(v0, ei)) &&
+                            i < this->microVl) {
+                            Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
+                        } else {
+                            Vd_uw[i] = Vs2_uw[i];
+                        }
+                    }}, inst_flags=VectorUnitStrideLoadOp);
+                    0x08: decode NF {
+                        format VlWholeOp {
+                            0x0: vl1re32_v({{
+                                Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x1: vl2re32_v({{
+                                Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x3: vl4re32_v({{
+                                Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x7: vl8re32_v({{
+                                Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                        }
+                    }
+                }
+                0x1: VlIndexOp::vluxei32_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_uw[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+                0x2: VlStrideOp::vlse32_v({{
+                    Vd_uw[microIdx] = Mem_vc.as<uint32_t>()[0];
+                }}, inst_flags=VectorStridedLoadOp);
+                0x3: VlIndexOp::vloxei32_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_uw[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+            }
+            0x7: decode MOP {
+                0x0: decode LUMOP {
+                    0x00: VleOp::vle64_v({{
+                        if ((machInst.vm || elem_mask(v0, ei)) &&
+                            i < this->microVl) {
+                            Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
+                        } else {
+                            Vd_ud[i] = Vs2_ud[i];
+                        }
+                    }}, inst_flags=VectorUnitStrideLoadOp);
+                    0x08: decode NF {
+                        format VlWholeOp {
+                            0x0: vl1re64_v({{
+                                Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x1: vl2re64_v({{
+                                Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x3: vl4re64_v({{
+                                Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                            0x7: vl8re64_v({{
+                                Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
+                            }}, inst_flags=VectorWholeRegisterLoadOp);
+                        }
+                    }
+                }
+                0x1: VlIndexOp::vluxei64_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_ud[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+                0x2: VlStrideOp::vlse64_v({{
+                    Vd_ud[microIdx] = Mem_vc.as<uint64_t>()[0];
+                }}, inst_flags=VectorStridedLoadOp);
+                0x3: VlIndexOp::vloxei64_v({{
+                    Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
+                }}, {{
+                    EA = Rs1 + Vs2_ud[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedLoadOp);
+            }
         }
 
         0x03: decode FUNCT3 {
@@ -489,6 +773,23 @@ decode QUADRANT default Unknown::unknown() {
                 0x1: fence_i({{
                 }}, uint64_t, IsNonSpeculative, IsSerializeAfter, No_OpClass);
             }
+
+            0x2: decode FUNCT12 {
+                format CBMOp {
+                    0x0: cbo_inval({{
+                        Mem = 0;
+                    }}, mem_flags=[INVALIDATE, DST_POC]);
+                    0x1: cbo_clean({{
+                        Mem = 0;
+                    }}, mem_flags=[CLEAN, DST_POC]);
+                    0x2: cbo_flush({{
+                        Mem = 0;
+                    }}, mem_flags=[CLEAN, INVALIDATE, DST_POC]);
+                    0x4: cbo_zero({{
+                        Mem = 0;
+                    }}, mem_flags=[CACHE_BLOCK_ZERO]);
+                }
+            }
         }
 
         0x04: decode FUNCT3 {
@@ -786,6 +1087,106 @@ decode QUADRANT default Unknown::unknown() {
                     Mem_ud = Fs2_bits;
                 }}, inst_flags=FloatMemWriteOp);
             }
+
+            0x0: decode MOP {  // stores with 8-bit data or index width
+                0x0: decode SUMOP {  // unit-stride family
+                    0x00: VseOp::vse8_v({{  // copy element i to memory
+                        Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                    }}, inst_flags=VectorUnitStrideStoreOp);
+                    format VsWholeOp {  // whole-register stores
+                        0x8: decode NF {  // NF 0/1/3/7 -> vs1r/2r/4r/8r
+                            0x0: vs1r_v({{
+                                Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                            }}, inst_flags=VectorWholeRegisterStoreOp);
+                            0x1: vs2r_v({{
+                                Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                            }}, inst_flags=VectorWholeRegisterStoreOp);
+                            0x3: vs4r_v({{
+                                Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                            }}, inst_flags=VectorWholeRegisterStoreOp);
+                            0x7: vs8r_v({{
+                                Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                            }}, inst_flags=VectorWholeRegisterStoreOp);
+                        }
+                    }
+                    0x0b: VsmOp::vsm_v({{  // mask store, byte-wise copy
+                        Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
+                    }}, inst_flags=VectorUnitStrideMaskStoreOp);
+                }
+                0x1: VsIndexOp::vsuxei8_v({{  // indexed, 8-bit indices
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_ub[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+                0x2: VsStrideOp::vsse8_v({{  // strided, one element per uop
+                    Mem_vc.as<uint8_t>()[0] = Vs3_ub[microIdx];
+                }}, inst_flags=VectorStridedStoreOp);
+                0x3: VsIndexOp::vsoxei8_v({{  // same body as vsuxei8_v
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_ub[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+            }
+            0x5: decode MOP {  // stores with 16-bit data or index width
+                0x0: decode SUMOP {  // unit-stride family
+                    0x00: VseOp::vse16_v({{  // copy element i to memory
+                        Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
+                    }}, inst_flags=VectorUnitStrideStoreOp);
+                }
+                0x1: VsIndexOp::vsuxei16_v({{  // indexed, 16-bit indices
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_uh[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+                0x2: VsStrideOp::vsse16_v({{  // strided, one element per uop
+                    Mem_vc.as<uint16_t>()[0] = Vs3_uh[microIdx];
+                }}, inst_flags=VectorStridedStoreOp);
+                0x3: VsIndexOp::vsoxei16_v({{  // same body as vsuxei16_v
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_uh[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+            }
+            0x6: decode MOP {  // stores with 32-bit data or index width
+                0x0: decode SUMOP {  // unit-stride family
+                    0x00: VseOp::vse32_v({{  // copy element i to memory
+                        Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
+                    }}, inst_flags=VectorUnitStrideStoreOp);
+                }
+                0x1: VsIndexOp::vsuxei32_v({{  // indexed, 32-bit indices
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_uw[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+                0x2: VsStrideOp::vsse32_v({{  // strided, one element per uop
+                    Mem_vc.as<uint32_t>()[0] = Vs3_uw[microIdx];
+                }}, inst_flags=VectorStridedStoreOp);
+                0x3: VsIndexOp::vsoxei32_v({{  // same body as vsuxei32_v
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_uw[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+            }
+            0x7: decode MOP {  // stores with 64-bit data or index width
+                0x0: decode SUMOP {  // unit-stride family
+                    0x00: VseOp::vse64_v({{  // copy element i to memory
+                        Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
+                    }}, inst_flags=VectorUnitStrideStoreOp);
+                }
+                0x1: VsIndexOp::vsuxei64_v({{  // indexed, 64-bit indices
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_ud[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+                0x2: VsStrideOp::vsse64_v({{  // strided, one element per uop
+                    Mem_vc.as<uint64_t>()[0] = Vs3_ud[microIdx];
+                }}, inst_flags=VectorStridedStoreOp);
+                0x3: VsIndexOp::vsoxei64_v({{  // same body as vsuxei64_v
+                    Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
+                }}, {{
+                    EA = Rs1 + Vs2_ud[vs2ElemIdx];
+                }}, inst_flags=VectorIndexedStoreOp);
+            }
         }
 
         0x0b: decode FUNCT3 {
@@ -1595,93 +1996,80 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x14: decode ROUND_MODE {
                     0x0: fmin_s({{
-                        bool less = f32_lt_quiet(f32(freg(Fs1_bits)),
-                            f32(freg(Fs2_bits))) ||
-                            (f32_eq(f32(freg(Fs1_bits)),
-                            f32(freg(Fs2_bits))) &&
-                            bits(f32(freg(Fs1_bits)).v, 31));
-
-                        Fd_bits = less ||
-                            isNaNF32UI(f32(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF32UI(f32(freg(Fs1_bits)).v) &&
-                            isNaNF32UI(f32(freg(Fs2_bits)).v))
-                            Fd_bits = f32(defaultNaNF32UI).v;
+                        float32_t fs1 = f32(freg(Fs1_bits));
+                        float32_t fs2 = f32(freg(Fs2_bits));
+                        float32_t fd;
+                        bool less = f32_lt_quiet(fs1, fs2) ||
+                            (f32_eq(fs1, fs2) && bits(fs1.v, 31));
+                        // fs1 if less, equal with sign bit set, or fs2 is NaN.
+                        fd = less || isNaNF32UI(fs2.v) ? fs1 : fs2;
+                        if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
+                            fd = f32(defaultNaNF32UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                         }}, FloatCmpOp);
                     0x1: fmax_s({{
-                        bool greater = f32_lt_quiet(f32(freg(Fs2_bits)),
-                            f32(freg(Fs1_bits))) ||
-                            (f32_eq(f32(freg(Fs2_bits)),
-                            f32(freg(Fs1_bits))) &&
-                            bits(f32(freg(Fs2_bits)).v, 31));
-
-                        Fd_bits = greater ||
-                            isNaNF32UI(f32(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF32UI(f32(freg(Fs1_bits)).v) &&
-                            isNaNF32UI(f32(freg(Fs2_bits)).v))
-                            Fd_bits = f32(defaultNaNF32UI).v;
+                        float32_t fs1 = f32(freg(Fs1_bits));
+                        float32_t fs2 = f32(freg(Fs2_bits));
+                        float32_t fd;
+                        bool greater = f32_lt_quiet(fs2, fs1) ||
+                            (f32_eq(fs2, fs1) && bits(fs2.v, 31));
+                        // fs1 if fs2 is less, equal with sign bit set, or NaN.
+                        fd = greater || isNaNF32UI(fs2.v) ? fs1: fs2;
+                        if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
+                            fd = f32(defaultNaNF32UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                         }}, FloatCmpOp);
                 }
                 0x15: decode ROUND_MODE {
                     0x0: fmin_d({{
-                        bool less = f64_lt_quiet(f64(freg(Fs1_bits)),
-                            f64(freg(Fs2_bits))) ||
-                            (f64_eq(f64(freg(Fs1_bits)),
-                            f64(freg(Fs2_bits))) &&
-                            bits(f64(freg(Fs1_bits)).v, 63));
-
-                        Fd_bits = less ||
-                            isNaNF64UI(f64(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF64UI(f64(freg(Fs1_bits)).v) &&
-                            isNaNF64UI(f64(freg(Fs2_bits)).v))
-                            Fd_bits = f64(defaultNaNF64UI).v;
+                        float64_t fs1 = f64(freg(Fs1_bits));
+                        float64_t fs2 = f64(freg(Fs2_bits));
+                        float64_t fd;
+                        bool less = f64_lt_quiet(fs1, fs2) ||
+                            (f64_eq(fs1, fs2) && bits(fs1.v, 63));
+                        // fs1 if less, equal with sign bit set, or fs2 is NaN.
+                        fd = less || isNaNF64UI(fs2.v) ? fs1 : fs2;
+                        if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
+                            fd = f64(defaultNaNF64UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                     }}, FloatCmpOp);
                     0x1: fmax_d({{
-                        bool greater =
-                            f64_lt_quiet(f64(freg(Fs2_bits)),
-                            f64(freg(Fs1_bits))) ||
-                            (f64_eq(f64(freg(Fs2_bits)),
-                            f64(freg(Fs1_bits))) &&
-                            bits(f64(freg(Fs2_bits)).v, 63));
-
-                        Fd_bits = greater ||
-                            isNaNF64UI(f64(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF64UI(f64(freg(Fs1_bits)).v) &&
-                            isNaNF64UI(f64(Fs2_bits).v))
-                            Fd_bits = f64(defaultNaNF64UI).v;
+                        float64_t fs1 = f64(freg(Fs1_bits));
+                        float64_t fs2 = f64(freg(Fs2_bits));
+                        float64_t fd;
+                        bool greater = f64_lt_quiet(fs2, fs1) ||
+                            (f64_eq(fs2, fs1) && bits(fs2.v, 63));
+                        // fs1 if fs2 is less, equal with sign bit set, or NaN.
+                        fd = greater || isNaNF64UI(fs2.v) ? fs1 : fs2;
+                        if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
+                            fd = f64(defaultNaNF64UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                     }}, FloatCmpOp);
                 }
                 0x16: decode ROUND_MODE {
                     0x0: fmin_h({{
-                        bool less = f16_lt_quiet(f16(freg(Fs1_bits)),
-                            f16(freg(Fs2_bits))) ||
-                            (f16_eq(f16(freg(Fs1_bits)),
-                            f16(freg(Fs2_bits))) &&
-                            bits(f16(freg(Fs1_bits)).v, 15));
-
-                        Fd_bits = less ||
-                            isNaNF16UI(f16(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF16UI(f16(freg(Fs1_bits)).v) &&
-                            isNaNF16UI(f16(freg(Fs2_bits)).v))
-                            Fd_bits = f16(defaultNaNF16UI).v;
+                        float16_t fs1 = f16(freg(Fs1_bits));
+                        float16_t fs2 = f16(freg(Fs2_bits));
+                        float16_t fd;
+                        bool less = f16_lt_quiet(fs1, fs2) ||
+                            (f16_eq(fs1, fs2) && bits(fs1.v, 15));
+                        // fs1 if less, equal with sign bit set, or fs2 is NaN.
+                        fd = less || isNaNF16UI(fs2.v) ? fs1 : fs2;
+                        if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
+                            fd = f16(defaultNaNF16UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                         }}, FloatCmpOp);
                     0x1: fmax_h({{
-                        bool greater = f16_lt_quiet(f16(freg(Fs2_bits)),
-                            f16(freg(Fs1_bits))) ||
-                            (f16_eq(f16(freg(Fs2_bits)),
-                            f16(freg(Fs1_bits))) &&
-                            bits(f16(freg(Fs2_bits)).v, 15));
-
-                        Fd_bits = greater ||
-                            isNaNF16UI(f16(freg(Fs2_bits)).v) ?
-                            freg(Fs1_bits).v : freg(Fs2_bits).v;
-                        if (isNaNF16UI(f16(freg(Fs1_bits)).v) &&
-                            isNaNF16UI(f16(freg(Fs2_bits)).v))
-                            Fd_bits = f16(defaultNaNF16UI).v;
+                        float16_t fs1 = f16(freg(Fs1_bits));
+                        float16_t fs2 = f16(freg(Fs2_bits));
+                        float16_t fd;
+                        bool greater = f16_lt_quiet(fs2, fs1) ||
+                            (f16_eq(fs2, fs1) && bits(fs2.v, 15));
+                        // fs1 if fs2 is less, equal with sign bit set, or NaN.
+                        fd = greater || isNaNF16UI(fs2.v) ? fs1 : fs2;
+                        if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
+                            fd = f16(defaultNaNF16UI);  // both inputs NaN
+                        Fd_bits = freg(fd).v;
                         }}, FloatCmpOp);
                 }
                 0x20: decode CONV_SGN {
@@ -2005,6 +2393,2135 @@ decode QUADRANT default Unknown::unknown() {
             }
         }
 
+        0x15: decode FUNCT3 {
+            // OPIVV
+            0x0: decode VFUNCT6 {
+                format VectorIntFormat {
+                    0x0: vadd_vv({{
+                        Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2: vsub_vv({{
+                        Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x4: vminu_vv({{
+                        Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ?
+                                Vs2_vu[i] : Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x5: vmin_vv({{
+                        Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ?
+                                Vs2_vi[i] : Vs1_vi[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x6: vmaxu_vv({{
+                        Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ?
+                                Vs2_vu[i] : Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x7: vmax_vv({{
+                        Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ?
+                                Vs2_vi[i] : Vs1_vi[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x9: vand_vv({{
+                        Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0xa: vor_vv({{
+                        Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0xb: vxor_vv({{
+                        Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i];
+                    }}, OPIVV, VectorIntegerArithOp);
+                }
+                0x0c: VectorGatherFormat::vrgather_vv({{
+                    for (uint32_t i = 0; i < microVl; i++) {
+                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
+                        if (this->vm || elem_mask(v0, ei)) {
+                            const uint64_t idx = Vs1_vu[i]
+                                - vs2_elems * vs2_idx;
+                            auto res = (Vs1_vu[i] >= vlmax) ? 0
+                                : (idx < vs2_elems) ? Vs2_vu[idx]
+                                : Vs3_vu[i];
+                            Vd_vu[i] = res;
+                        }
+                    }
+                }}, OPIVV, VectorMiscOp);
+                0x0e: VectorGatherFormat::vrgatherei16_vv({{
+                    for (uint32_t i = 0; i < microVl; i++) {
+                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
+                        if (this->vm || elem_mask(v0, ei)) {
+                            const uint16_t idx = Vs1_uh[i + vs1_bias]
+                                - vs2_elems * vs2_idx;
+                            auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0
+                                : (idx < vs2_elems) ? Vs2_vu[idx]
+                                : Vs3_vu[i + vd_bias];
+                            Vd_vu[i + vd_bias] = res;
+                        }
+                    }
+                }}, OPIVV, VectorMiscOp);
+                format VectorIntFormat {
+                    0x10: decode VM {
+                        0x0: vadc_vvm({{
+                            Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i]
+                                    + elem_mask(v0, ei);
+                        }}, OPIVV, VectorIntegerArithOp);
+                        // the unmasked versions (vm=1) are reserved
+                    }
+                    0x12: decode VM {
+                        0x0: vsbc_vvm({{
+                            Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i]
+                                    - elem_mask(v0, ei);
+                        }}, OPIVV, VectorIntegerArithOp);
+                        // the unmasked versions (vm=1) are reserved
+                    }
+                    0x17: decode VM {
+                        0x0: vmerge_vvm({{
+                            Vd_vu[i] = elem_mask(v0, ei)
+                                    ? Vs1_vu[i]
+                                    : Vs2_vu[i];
+                        }}, OPIVV, VectorIntegerArithOp);
+                        0x1: decode VS2 {
+                            0x0: vmv_v_v({{
+                                Vd_vu[i] = Vs1_vu[i];
+                            }}, OPIVV, VectorIntegerArithOp);
+                        }
+                    }
+                }
+                format VectorIntVxsatFormat{
+                    0x20: vsaddu_vv({{
+                        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Vs1_vu[i],
+                            vxsatptr);
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x21: vsadd_vv({{
+                        Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Vs1_vu[i],
+                            vxsatptr);
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x22: vssubu_vv({{
+                        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Vs1_vu[i],
+                            vxsatptr);
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x23: vssub_vv({{
+                        Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Vs1_vu[i],
+                            vxsatptr);
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x27: vsmul_vv({{
+                        vi max = std::numeric_limits<vi>::max();
+                        vi min = std::numeric_limits<vi>::min();
+                        // min * min is the one case that saturates.
+                        bool overflow = Vs1_vi[i] == min && Vs2_vi[i] == min;
+                        __int128_t result = (__int128_t)Vs1_vi[i] *
+                                            (__int128_t)Vs2_vi[i];
+                        // Round with the mode held in VXRM (was fixed at 0).
+                        result = int_rounding<__int128_t>(
+                            result, xc->readMiscReg(MISCREG_VXRM), sew - 1);
+                        result = result >> (sew - 1);
+                        if (overflow) {
+                            result = max;
+                            *vxsatptr = true;
+                        }
+                        Vd_vi[i] = (vi)result;
+                    }}, OPIVV, VectorIntegerArithOp);
+                }
+                format VectorIntFormat {
+                    0x25: vsll_vv({{
+                        Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x28: vsrl_vv({{
+                        Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x29: vsra_vv({{
+                        Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2a: vssrl_vv({{
+                        int sh = Vs1_vu[i] & (sew - 1);
+                        __uint128_t val = Vs2_vu[i];
+
+                        val = int_rounding<__uint128_t>(val,
+                            xc->readMiscReg(MISCREG_VXRM), sh);
+                        Vd_vu[i] = val >> sh;
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2b: vssra_vv({{
+                        int sh = Vs1_vi[i] & (sew - 1);
+                        __int128_t val = Vs2_vi[i];
+
+                        val = int_rounding<__int128_t>(val,
+                            xc->readMiscReg(MISCREG_VXRM), sh);
+                        Vd_vi[i] = val >> sh;
+                    }}, OPIVV, VectorIntegerArithOp);
+                }
+                format VectorReduceIntWideningFormat {
+                    0x30: vwredsumu_vs({{
+                        Vd_vwu[0] = reduce_loop(std::plus<vwu>(),
+                            Vs1_vwu, Vs2_vu);
+                    }}, OPIVV, VectorIntegerReduceOp);
+                    0x31: vwredsum_vs({{
+                        Vd_vwu[0] = reduce_loop(std::plus<vwi>(),
+                            Vs1_vwi, Vs2_vi);
+                    }}, OPIVV, VectorIntegerReduceOp);
+                }
+                format VectorIntMaskFormat {
+                    0x11: decode VM {
+                        0x0: vmadc_vvm({{
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vu[i], Vs1_vu[i],
+                                    elem_mask(v0, ei)));
+                        }}, OPIVV, VectorIntegerArithOp);
+                        0x1: vmadc_vv({{
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vu[i], Vs1_vu[i]));
+                        }}, OPIVV, VectorIntegerArithOp);
+                    }
+                    0x13: decode VM {
+                        0x0: vmsbc_vvm({{
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                borrow_out(Vs2_vi[i], Vs1_vi[i],
+                                    elem_mask(v0, ei)));
+                        }}, OPIVV, VectorIntegerArithOp);
+                        0x1: vmsbc_vv({{
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                borrow_out(Vs2_vi[i], Vs1_vi[i]));
+                        }}, OPIVV, VectorIntegerArithOp);
+                    }
+                    0x18: vmseq_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] == Vs1_vu[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x19: vmsne_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] != Vs1_vu[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x1a: vmsltu_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] < Vs1_vu[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x1b: vmslt_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] < Vs1_vi[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x1c: vmsleu_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] <= Vs1_vu[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x1d: vmsle_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] <= Vs1_vi[i]));
+                    }}, OPIVV, VectorIntegerArithOp);
+                }
+                format VectorIntNarrowingFormat {
+                    0x2c: vnsrl_wv({{
+                        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
+                            ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2d: vnsra_wv({{
+                        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
+                            ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2e: vnclipu_wv({{
+                        vu max = std::numeric_limits<vu>::max();
+                        uint64_t sign_mask =
+                            std::numeric_limits<uint64_t>::max() << sew;
+                        __uint128_t res = Vs2_vwu[i];
+                        unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1);
+                        // Round with the mode held in VXRM (was fixed at 0).
+                        res = int_rounding<__uint128_t>(res,
+                            xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
+                        // Any bit at or above sew set: clip to the narrow max.
+                        if (res & sign_mask) {
+                            res = max;
+                            // TODO: vxsat
+                        }
+
+                        Vd_vu[i + offset] = (vu)res;
+                    }}, OPIVV, VectorIntegerArithOp);
+                    0x2f: vnclip_wv({{
+                        vi max = std::numeric_limits<vi>::max();
+                        vi min = std::numeric_limits<vi>::min();
+                        __int128_t res = Vs2_vwi[i];
+                        unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1);
+                        // Round with the mode held in VXRM (was fixed at 0).
+                        res = int_rounding<__int128_t>(res,
+                            xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
+                        // Clamp to the range of the narrow signed type.
+                        if (res < min) {
+                            res = min;
+                            // TODO: vxsat
+                        } else if (res > max) {
+                            res = max;
+                            // TODO: vxsat
+                        }
+
+                        Vd_vi[i + offset] = (vi)res;
+                    }}, OPIVV, VectorIntegerArithOp);
+                }
+            }
+            // OPFVV
+            0x1: decode VFUNCT6 {
+                0x00: VectorFloatFormat::vfadd_vv({{
+                    auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
+                                       ftype<et>(Vs1_vu[i]));
+                    Vd_vu[i] = fd.v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x01: VectorReduceFloatFormat::vfredusum_vs({{
+                    Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
+                        return fadd<et>(ftype<et>(src1), ftype<et>(src2));
+                    }, Vs1_vu, Vs2_vu);
+                }}, OPFVV, VectorFloatReduceOp);
+                0x02: VectorFloatFormat::vfsub_vv({{
+                    auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
+                                       ftype<et>(Vs1_vu[i]));
+                    Vd_vu[i] = fd.v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x03: VectorReduceFloatFormat::vfredosum_vs({{
+                    Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
+                        return fadd<et>(ftype<et>(src1), ftype<et>(src2));
+                    }, Vs1_vu, Vs2_vu);
+                }}, OPFVV, VectorFloatReduceOp);
+                0x04: VectorFloatFormat::vfmin_vv({{
+                    auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
+                                       ftype<et>(Vs1_vu[i]));
+                    Vd_vu[i] = fd.v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x05: VectorReduceFloatFormat::vfredmin_vs({{
+                    Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
+                        return fmin<et>(ftype<et>(src1), ftype<et>(src2));
+                    }, Vs1_vu, Vs2_vu);
+                }}, OPFVV, VectorFloatReduceOp);
+                0x06: VectorFloatFormat::vfmax_vv({{
+                    auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
+                                       ftype<et>(Vs1_vu[i]));
+                    Vd_vu[i] = fd.v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x07: VectorReduceFloatFormat::vfredmax_vs({{
+                    Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
+                        return fmax<et>(ftype<et>(src1), ftype<et>(src2));
+                    }, Vs1_vu, Vs2_vu);
+                }}, OPFVV, VectorFloatReduceOp);
+                0x08: VectorFloatFormat::vfsgnj_vv({{
+                    Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                         ftype<et>(Vs1_vu[i]),
+                                         false, false).v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x09: VectorFloatFormat::vfsgnjn_vv({{
+                    Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                         ftype<et>(Vs1_vu[i]),
+                                         true, false).v;
+                }}, OPFVV, VectorFloatArithOp);
+                0x0a: VectorFloatFormat::vfsgnjx_vv({{
+                    Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                         ftype<et>(Vs1_vu[i]),
+                                         false, true).v;
+                }}, OPFVV, VectorFloatArithOp);
+                // VWFUNARY0
+                0x10: decode VS1 {
+                    0x00: decode VM {
+                        // The encodings corresponding to the masked versions
+                        // (vm=0) of vfmv.f.s are reserved
+                        0x1: VectorNonSplitFormat::vfmv_f_s({{
+                            freg_t fd = freg(Vs2_vu[0]);
+                            Fd_bits = fd.v;
+                        }}, OPFVV, VectorMiscOp);
+                    }
+                }
+                0x12: decode VS1 {
+                    format VectorFloatCvtFormat {
+                        0x00: vfcvt_xu_f_v({{
+                            Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
+                                                   softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        0x01: vfcvt_x_f_v({{
+                            Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
+                                                  softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        0x02: vfcvt_f_xu_v({{
+                            auto fd = ui_to_f<et>(Vs2_vu[i]);
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        0x03: vfcvt_f_x_v({{
+                            auto fd = i_to_f<et>(Vs2_vu[i]);
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        0x06: vfcvt_rtz_xu_f_v({{
+                            Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
+                                                   softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        0x07: vfcvt_rtz_x_f_v({{
+                            Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
+                                                  softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                    }
+                    // Widening float conversions. Every snippet reads its
+                    // source at Vs2_vu[i + offset] and writes Vd_vwu[i];
+                    // `i` and `offset` are supplied by the format template,
+                    // which is not visible in this part of the file
+                    // (presumably vwu is the double-width element type).
+                    format VectorFloatWideningCvtFormat {
+                        // Float -> integer via f_to_wui, using the global
+                        // softfloat_roundingMode.
+                        0x08: vfwcvt_xu_f_v({{
+                            Vd_vwu[i] = f_to_wui<et>(
+                                ftype<et>(Vs2_vu[i + offset]),
+                                softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Float -> integer via f_to_wi, using the global
+                        // softfloat_roundingMode.
+                        0x09: vfwcvt_x_f_v({{
+                            Vd_vwu[i] = f_to_wi<et>(
+                                ftype<et>(Vs2_vu[i + offset]),
+                                softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Integer -> float via ui_to_wf; the raw bits of
+                        // the result are stored. Note this helper is
+                        // instantiated with <vu>, not <et>.
+                        0x0a: vfwcvt_f_xu_v({{
+                            auto fd = ui_to_wf<vu>(Vs2_vu[i + offset]);
+                            Vd_vwu[i] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Integer -> float via i_to_wf (also instantiated
+                        // with <vu>); the raw bits of the result are stored.
+                        0x0b: vfwcvt_f_x_v({{
+                            auto fd = i_to_wf<vu>(Vs2_vu[i + offset]);
+                            Vd_vwu[i] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Float -> float via f_to_wf.
+                        0x0c: vfwcvt_f_f_v({{
+                            auto fd = f_to_wf<et>(
+                                ftype<et>(Vs2_vu[i + offset]));
+                            Vd_vwu[i] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // As vfwcvt_xu_f_v, but with the rounding mode fixed
+                        // to softfloat_round_minMag.
+                        0x0e: vfwcvt_rtz_xu_f_v({{
+                            Vd_vwu[i] = f_to_wui<et>(
+                                ftype<et>(Vs2_vu[i + offset]),
+                                softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // As vfwcvt_x_f_v, but with the rounding mode fixed
+                        // to softfloat_round_minMag.
+                        0x0f: vfwcvt_rtz_x_f_v({{
+                            Vd_vwu[i] = f_to_wi<et>(
+                                ftype<et>(Vs2_vu[i + offset]),
+                                softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                    }
+                    // Narrowing float conversions. Every snippet reads its
+                    // source from Vs2_vwu[i] and writes Vd_vu[i + offset];
+                    // `i` and `offset` are supplied by the format template,
+                    // which is not visible in this part of the file.
+                    format VectorFloatNarrowingCvtFormat {
+                        // Float -> integer via f_to_nui, using the global
+                        // softfloat_roundingMode.
+                        0x10: vfncvt_xu_f_w({{
+                            Vd_vu[i + offset] = f_to_nui<vu>(
+                                ftype<ewt>(Vs2_vwu[i]),
+                                softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Float -> integer via f_to_ni, using the global
+                        // softfloat_roundingMode.
+                        0x11: vfncvt_x_f_w({{
+                            Vd_vu[i + offset] = f_to_ni<vu>(
+                                ftype<ewt>(Vs2_vwu[i]),
+                                softfloat_roundingMode);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Integer -> float via ui_to_nf; raw result bits
+                        // are stored.
+                        0x12: vfncvt_f_xu_w({{
+                            auto fd = ui_to_nf<et>(Vs2_vwu[i]);
+                            Vd_vu[i + offset] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Integer -> float via i_to_nf; raw result bits
+                        // are stored.
+                        0x13: vfncvt_f_x_w({{
+                            auto fd = i_to_nf<et>(Vs2_vwu[i]);
+                            Vd_vu[i + offset] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Float -> float via f_to_nf.
+                        0x14: vfncvt_f_f_w({{
+                            auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
+                            Vd_vu[i + offset] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // Float -> float via f_to_nf after overwriting the
+                        // global softfloat_roundingMode with
+                        // softfloat_round_odd.
+                        // NOTE(review): the snippet does not restore the
+                        // previous rounding mode -- confirm the surrounding
+                        // template re-establishes it.
+                        0x15: vfncvt_rod_f_f_w({{
+                            softfloat_roundingMode = softfloat_round_odd;
+                            auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
+                            Vd_vu[i + offset] = fd.v;
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // As vfncvt_xu_f_w, but with the rounding mode fixed
+                        // to softfloat_round_minMag.
+                        0x16: vfncvt_rtz_xu_f_w({{
+                            Vd_vu[i + offset] = f_to_nui<vu>(
+                                ftype<ewt>(Vs2_vwu[i]),
+                                softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                        // As vfncvt_x_f_w, but with the rounding mode fixed
+                        // to softfloat_round_minMag.
+                        0x17: vfncvt_rtz_x_f_w({{
+                            Vd_vu[i + offset] = f_to_ni<vu>(
+                                ftype<ewt>(Vs2_vwu[i]),
+                                softfloat_round_minMag);
+                        }}, OPFVV, VectorFloatConvertOp);
+                    }
+                }
+                // Unary float operations selected by the VS1 field. Each
+                // snippet applies one helper to the Vs2 element and stores
+                // the raw bits (.v) of the helper's result in Vd.
+                0x13: decode VS1 {
+                    format VectorFloatCvtFormat {
+                        // Result of fsqrt<et>.
+                        0x00: vfsqrt_v({{
+                            auto fd = fsqrt<et>(ftype<et>(Vs2_vu[i]));
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatArithOp);
+                        // Result of frsqrte7<et>.
+                        0x04: vfrsqrt7_v({{
+                            auto fd = frsqrte7<et>(ftype<et>(Vs2_vu[i]));
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatArithOp);
+                        // Result of frecip7<et>.
+                        0x05: vfrec7_v({{
+                            auto fd = frecip7<et>(ftype<et>(Vs2_vu[i]));
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatArithOp);
+                        // Result of fclassify<et>.
+                        0x10: vfclass_v({{
+                            auto fd = fclassify<et>(ftype<et>(Vs2_vu[i]));
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVV, VectorFloatArithOp);
+                    }
+                }
+
+                // Float compares producing a mask. Each snippet compares
+                // Vs2[i] (left operand) with Vs1[i] (right operand) and
+                // stores the boolean in bit (i + offset) of the destination
+                // through ASSIGN_VD_BIT, updating byte (i + offset)/8 of
+                // Vd_ub. `i` and `offset` come from the format template.
+                format VectorFloatMaskFormat {
+                    // feq(Vs2, Vs1)
+                    0x18: vmfeq_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            feq<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype<et>(Vs1_vu[i])));
+                    }}, OPFVV, VectorFloatArithOp);
+                    // fle(Vs2, Vs1)
+                    0x19: vmfle_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            fle<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype<et>(Vs1_vu[i])));
+                    }}, OPFVV, VectorFloatArithOp);
+                    // flt(Vs2, Vs1)
+                    0x1b: vmflt_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            flt<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype<et>(Vs1_vu[i])));
+                    }}, OPFVV, VectorFloatArithOp);
+                    // Implemented as the logical negation of feq(Vs2, Vs1).
+                    0x1c: vmfne_vv({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            !feq<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype<et>(Vs1_vu[i])));
+                    }}, OPFVV, VectorFloatArithOp);
+                }
+                // Single-width float divide, multiply and multiply-add
+                // operations, plus two widening float reductions that
+                // override the format. Every per-element snippet stores the
+                // raw bits (.v) of the helper result in Vd_vu[i].
+                //
+                // The multiply-add variants differ only in operand order and
+                // in which operands are wrapped in fneg(). The formulas in
+                // the comments below assume fmadd(a, b, c) yields a*b + c
+                // -- TODO confirm against the helper's definition.
+                format VectorFloatFormat {
+                    // Vs2 / Vs1
+                    0x20: vfdiv_vv({{
+                        auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype<et>(Vs1_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // Vs2 * Vs1
+                    0x24: vfmul_vv({{
+                        auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype<et>(Vs1_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (Vs3 * Vs1) + Vs2
+                    0x28: vfmadd_vv({{
+                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
+                                            ftype<et>(Vs1_vu[i]),
+                                            ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (-Vs3 * Vs1) + (-Vs2)
+                    0x29: vfnmadd_vv({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
+                                            ftype<et>(Vs1_vu[i]),
+                                            fneg(ftype<et>(Vs2_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (Vs3 * Vs1) + (-Vs2)
+                    0x2a: vfmsub_vv({{
+                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
+                                            ftype<et>(Vs1_vu[i]),
+                                            fneg(ftype<et>(Vs2_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (-Vs3 * Vs1) + Vs2
+                    0x2b: vfnmsub_vv({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
+                                            ftype<et>(Vs1_vu[i]),
+                                            ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (Vs1 * Vs2) + Vs3
+                    0x2c: vfmacc_vv({{
+                        auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
+                                            ftype<et>(Vs2_vu[i]),
+                                            ftype<et>(Vs3_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (-Vs1 * Vs2) + (-Vs3)
+                    0x2d: vfnmacc_vv({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
+                                            ftype<et>(Vs2_vu[i]),
+                                            fneg(ftype<et>(Vs3_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (Vs1 * Vs2) + (-Vs3)
+                    0x2e: vfmsac_vv({{
+                        auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
+                                            ftype<et>(Vs2_vu[i]),
+                                            fneg(ftype<et>(Vs3_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (-Vs1 * Vs2) + Vs3
+                    0x2f: vfnmsac_vv({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
+                                            ftype<et>(Vs2_vu[i]),
+                                            ftype<et>(Vs3_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // Widening float sum reduction: reduce_loop is handed a
+                    // lambda that widens the narrow operand with f_to_wf and
+                    // adds it to the wide operand with fadd<ewt>; the result
+                    // is written to Vd_vwu[0].
+                    0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{
+                        Vd_vwu[0] = reduce_loop(
+                            [](const vwu& src1, const vu& src2) {
+                                return fadd<ewt>(
+                                    ftype<ewt>(src1),
+                                    f_to_wf<et>(ftype<et>(src2))
+                                );
+                            }, Vs1_vwu, Vs2_vu);
+                    }}, OPFVV, VectorFloatReduceOp);
+                    // The snippet is identical to vfwredusum_vs above; any
+                    // difference between the two instructions would have to
+                    // come from outside this snippet.
+                    0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{
+                        Vd_vwu[0] = reduce_loop(
+                            [](const vwu& src1, const vu& src2) {
+                                return fadd<ewt>(
+                                    ftype<ewt>(src1),
+                                    f_to_wf<et>(ftype<et>(src2))
+                                );
+                            }, Vs1_vwu, Vs2_vu);
+                    }}, OPFVV, VectorFloatReduceOp);
+                }
+                // Widening float arithmetic. Narrow sources are read at
+                // index [i + offset] and passed through fwiden(); wide
+                // sources (Vs2_vwu / Vs3_vwu) are read at [i]. The helper is
+                // instantiated with <ewt> and the raw bits (.v) of its
+                // result are stored in Vd_vwu[i]. `i` and `offset` come from
+                // the format template, which is not visible here.
+                //
+                // Formulas for the multiply-add variants assume
+                // fmadd(a, b, c) yields a*b + c -- TODO confirm.
+                format VectorFloatWideningFormat {
+                    // widen(Vs2) + widen(Vs1)
+                    0x30: vfwadd_vv({{
+                        auto fd = fadd<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype<et>(Vs1_vu[i + offset])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // widen(Vs2) - widen(Vs1)
+                    0x32: vfwsub_vv({{
+                        auto fd = fsub<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype<et>(Vs1_vu[i + offset])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // Vs2 (already wide) + widen(Vs1)
+                    0x34: vfwadd_wv({{
+                        auto fd = fadd<ewt>(
+                            ftype<ewt>(Vs2_vwu[i]),
+                            fwiden(ftype<et>(Vs1_vu[i + offset])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // Vs2 (already wide) - widen(Vs1)
+                    0x36: vfwsub_wv({{
+                        auto fd = fsub<ewt>(
+                            ftype<ewt>(Vs2_vwu[i]),
+                            fwiden(ftype<et>(Vs1_vu[i + offset])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // widen(Vs2) * widen(Vs1)
+                    0x38: vfwmul_vv({{
+                        auto fd = fmul<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype<et>(Vs1_vu[i + offset])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (widen(Vs1) * widen(Vs2)) + Vs3
+                    0x3c: vfwmacc_vv({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(ftype<et>(Vs1_vu[i + offset])),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            ftype<ewt>(Vs3_vwu[i]));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (widen(-Vs1) * widen(Vs2)) + (-Vs3); Vs1 is negated
+                    // before it is widened.
+                    0x3d: vfwnmacc_vv({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fneg(ftype<ewt>(Vs3_vwu[i])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (widen(Vs1) * widen(Vs2)) + (-Vs3)
+                    0x3e: vfwmsac_vv({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(ftype<et>(Vs1_vu[i + offset])),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fneg(ftype<ewt>(Vs3_vwu[i])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                    // (widen(-Vs1) * widen(Vs2)) + Vs3
+                    0x3f: vfwnmsac_vv({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            ftype<ewt>(Vs3_vwu[i]));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVV, VectorFloatArithOp);
+                }
+            }
+            // OPMVV
+            0x2: decode VFUNCT6 {
+                // Integer reductions. Each snippet hands a binary operation
+                // plus Vs1 and Vs2 to reduce_loop (defined by the format
+                // template, not visible here) and stores the returned value
+                // in element 0 of Vd. The signed (vi) or unsigned (vu)
+                // element view selects the signedness of min/max.
+                format VectorReduceIntFormat {
+                    // Sum (std::plus).
+                    0x0: vredsum_vs({{
+                        Vd_vi[0] =
+                            reduce_loop(std::plus<vi>(), Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Bitwise AND.
+                    0x1: vredand_vs({{
+                        Vd_vi[0] =
+                            reduce_loop(std::bit_and<vi>(), Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Bitwise OR.
+                    0x2: vredor_vs({{
+                        Vd_vi[0] =
+                            reduce_loop(std::bit_or<vi>(), Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Bitwise XOR.
+                    0x3: vredxor_vs({{
+                        Vd_vi[0] =
+                            reduce_loop(std::bit_xor<vi>(), Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Minimum over the unsigned element view.
+                    0x4: vredminu_vs({{
+                        Vd_vu[0] =
+                            reduce_loop([](const vu& src1, const vu& src2) {
+                                return std::min<vu>(src1, src2);
+                            }, Vs1_vu, Vs2_vu);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Minimum over the signed element view.
+                    0x5: vredmin_vs({{
+                        Vd_vi[0] =
+                            reduce_loop([](const vi& src1, const vi& src2) {
+                                return std::min<vi>(src1, src2);
+                            }, Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Maximum over the unsigned element view.
+                    0x6: vredmaxu_vs({{
+                        Vd_vu[0] =
+                            reduce_loop([](const vu& src1, const vu& src2) {
+                                return std::max<vu>(src1, src2);
+                            }, Vs1_vu, Vs2_vu);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                    // Maximum over the signed element view.
+                    0x7: vredmax_vs({{
+                        Vd_vi[0] =
+                            reduce_loop([](const vi& src1, const vi& src2) {
+                                return std::max<vi>(src1, src2);
+                            }, Vs1_vi, Vs2_vi);
+                    }}, OPMVV, VectorIntegerReduceOp);
+                }
+                // Averaging add/subtract. The sum or difference is formed in
+                // a 128-bit unsigned temporary so the carry/borrow is kept,
+                // passed through int_rounding with a shift amount of 1, and
+                // then shifted right by one bit before being truncated into
+                // the destination element.
+                // NOTE(review): the second int_rounding argument
+                // (presumably the fixed-point rounding mode) is hard-coded
+                // to 0 and marked TODO in every snippet, so no other
+                // rounding mode is honoured here.
+                format VectorIntFormat {
+                    // (Vs2 + Vs1) >> 1 on the unsigned element view.
+                    0x8: vaaddu_vv({{
+                        __uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i];
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vu[i] = res >> 1;
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (Vs2 + Vs1) >> 1 on the signed element view.
+                    0x9: vaadd_vv({{
+                        __uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i];
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vi[i] = res >> 1;
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (Vs2 - Vs1) >> 1 on the unsigned element view.
+                    0xa: vasubu_vv({{
+                        __uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i];
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vu[i] = res >> 1;
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (Vs2 - Vs1) >> 1 on the signed element view.
+                    0xb: vasub_vv({{
+                        __uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i];
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vi[i] = res >> 1;
+                    }}, OPMVV, VectorIntegerArithOp);
+                }
+                // VWXUNARY0
+                0x10: decode VS1 {
+                    0x00: decode VM {
+                        // Only the unmasked (vm=1) encoding of vmv.x.s is
+                        // decoded; the masked (vm=0) encodings are reserved.
+                        0x1: VectorNonSplitFormat::vmv_x_s({{
+                            // Copy element 0 of Vs2 (signed view) into Rd.
+                            Rd_ud = Vs2_vi[0];
+                        }}, OPMVV, VectorMiscOp);
+                    }
+                    // vcpop.m: count the set mask bits of Vs2 among the
+                    // first vl elements. When the instruction is masked
+                    // (vm clear) only elements enabled in v0 are counted.
+                    0x10: Vector1Vs1RdMaskFormat::vcpop_m({{
+                        uint64_t num_set = 0;
+                        const uint32_t num_elems = (uint32_t)machInst.vl;
+                        for (uint32_t i = 0; i < num_elems; i++) {
+                            const bool src_bit = elem_mask(Vs2_vu, i);
+                            // v0 is only consulted when vm is clear.
+                            const bool active =
+                                this->vm || elem_mask(v0, i);
+                            if (src_bit && active) {
+                                num_set++;
+                            }
+                        }
+                        Rd_vu = num_set;
+                    }}, OPMVV, VectorMiscOp);
+                    // vfirst.m: index of the first set mask bit of Vs2
+                    // among the first vl elements, ignoring elements that
+                    // are disabled in v0 when the instruction is masked.
+                    // The result is -1 when no such bit exists.
+                    0x11: Vector1Vs1RdMaskFormat::vfirst_m({{
+                        int64_t first_idx = -1;
+                        const uint32_t num_elems = (uint32_t)machInst.vl;
+                        for (uint32_t i = 0; i < num_elems; i++) {
+                            const bool active =
+                                this->vm != 0 || elem_mask(v0, i) != 0;
+                            if (active && elem_mask(Vs2_vu, i)) {
+                                first_idx = i;
+                                break;
+                            }
+                        }
+                        Rd_vu = first_idx;
+                    }}, OPMVV, VectorMiscOp);
+                }
+                // Integer zero-/sign-extension. All six snippets share one
+                // shape: compute a source element offset of
+                // (vlen / SEW) * index and copy Vs2[i + offset] into Vd[i].
+                // The zero-extending forms use the unsigned views
+                // (Vd_vu / Vs2_vextu), the sign-extending forms the signed
+                // views (Vd_vi / Vs2_vext). The snippets for vf8, vf4 and
+                // vf2 are textually identical, so the extension factor must
+                // be applied by the format template through the vext/vextu
+                // element types (not visible here -- TODO confirm).
+                0x12: decode VS1 {
+                    format VectorIntExtFormat {
+                        0x02: vzext_vf8({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vu[i] = Vs2_vextu[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                        0x03: vsext_vf8({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vi[i] = Vs2_vext[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                        0x04: vzext_vf4({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vu[i] = Vs2_vextu[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                        0x05: vsext_vf4({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vi[i] = Vs2_vext[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                        0x06: vzext_vf2({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vu[i] = Vs2_vextu[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                        0x07: vsext_vf2({{
+                            auto offset = (vlen / SEW) * index;
+
+                            Vd_vi[i] = Vs2_vext[i + offset];
+                        }}, OPMVV, VectorIntegerExtensionOp);
+                    }
+                }
+                0x14: decode VS1 {
+                    // vmsbf.m: for every active element among the first vl,
+                    // write 1 to the destination mask bit while no set bit
+                    // of Vs2 has been seen yet, and 0 from the first set
+                    // bit onwards. Inactive elements (vm clear and v0 bit
+                    // clear) are left untouched and do not affect the scan.
+                    0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{
+                        bool seen_set = false;
+                        const uint32_t num_elems = (uint32_t)machInst.vl;
+                        for (uint32_t i = 0; i < num_elems; i++) {
+                            const bool src_bit = elem_mask(Vs2_vu, i);
+                            const bool active = elem_mask(v0, i);
+                            if (!this->vm && !active) {
+                                continue;
+                            }
+                            // Strictly before the first set bit only.
+                            const uint64_t res = !seen_set && !src_bit;
+                            seen_set = seen_set || src_bit;
+                            Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
+                        }
+                    }}, OPMVV, VectorMiscOp);
+                    // vmsof.m: for every active element among the first vl,
+                    // write 1 to the destination mask bit only at the first
+                    // set bit of Vs2 and 0 everywhere else. Inactive
+                    // elements (vm clear and v0 bit clear) are left
+                    // untouched and do not affect the scan.
+                    0x02: Vector1Vs1VdMaskFormat::vmsof_m({{
+                        bool seen_set = false;
+                        const uint32_t num_elems = (uint32_t)machInst.vl;
+                        for (uint32_t i = 0; i < num_elems; i++) {
+                            const bool src_bit = elem_mask(Vs2_vu, i);
+                            const bool active = elem_mask(v0, i);
+                            if (!this->vm && !active) {
+                                continue;
+                            }
+                            // Exactly at the first set bit.
+                            const uint64_t res = !seen_set && src_bit;
+                            seen_set = seen_set || src_bit;
+                            Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
+                        }
+                    }}, OPMVV, VectorMiscOp);
+                    // vmsif.m: for every active element among the first vl,
+                    // write 1 to the destination mask bit up to and
+                    // including the first set bit of Vs2, and 0 afterwards.
+                    // Inactive elements (vm clear and v0 bit clear) are
+                    // left untouched and do not affect the scan.
+                    0x03: Vector1Vs1VdMaskFormat::vmsif_m({{
+                        bool seen_set = false;
+                        const uint32_t num_elems = (uint32_t)machInst.vl;
+                        for (uint32_t i = 0; i < num_elems; i++) {
+                            const bool src_bit = elem_mask(Vs2_vu, i);
+                            const bool active = elem_mask(v0, i);
+                            if (!this->vm && !active) {
+                                continue;
+                            }
+                            // Still 1 on the element holding the first
+                            // set bit; the flag flips only afterwards.
+                            const uint64_t res = !seen_set;
+                            seen_set = seen_set || src_bit;
+                            Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
+                        }
+                    }}, OPMVV, VectorMiscOp);
+                    // viota.m: each active destination element receives the
+                    // running count of set Vs2 mask bits seen at active
+                    // elements before it. The running count lives behind
+                    // the `cnt` pointer, which is not declared in this
+                    // snippet (presumably owned by ViotaFormat so the count
+                    // carries across micro-ops -- TODO confirm).
+                    0x10: ViotaFormat::viota_m({{
+                        // Read source operand 2 straight from the execution
+                        // context into a local container and view it with
+                        // the current element type.
+                        RiscvISAInst::VecRegContainer tmp_s2;
+                        xc->getRegOperand(this, 2,
+                            &tmp_s2);
+                        auto Vs2bit = tmp_s2.as<vu>();
+                        for (uint32_t i = 0; i < this->microVl; i++) {
+                            // Mask-bit index of this element: the local
+                            // index plus vtype_VLMAX(...) * microIdx.
+                            uint32_t ei = i +
+                                vtype_VLMAX(vtype, vlen, true) *
+                                this->microIdx;
+                            bool vs2_lsb = elem_mask(Vs2bit, ei);
+                            bool do_mask = elem_mask(v0, ei);
+                            // True when this element is active and its
+                            // source mask bit is set.
+                            bool has_one = false;
+                            if (this->vm || (do_mask && !this->vm)) {
+                                if (vs2_lsb) {
+                                    has_one = true;
+                                }
+                            }
+                            // Masked-off elements keep their old value.
+                            bool use_ori = (!this->vm) && !do_mask;
+                            if(use_ori == false){
+                                Vd_vu[i] = *cnt;
+                            }
+                            // Bump the count only after the write, so the
+                            // stored value excludes this element's own bit.
+                            if (has_one) {
+                                *cnt = *cnt+1;
+                            }
+                        }
+                    }}, OPMVV, VectorMiscOp);
+                    // vid.v: store `ei` in each destination element. `ei`
+                    // is not declared in this snippet; presumably it is the
+                    // element index provided by VectorIntFormat -- TODO
+                    // confirm against the template.
+                    0x11: VectorIntFormat::vid_v({{
+                        Vd_vu[i] = ei;
+                    }}, OPMVV, VectorMiscOp);
+                }
+                // Mask-register logical operations. Each snippet combines
+                // mask bit i of Vs2 with mask bit i of Vs1 (both read with
+                // elem_mask) and stores the result in bit i of the
+                // destination through ASSIGN_VD_BIT, updating byte i/8 of
+                // Vd_ub. `i` comes from the format template.
+                format VectorMaskFormat {
+                    // Vs2 AND (NOT Vs1)
+                    0x18: vmandn_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i));
+                    }}, OPMVV, VectorMiscOp);
+                    // Vs2 AND Vs1
+                    0x19: vmand_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i));
+                    }}, OPMVV, VectorMiscOp);
+                    // Vs2 OR Vs1
+                    0x1a: vmor_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i));
+                    }}, OPMVV, VectorMiscOp);
+                    // Vs2 XOR Vs1
+                    0x1b: vmxor_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i));
+                    }}, OPMVV, VectorMiscOp);
+                    // Vs2 OR (NOT Vs1)
+                    0x1c: vmorn_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i));
+                    }}, OPMVV, VectorMiscOp);
+                    // NOT (Vs2 AND Vs1)
+                    0x1d: vmnand_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            !(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)));
+                    }}, OPMVV, VectorMiscOp);
+                    // NOT (Vs2 OR Vs1)
+                    0x1e: vmnor_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            !(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)));
+                    }}, OPMVV, VectorMiscOp);
+                    // NOT (Vs2 XOR Vs1)
+                    0x1f: vmxnor_mm({{
+                        Vd_ub[i/8] = ASSIGN_VD_BIT(i,
+                            !(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)));
+                    }}, OPMVV, VectorMiscOp);
+                }
+                // Single-width integer divide, remainder, multiply and
+                // multiply-add. Vs2 is the dividend / first factor and Vs1
+                // the divisor / second factor; `i` and `sew` come from the
+                // format template, which is not visible here.
+                format VectorIntFormat {
+                    // Unsigned divide; a zero divisor yields all ones.
+                    0x20: vdivu_vv({{
+                        if (Vs1_vu[i] == 0)
+                            Vd_vu[i] = (vu)-1;
+                        else
+                            Vd_vu[i] = Vs2_vu[i] / Vs1_vu[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // Signed divide; a zero divisor yields -1, and the
+                    // overflowing case (minimum value / -1) yields the
+                    // dividend instead of executing the C++ division.
+                    0x21: vdiv_vv({{
+                        if (Vs1_vi[i] == 0)
+                            Vd_vi[i] = -1;
+                        else if (Vs2_vi[i] == std::numeric_limits<vi>::min()
+                                && Vs1_vi[i] == -1)
+                            Vd_vi[i] = Vs2_vi[i];
+                        else
+                            Vd_vi[i] = Vs2_vi[i] / Vs1_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // Unsigned remainder; a zero divisor yields the
+                    // dividend.
+                    0x22: vremu_vv({{
+                        if (Vs1_vu[i] == 0) {
+                            Vd_vu[i] = Vs2_vu[i];
+                        } else {
+                            Vd_vu[i] = Vs2_vu[i] % Vs1_vu[i];
+                        }
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // Signed remainder; a zero divisor yields the dividend
+                    // and the overflowing case (minimum value % -1) yields
+                    // 0 instead of executing the C++ modulo.
+                    0x23: vrem_vv({{
+                        if (Vs1_vi[i] == 0) {
+                            Vd_vi[i] = Vs2_vi[i];
+                        } else if (Vs2_vi[i] == std::numeric_limits<vi>::min()
+                                && Vs1_vi[i] == -1) {
+                            Vd_vi[i] = 0;
+                        } else {
+                            Vd_vi[i] = Vs2_vi[i] % Vs1_vi[i];
+                        }
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // High half of the unsigned product. For sew < 64 the
+                    // product is formed in 64 bits and shifted right by
+                    // sew; otherwise mulhu_64 is used.
+                    0x24: vmulhu_vv({{
+                        if (sew < 64) {
+                            Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Vs1_vu[i])
+                                        >> sew;
+                        } else {
+                            Vd_vu[i] = mulhu_64(Vs2_vu[i], Vs1_vu[i]);
+                        }
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // Product truncated to the element width.
+                    0x25: vmul_vv({{
+                        Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // High half of signed Vs2 times unsigned Vs1. For
+                    // sew < 64 the product is formed in 64 bits and shifted
+                    // right by sew; otherwise mulhsu_64 is used.
+                    0x26: vmulhsu_vv({{
+                        if (sew < 64) {
+                            Vd_vi[i] = ((int64_t)Vs2_vi[i] *
+                                        (uint64_t)Vs1_vu[i])
+                                        >> sew;
+                        } else {
+                            Vd_vi[i] = mulhsu_64(Vs2_vi[i], Vs1_vu[i]);
+                        }
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // High half of the signed product. For sew < 64 the
+                    // product is formed in 64 bits and shifted right by
+                    // sew; otherwise mulh_64 is used.
+                    0x27: vmulh_vv({{
+                        if (sew < 64) {
+                            Vd_vi[i] = ((int64_t)Vs2_vi[i] * Vs1_vi[i])
+                                        >> sew;
+                        } else {
+                            Vd_vi[i] = mulh_64(Vs2_vi[i], Vs1_vi[i]);
+                        }
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (Vs3 * Vs1) + Vs2
+                    0x29: vmadd_vv({{
+                        Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // -(Vs3 * Vs1) + Vs2
+                    0x2b: vnmsub_vv({{
+                        Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (Vs2 * Vs1) + Vs3
+                    0x2d: vmacc_vv({{
+                        Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // -(Vs2 * Vs1) + Vs3
+                    0x2f: vnmsac_vv({{
+                        Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                }
+                // Widening integer add, subtract, multiply and
+                // multiply-add. Narrow sources are read at [i + offset] and
+                // converted with vwu(...) (unsigned) or vwi(...) (signed)
+                // before the arithmetic; wide sources (Vs2_vw* in the .wv
+                // forms, Vs3_vw* in the accumulate forms) are read at [i].
+                // The result goes to the wide destination element Vd_vw*[i].
+                // `i` and `offset` come from the format template, which is
+                // not visible here.
+                format VectorIntWideningFormat {
+                    // unsigned Vs2 + unsigned Vs1
+                    0x30: vwaddu_vv({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset])
+                                + vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // signed Vs2 + signed Vs1
+                    0x31: vwadd_vv({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset])
+                                + vwi(Vs1_vi[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // unsigned Vs2 - unsigned Vs1
+                    0x32: vwsubu_vv({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset])
+                                - vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // signed Vs2 - signed Vs1
+                    0x33: vwsub_vv({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset])
+                                - vwi(Vs1_vi[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // wide unsigned Vs2 + unsigned Vs1
+                    0x34: vwaddu_wv({{
+                        Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // wide signed Vs2 + signed Vs1
+                    0x35: vwadd_wv({{
+                        Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // wide unsigned Vs2 - unsigned Vs1
+                    0x36: vwsubu_wv({{
+                        Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // wide signed Vs2 - signed Vs1
+                    0x37: vwsub_wv({{
+                        Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // unsigned Vs2 * unsigned Vs1
+                    0x38: vwmulu_vv({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset])
+                                * vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // signed Vs2 * unsigned Vs1
+                    0x3a: vwmulsu_vv({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset])
+                                * vwu(Vs1_vu[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // signed Vs2 * signed Vs1
+                    0x3b: vwmul_vv({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset])
+                                * vwi(Vs1_vi[i + offset]);
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (unsigned Vs1 * unsigned Vs2) + Vs3
+                    0x3c: vwmaccu_vv({{
+                        Vd_vwu[i] = vwu(Vs1_vu[i + offset])
+                                * vwu(Vs2_vu[i + offset])
+                                + Vs3_vwu[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (signed Vs1 * signed Vs2) + Vs3
+                    0x3d: vwmacc_vv({{
+                        Vd_vwi[i] = vwi(Vs1_vi[i + offset])
+                                * vwi(Vs2_vi[i + offset])
+                                + Vs3_vwi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                    // (signed Vs1 * unsigned Vs2) + Vs3
+                    0x3f: vwmaccsu_vv({{
+                        Vd_vwi[i] = vwi(Vs1_vi[i + offset])
+                                * vwu(Vs2_vu[i + offset])
+                                + Vs3_vwi[i];
+                    }}, OPMVV, VectorIntegerArithOp);
+                }
+            }
+            // OPIVI
+            0x3: decode VFUNCT6 {
+                format VectorIntFormat { // element-wise vector-immediate ops; the immediate is SIMM5 passed through sext<5>() and cast to the signed element type
+                    0x00: vadd_vi({{ // vs2 + imm
+                        Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x03: vrsub_vi({{ // reverse subtract: imm - vs2
+                        Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i];
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x09: vand_vi({{ // bitwise AND with imm
+                        Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x0a: vor_vi({{ // bitwise OR with imm
+                        Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x0b: vxor_vi({{ // bitwise XOR with imm
+                        Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5);
+                    }}, OPIVI, VectorIntegerArithOp);
+                }
+                0x0c: VectorGatherFormat::vrgather_vi({{
+                    // vrgather.vi gathers one source element, selected by the
+                    // immediate, into every active destination element.
+                    // The immediate is an unsigned index (uimm), so it is
+                    // zero-extended here. Sign-extending it would turn
+                    // indices 16..31 into huge values that always compare
+                    // >= vlmax and therefore always produce 0.
+                    const uint64_t imm_idx = (uint64_t)SIMM5 & 0x1f;
+                    for (uint32_t i = 0; i < microVl; i++) {
+                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
+                        if (this->vm || elem_mask(v0, ei)) {
+                            // Index relative to the source register handled
+                            // by this invocation; out-of-register indices
+                            // fail the (idx < vs2_elems) test below.
+                            const uint64_t idx =
+                                imm_idx - vs2_elems * vs2_idx;
+                            // Out-of-range index reads as 0; an index that
+                            // lives in another source register leaves the
+                            // vs3 value in place.
+                            Vd_vu[i] = (imm_idx >= vlmax) ? 0
+                                : (idx < vs2_elems) ? Vs2_vu[idx]
+                                : Vs3_vu[i];
+                        }
+                    }
+                }}, OPIVI, VectorMiscOp);
+                0x0e: VectorSlideUpFormat::vslideup_vi({{ // slide up by the immediate; each invocation copies between one (vs2Idx, vdIdx) register pair
+                    const int offset = (int)(uint64_t)(SIMM5); // immediate used as an unsigned slide amount
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true); // presumably the element count of one register - confirm against vtype_VLMAX
+                    const int vregOffset = vdIdx - vs2Idx; // register distance, destination minus source
+                    const int offsetInVreg = offset - vregOffset * microVlmax; // element shift left over for this register pair
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { // otherwise this pair has no elements in common
+                        const int upperBound = (offsetInVreg >= 0) // microVlmax - |offsetInVreg|: elements the pair shares
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg;
+                        const int vdOffset = (offsetInVreg >= 0) // first destination element written
+                            ? offsetInVreg
+                            : 0;
+                        const int vs2Offset = (offsetInVreg >= 0) // first source element read
+                            ? 0
+                            : -offsetInVreg;
+                        const int elemOffset = vdOffset + vdIdx * microVlmax; // mask index of the first destination element
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            if (this->vm || elem_mask(v0, i + elemOffset)) { // write when unmasked or when the mask bit is set
+                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                            }
+                        }
+                    }
+                }}, OPIVI, VectorMiscOp);
+                0x0f: VectorSlideDownFormat::vslidedown_vi({{ // slide down by the immediate; each invocation copies between one (vs2Idx, vdIdx) register pair
+                    const int offset = (int)(uint64_t)(SIMM5); // immediate used as an unsigned slide amount
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true); // presumably the element count of one register - confirm against vtype_VLMAX
+                    const int vregOffset = vs2Idx - vdIdx; // register distance, source minus destination (opposite of slide-up)
+                    const int offsetInVreg = offset - vregOffset * microVlmax; // element shift left over for this register pair
+                    const int numVs2s = vtype_regs_per_group(vtype); // number of registers in the source group
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { // otherwise this pair has no elements in common
+                        const bool needZeroTail = numVs2s == vs2Idx + 1; // true for the last source register of the group
+                        const int upperBound = (offsetInVreg >= 0) // microVlmax - |offsetInVreg|: elements the pair shares
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg;
+                        const int vdOffset = (offsetInVreg >= 0) // first destination element written
+                            ? 0
+                            : -offsetInVreg;
+                        const int vs2Offset = (offsetInVreg >= 0) // first source element read
+                            ? offsetInVreg
+                            : 0;
+                        const int elemIdxBase = vdIdx * microVlmax; // mask index of destination element 0
+                        vreg_t resVreg; // staging register: the result is built here, then committed under the mask
+                        auto res = resVreg.as<vu>();
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                        }
+                        if (needZeroTail) { // nothing lies beyond the last source register, so the remainder is zero-filled
+                            for (int i = upperBound + vdOffset;
+                                i < microVlmax; i++) {
+                                res[i] = 0;
+                            }
+                        }
+                        for (int i = vdOffset; i < microVl ; i++) { // commit staged elements that are unmasked or have their mask bit set
+                            if (vm || elem_mask(v0, i + elemIdxBase)) {
+                                Vd_vu[i] = res[i];
+                            }
+                        }
+                    }
+                }}, OPIVI, VectorMiscOp);
+                format VectorIntFormat { // ops that use the v0 mask bit as data (carry-in / select) rather than as a write enable
+                    0x10: decode VM {
+                        0x0: vadc_vim({{ // vs2 + sign-extended imm + the mask bit for element ei as carry-in
+                            Vd_vi[i] = Vs2_vi[i] +
+                                (vi)sext<5>(SIMM5) + elem_mask(v0, ei);
+                        }}, OPIVI, VectorIntegerArithOp);
+                        // the unmasked versions (vm=1) are reserved
+                    }
+                    0x17: decode VM { // same funct6 decodes to merge (VM=0) or move (VM=1)
+                        0x0: vmerge_vim({{ // mask bit set selects the immediate, otherwise vs2
+                            Vd_vi[i] = elem_mask(v0, ei)
+                                    ? (vi)sext<5>(SIMM5)
+                                    : Vs2_vi[i];
+                        }}, OPIVI, VectorIntegerArithOp);
+                        0x1: vmv_v_i({{ // write the sign-extended immediate to each element
+                            Vd_vi[i] = (vi)sext<5>(SIMM5);
+                        }}, OPIVI, VectorIntegerArithOp);
+                    }
+                }
+                format VectorIntVxsatFormat{
+                    // Saturating add of vs2 and the 5-bit immediate.
+                    // The immediate is sign-extended to the element width
+                    // for both forms, like the other OPIVI integer ops in
+                    // this decoder (e.g. vmsleu_vi). The unsigned form then
+                    // reinterprets the extended value as unsigned.
+                    // Saturation is reported through vxsatptr.
+                    0x20: vsaddu_vi({{
+                        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i],
+                            (vu)sext<5>(SIMM5), vxsatptr);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x21: vsadd_vi({{
+                        Vd_vu[i] = sat_add<vi>(Vs2_vu[i],
+                            (vu)sext<5>(SIMM5), vxsatptr);
+                    }}, OPIVI, VectorIntegerArithOp);
+                }
+                format VectorIntFormat {
+                    // Shift-by-immediate ops. The shift amount is the
+                    // immediate field masked with (sew - 1).
+                    0x25: vsll_vi({{
+                        Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x28: vsrl_vi({{
+                        Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    // Scaling (rounding) logical shift right. The value is
+                    // rounded in a 128-bit temporary before the shift.
+                    0x2a: vssrl_vi({{
+                        int sh = SIMM5 & (vtype_SEW(vtype) - 1);
+                        __uint128_t res = Vs2_vu[i];
+
+                        // Use the rounding mode from the VXRM register, as
+                        // vssra_vi and vssrl_vx do, instead of a fixed 0.
+                        res = int_rounding<__uint128_t>(res,
+                            xc->readMiscReg(MISCREG_VXRM), sh) >> sh;
+
+                        Vd_vu[i] = res;
+                    }}, OPIVI, VectorIntegerArithOp);
+                    // Arithmetic shift right: operates on the signed view.
+                    0x29: vsra_vi({{
+                        Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
+                    }}, OPIVI, VectorIntegerArithOp);
+                    // Scaling (rounding) arithmetic shift right.
+                    0x2b: vssra_vi({{
+                        int sh = SIMM5 & (sew - 1);
+                        __int128_t val = Vs2_vi[i];
+
+                        val = int_rounding<__int128_t>(val,
+                            xc->readMiscReg(MISCREG_VXRM), sh);
+                        Vd_vi[i] = val >> sh;
+                    }}, OPIVI, VectorIntegerArithOp);
+                }
+                // According to Spec Section 16.6,
+                // vm must be 1 (unmasked) in vmv<nr>r.v instructions.
+                0x27: decode VM { 0x1: decode SIMM3 { // only VM == 1 is decoded; SIMM3 values 0/1/3/7 select the 1/2/4/8-register variants
+                    format VMvWholeFormat {
+                        0x0: vmv1r_v({{ // all four variants share the same per-element copy through the _ud view
+                            Vd_ud[i] = Vs2_ud[i];
+                        }}, OPIVI, VectorMiscOp);
+                        0x1: vmv2r_v({{
+                            Vd_ud[i] = Vs2_ud[i];
+                        }}, OPIVI, VectorMiscOp);
+                        0x3: vmv4r_v({{
+                            Vd_ud[i] = Vs2_ud[i];
+                        }}, OPIVI, VectorMiscOp);
+                        0x7: vmv8r_v({{
+                            Vd_ud[i] = Vs2_ud[i];
+                        }}, OPIVI, VectorMiscOp);
+                    }
+                }}
+                format VectorIntMaskFormat { // mask-producing ops: each result is stored via ASSIGN_VD_BIT into destination byte (i + offset) / 8 (macro defined outside this chunk)
+                    0x11: decode VM {
+                        0x0: vmadc_vim({{ // carry-out of vs2 + imm + mask bit (carry-in)
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5),
+                                    elem_mask(v0, ei)));
+                        }}, OPIVI, VectorIntegerArithOp);
+                        0x1: vmadc_vi({{ // carry-out of vs2 + imm, no carry-in
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5)));
+                        }}, OPIVI, VectorIntegerArithOp);
+                    }
+                    0x18: vmseq_vi({{ // compare equal against the sign-extended immediate
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] == (vi)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x19: vmsne_vi({{ // compare not-equal
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] != (vi)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x1c: vmsleu_vi({{ // unsigned <=: the immediate is sign-extended first, then cast to the unsigned element type
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] <= (vu)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x1d: vmsle_vi({{ // signed <=
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] <= (vi)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x1e: vmsgtu_vi({{ // unsigned >, same immediate handling as vmsleu_vi
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] > (vu)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x1f: vmsgt_vi({{ // signed >
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] > (vi)sext<5>(SIMM5)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                }
+                format VectorIntNarrowingFormat {
+                    // Narrowing ops with an immediate shift amount. The
+                    // source is read through the wide view at index i and
+                    // the narrowed result is written at i + offset. The
+                    // shift amount is masked with (2 * sew - 1).
+                    0x2c: vnsrl_wi({{
+                        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
+                                            ((vwu)SIMM5 & (sew * 2 - 1)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    0x2d: vnsra_wi({{
+                        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
+                                            ((vwu)SIMM5 & (sew * 2 - 1)));
+                    }}, OPIVI, VectorIntegerArithOp);
+                    // Unsigned narrowing clip: round, shift, then saturate
+                    // to the maximum of the narrow unsigned type.
+                    0x2e: vnclipu_wi({{
+                        vu max = std::numeric_limits<vu>::max();
+                        // Any bit at or above position sew means the value
+                        // does not fit in the narrow type.
+                        uint64_t sign_mask =
+                            std::numeric_limits<uint64_t>::max() << sew;
+                        __uint128_t res = Vs2_vwu[i];
+                        // .wi form: take the shift amount from the
+                        // immediate field, as vnsrl_wi / vnsra_wi do.
+                        unsigned shift = SIMM5 & ((sew * 2) - 1);
+
+                        res = int_rounding<__uint128_t>(
+                            res, 0 /* TODO */, shift) >> shift;
+
+                        if (res & sign_mask) {
+                            // TODO: vxsat
+                            res = max;
+                        }
+
+                        Vd_vu[i + offset] = (vu)res;
+                    }}, OPIVI, VectorIntegerArithOp);
+                    // Signed narrowing clip: round, shift, then clamp to
+                    // the [min, max] range of the narrow signed type.
+                    0x2f: vnclip_wi({{
+                        vi max = std::numeric_limits<vi>::max();
+                        vi min = std::numeric_limits<vi>::min();
+                        __int128_t res = Vs2_vwi[i];
+                        // .wi form: take the shift amount from the
+                        // immediate field, as vnsrl_wi / vnsra_wi do.
+                        unsigned shift = SIMM5 & ((sew * 2) - 1);
+
+                        res = int_rounding<__int128_t>(
+                            res, 0 /* TODO */, shift) >> shift;
+
+                        if (res < min) {
+                            res = min;
+                            // TODO: vxsat
+                        } else if (res > max) {
+                            res = max;
+                            // TODO: vxsat
+                        }
+
+                        Vd_vi[i + offset] = (vi)res;
+                    }}, OPIVI, VectorIntegerArithOp);
+                }
+            }
+            // OPIVX
+            0x4: decode VFUNCT6 {
+                format VectorIntFormat { // element-wise vector-scalar ops; the scalar is read through the rs1 _vu / _vi view
+                    0x0: vadd_vx({{ // vs2 + scalar
+                        Vd_vu[i] = Vs2_vu[i] + Rs1_vu;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2: vsub_vx({{ // vs2 - scalar
+                        Vd_vu[i] = Vs2_vu[i] - Rs1_vu;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x3: vrsub_vx({{ // reverse subtract: scalar - vs2
+                        Vd_vu[i] = Rs1_vu - Vs2_vu[i];
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x4: vminu_vx({{ // unsigned minimum
+                        Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x5: vmin_vx({{ // signed minimum
+                        Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x6: vmaxu_vx({{ // unsigned maximum
+                        Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x7: vmax_vx({{ // signed maximum
+                        Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x9: vand_vx({{ // bitwise AND
+                        Vd_vu[i] = Vs2_vu[i] & Rs1_vu;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0xa: vor_vx({{ // bitwise OR
+                        Vd_vu[i] = Vs2_vu[i] | Rs1_vu;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0xb: vxor_vx({{ // bitwise XOR
+                        Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu;
+                    }}, OPIVX, VectorIntegerArithOp);
+                }
+                0x0e: VectorSlideUpFormat::vslideup_vx({{ // slide up by the scalar register value; each invocation copies between one (vs2Idx, vdIdx) register pair
+                    const int offset = (int)Rs1_vu; // NOTE(review): the scalar is narrowed to int here - confirm behaviour for very large slide amounts
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true); // presumably the element count of one register - confirm against vtype_VLMAX
+                    const int vregOffset = vdIdx - vs2Idx; // register distance, destination minus source
+                    const int offsetInVreg = offset - vregOffset * microVlmax; // element shift left over for this register pair
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { // otherwise this pair has no elements in common
+                        const int upperBound = (offsetInVreg >= 0) // microVlmax - |offsetInVreg|: elements the pair shares
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg;
+                        const int vdOffset = (offsetInVreg >= 0) // first destination element written
+                            ? offsetInVreg
+                            : 0;
+                        const int vs2Offset = (offsetInVreg >= 0) // first source element read
+                            ? 0
+                            : -offsetInVreg;
+                        const int elemOffset = vdOffset + vdIdx * microVlmax; // mask index of the first destination element
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            if (this->vm || elem_mask(v0, i + elemOffset)) { // write when unmasked or when the mask bit is set
+                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                            }
+                        }
+                    }
+                }}, OPIVX, VectorMiscOp);
+                0x0f: VectorSlideDownFormat::vslidedown_vx({{ // slide down by the scalar register value; each invocation copies between one (vs2Idx, vdIdx) register pair
+                    const int offset = (int)Rs1_vu; // NOTE(review): the scalar is narrowed to int here - confirm behaviour for very large slide amounts
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true); // presumably the element count of one register - confirm against vtype_VLMAX
+                    const int vregOffset = vs2Idx - vdIdx; // register distance, source minus destination (opposite of slide-up)
+                    const int offsetInVreg = offset - vregOffset * microVlmax; // element shift left over for this register pair
+                    const int numVs2s = vtype_regs_per_group(vtype); // number of registers in the source group
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { // otherwise this pair has no elements in common
+                        const bool needZeroTail = numVs2s == vs2Idx + 1; // true for the last source register of the group
+                        const int upperBound = (offsetInVreg >= 0) // microVlmax - |offsetInVreg|: elements the pair shares
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg;
+                        const int vdOffset = (offsetInVreg >= 0) // first destination element written
+                            ? 0
+                            : -offsetInVreg;
+                        const int vs2Offset = (offsetInVreg >= 0) // first source element read
+                            ? offsetInVreg
+                            : 0;
+                        const int elemIdxBase = vdIdx * microVlmax; // mask index of destination element 0
+                        vreg_t resVreg; // staging register: the result is built here, then committed under the mask
+                        auto res = resVreg.as<vu>();
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                        }
+                        if (needZeroTail) { // nothing lies beyond the last source register, so the remainder is zero-filled
+                            for (int i = upperBound + vdOffset;
+                                i < microVlmax; i++) {
+                                res[i] = 0;
+                            }
+                        }
+                        for (int i = vdOffset; i < microVl ; i++) { // commit staged elements that are unmasked or have their mask bit set
+                            if (vm || elem_mask(v0, i + elemIdxBase)) {
+                                Vd_vu[i] = res[i];
+                            }
+                        }
+                    }
+                }}, OPIVX, VectorMiscOp);
+                0x0c: VectorGatherFormat::vrgather_vx({{ // gather one source element, selected by the scalar register, into every active destination element
+                    for (uint32_t i = 0; i < microVl; i++) {
+                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; // element index used for the mask lookup
+                        if (this->vm || elem_mask(v0, ei)) {
+                            const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx; // index relative to the source register handled by this invocation
+                            Vd_vu[i] = (Rs1_vu >= vlmax) ? 0 // out-of-range index reads as 0
+                                : (idx < vs2_elems) ? Vs2_vu[idx] // index falls inside this source register
+                                : Vs3_vu[i]; // otherwise keep the vs3 value (presumably the old destination - confirm in the format)
+                        }
+                    }
+                }}, OPIVX, VectorMiscOp);
+                format VectorIntFormat { // ops that use the v0 mask bit as data (carry / borrow / select) rather than as a write enable
+                    0x10: decode VM {
+                        0x0: vadc_vxm({{ // vs2 + scalar + mask bit as carry-in
+                            Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei);
+                        }}, OPIVX, VectorIntegerArithOp);
+                        // the unmasked versions (vm=1) are reserved
+                    }
+                    0x12: decode VM {
+                        0x0: vsbc_vxm({{ // vs2 - scalar - mask bit as borrow-in
+                            Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei);
+                        }}, OPIVX, VectorIntegerArithOp);
+                        // the unmasked versions (vm=1) are reserved
+                    }
+                    0x17: decode VM { // same funct6 decodes to merge (VM=0) or move (VM=1)
+                        0x0: vmerge_vxm({{ // mask bit set selects the scalar, otherwise vs2
+                            Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i];
+                        }}, OPIVX, VectorIntegerArithOp);
+                        0x1: decode VS2 { // the move form is only decoded when the VS2 field is 0
+                            0x0: vmv_v_x({{ // write the scalar to each element
+                                Vd_vu[i] = Rs1_vu;
+                            }}, OPIVX, VectorIntegerArithOp);
+                        }
+                    }
+                }
+                format VectorIntVxsatFormat{ // saturating ops; saturation is reported through vxsatptr, which the format supplies (not visible in this chunk)
+                    0x20: vsaddu_vx({{ // unsigned saturating add
+                        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Rs1_vu,
+                            vxsatptr);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x21: vsadd_vx({{ // signed saturating add: helper instantiated with the signed element type
+                        Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Rs1_vu,
+                            vxsatptr);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x22: vssubu_vx({{ // unsigned saturating subtract
+                        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Rs1_vu,
+                            vxsatptr);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x23: vssub_vx({{ // signed saturating subtract
+                        Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Rs1_vu,
+                            vxsatptr);
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x27: vsmul_vx({{ // signed fractional multiply: 128-bit product, rounded, shifted right by sew - 1, saturated
+                        vi max = std::numeric_limits<vi>::max();
+                        vi min = std::numeric_limits<vi>::min();
+                        bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min; // only min * min is treated as overflow
+                        __int128_t result =
+                            (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i];
+                        result = int_rounding<__uint128_t>( // NOTE(review): unsigned instantiation on a signed value, unlike vssra_vx - confirm intended
+                            result, 0 /* TODO */, sew - 1); // NOTE(review): rounding mode hard-coded to 0; vssrl_vx / vssra_vx read MISCREG_VXRM
+                        result = result >> (sew - 1);
+                        if (overflow) { // saturate to max and record it
+                            result = max;
+                            *vxsatptr = true;
+                        }
+
+                        Vd_vi[i] = (vi)result;
+                    }}, OPIVX, VectorIntegerArithOp);
+                }
+                format VectorIntFormat { // shift-by-scalar ops; the shift amount is the scalar masked with (sew - 1)
+                    0x25: vsll_vx({{ // logical shift left
+                        Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x28: vsrl_vx({{ // logical shift right (unsigned view)
+                        Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x29: vsra_vx({{ // arithmetic shift right (signed view)
+                        Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2a: vssrl_vx({{ // scaling logical shift right: round in a 128-bit temporary, then shift
+                        int sh = Rs1_vu & (sew - 1);
+                        __uint128_t val = Vs2_vu[i];
+
+                        val = int_rounding<__uint128_t>(val,
+                            xc->readMiscReg(MISCREG_VXRM), sh); // rounding mode read from the VXRM register
+                        Vd_vu[i] = val >> sh;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2b: vssra_vx({{ // scaling arithmetic shift right: signed variant of the above
+                        int sh = Rs1_vu & (sew - 1);
+                        __int128_t val = Vs2_vi[i];
+
+                        val = int_rounding<__int128_t>(val,
+                            xc->readMiscReg(MISCREG_VXRM), sh); // rounding mode read from the VXRM register
+                        Vd_vi[i] = val >> sh;
+                    }}, OPIVX, VectorIntegerArithOp);
+                }
+                format VectorIntNarrowingFormat { // narrowing ops: source read through the wide view at i, result written narrow at i + offset; shift masked with (2 * sew - 1)
+                    0x2c: vnsrl_wx({{ // narrowing logical shift right
+                        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
+                                            ((vwu)Rs1_vu & (sew * 2 - 1)));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2d: vnsra_wx({{ // narrowing arithmetic shift right
+                        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
+                                            ((vwu)Rs1_vu & (sew * 2 - 1)));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2e: vnclipu_wx({{ // unsigned narrowing clip: round, shift, saturate to the narrow unsigned maximum
+                        vu max = std::numeric_limits<vu>::max();
+                        uint64_t sign_mask =
+                            std::numeric_limits<uint64_t>::max() << sew; // bits at or above position sew: set means the value does not fit
+                        __uint128_t res = Vs2_vwu[i];
+                        unsigned shift = Rs1_vu & ((sew * 2) - 1);
+
+                        res = int_rounding<__uint128_t>(
+                            res, 0 /* TODO */, shift) >> shift; // NOTE(review): rounding mode hard-coded to 0 here
+
+                        if (res & sign_mask) {
+                            // TODO: vxsat
+                            res = max;
+                        }
+
+                        Vd_vu[i + offset] = (vu)res;
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x2f: vnclip_wx({{ // signed narrowing clip: round, shift, clamp to [min, max] of the narrow signed type
+                        vi max = std::numeric_limits<vi>::max();
+                        vi min = std::numeric_limits<vi>::min();
+                        __int128_t res = Vs2_vwi[i];
+                        unsigned shift = Rs1_vi & ((sew * 2) - 1);
+
+                        res = int_rounding<__int128_t>(
+                            res, 0 /* TODO */, shift) >> shift; // NOTE(review): rounding mode hard-coded to 0 here
+
+                        if (res < min) {
+                            res = min;
+                            // TODO: vxsat
+                        } else if (res > max) {
+                            res = max;
+                            // TODO: vxsat
+                        }
+
+                        Vd_vi[i + offset] = (vi)res;
+                    }}, OPIVX, VectorIntegerArithOp);
+                }
+
+                format VectorIntMaskFormat { // mask-producing ops: each result is stored via ASSIGN_VD_BIT into destination byte (i + offset) / 8 (macro defined outside this chunk)
+                    0x11: decode VM {
+                        0x0: vmadc_vxm({{ // carry-out of vs2 + scalar + mask bit (carry-in)
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vi[i], Rs1_vi,
+                                    elem_mask(v0, ei)));
+                        }}, OPIVX, VectorIntegerArithOp);
+                        0x1: vmadc_vx({{ // carry-out of vs2 + scalar, no carry-in
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                carry_out(Vs2_vi[i], Rs1_vi));
+                        }}, OPIVX, VectorIntegerArithOp);
+                    }
+                    0x13: decode VM {
+                        0x0: vmsbc_vxm({{ // borrow-out of vs2 - scalar - mask bit (borrow-in)
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                borrow_out(Vs2_vi[i], Rs1_vi,
+                                    elem_mask(v0, ei)));
+                        }}, OPIVX, VectorIntegerArithOp);
+                        0x1: vmsbc_vx({{ // borrow-out of vs2 - scalar, no borrow-in
+                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                                borrow_out(Vs2_vi[i], Rs1_vi));
+                        }}, OPIVX, VectorIntegerArithOp);
+                    }
+                    0x18: vmseq_vx({{ // equal
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] == Rs1_vu));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x19: vmsne_vx({{ // not equal
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] != Rs1_vu));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1a: vmsltu_vx({{ // unsigned <
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] < Rs1_vu));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1b: vmslt_vx({{ // signed <
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] < Rs1_vi));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1c: vmsleu_vx({{ // unsigned <=
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] <= Rs1_vu));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1d: vmsle_vx({{ // signed <=
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] <= Rs1_vi));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1e: vmsgtu_vx({{ // unsigned >
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vu[i] > Rs1_vu));
+                    }}, OPIVX, VectorIntegerArithOp);
+                    0x1f: vmsgt_vx({{ // signed >
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            (Vs2_vi[i] > Rs1_vi));
+                    }}, OPIVX, VectorIntegerArithOp);
+                }
+            }
+            // OPFVF
+            0x5: decode VFUNCT6 {
+                format VectorFloatFormat{ // vector-scalar FP ops: the vs2 element is wrapped with ftype<et>(), the scalar with ftype_freg<et>(freg(...)); the result's .v member is stored
+                    0x00: vfadd_vf({{ // vs2 + fs1
+                        auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x02: vfsub_vf({{ // vs2 - fs1
+                        auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x04: vfmin_vf({{ // minimum of vs2 and fs1
+                        auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x06: vfmax_vf({{ // maximum of vs2 and fs1
+                        auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                            Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x08: vfsgnj_vf({{ // the three sign-injection forms share fsgnj and differ only in the two trailing flags: (false, false)
+                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                             ftype_freg<et>(freg(Fs1_bits)),
+                                             false, false).v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x09: vfsgnjn_vf({{ // flags (true, false)
+                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                             ftype_freg<et>(freg(Fs1_bits)),
+                                             true, false).v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x0a: vfsgnjx_vf({{ // flags (false, true)
+                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
+                                             ftype_freg<et>(freg(Fs1_bits)),
+                                             false, true).v;
+                    }}, OPFVF, VectorFloatArithOp);
+                }
+                0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ // slide up by exactly one element and insert a scalar at element 0; each invocation handles one (vs2Idx, vdIdx) register pair
+                    const int offset = 1; // fixed slide amount
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true); // presumably the element count of one register - confirm against vtype_VLMAX
+                    const int vregOffset = vdIdx - vs2Idx; // register distance, destination minus source
+                    const int offsetInVreg = offset - vregOffset * microVlmax; // element shift left over for this register pair
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { // otherwise this pair has no elements in common
+                        const int upperBound = (offsetInVreg >= 0) // microVlmax - |offsetInVreg|: elements the pair shares
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg;
+                        const int vdOffset = (offsetInVreg >= 0) // first destination element written
+                            ? offsetInVreg
+                            : 0;
+                        const int vs2Offset = (offsetInVreg >= 0) // first source element read
+                            ? 0
+                            : -offsetInVreg;
+                        const int elemOffset = vdOffset + vdIdx * microVlmax; // mask index of the first destination element
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            if (this->vm || elem_mask(v0, i + elemOffset)) { // write when unmasked or when the mask bit is set
+                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                            }
+                        }
+                        // TODO: dirty code
+                        if (vdIdx == 0 && vs2Idx == 0 && // only the first register pair inserts the scalar, and only if element 0 is active
+                                (this->vm || elem_mask(v0, 0))) {
+                            tmp_d0.as<vu>()[0] = Rs1_vu; // NOTE(review): written via tmp_d0 and sourced from the rs1 view although this is the .vf form - confirm how the format maps it to the FP scalar
+                        }
+                    }
+                }}, OPFVF, VectorMiscOp);
+                0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{
+                    const int offset = 1; // slide by one element
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true);
+                    const int vregOffset = vs2Idx - vdIdx;
+                    const int offsetInVreg = offset - vregOffset * microVlmax;
+                    const int numVs2s = vtype_regs_per_group(vtype);
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
+                        const bool needZeroTail = numVs2s == vs2Idx + 1;
+                        const int upperBound = (offsetInVreg >= 0)
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg; // max copy count
+                        const int vdOffset = (offsetInVreg >= 0)
+                            ? 0
+                            : -offsetInVreg; // first res index written
+                        const int vs2Offset = (offsetInVreg >= 0)
+                            ? offsetInVreg
+                            : 0; // first Vs2 index read
+                        const int elemIdxBase = vdIdx * microVlmax;
+                        vreg_t resVreg; // staging copy, masked into Vd below
+                        auto res = resVreg.as<vu>();
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                        }
+                        if (needZeroTail) { // vs2Idx is the group's last reg
+                            for (int i = upperBound + vdOffset;
+                                i < microVlmax; i++) {
+                                res[i] = 0;
+                            }
+                        }
+                        for (int i = vdOffset; i < microVl ; i++) {
+                            if (vm || elem_mask(v0, i + elemIdxBase)) {
+                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
+                                    ? res[i]
+                                    : Rs1_vu; // element vl-1 takes the scalar
+                            }
+                        }
+                    }
+                }}, OPFVF, VectorMiscOp);
+                // VRFUNARY0
+                0x10: decode VS2 {
+                    0x00: decode VM {
+                        // The encodings corresponding to the masked versions
+                        // (vm=0) of vfmv.s.f are reserved.
+                        0x1: VectorNonSplitFormat::vfmv_s_f({{
+                            auto fd = ftype_freg<et>(freg(Fs1_bits));
+                            Vd_vu[0] = fd.v;
+                        }}, OPFVV, VectorMiscOp);
+                    }
+                }
+                format VectorFloatFormat{
+                    0x17: decode VM {
+                        0x0: vfmerge_vfm({{
+                            Vd_vu[i] = elem_mask(v0, ei)
+                                    ? ftype_freg<et>(freg(Fs1_bits)).v
+                                    : Vs2_vu[i];
+                        }}, OPFVF, VectorFloatArithOp);
+                        0x1: vfmv_v_f({{
+                            auto fd = ftype_freg<et>(freg(Fs1_bits));
+                            Vd_vu[i] = fd.v;
+                        }}, OPFVF, VectorFloatArithOp);
+                    }
+                }
+                format VectorFloatMaskFormat {
+                    0x18: vmfeq_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            feq<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype_freg<et>(freg(Fs1_bits))));
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x19: vmfle_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            fle<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype_freg<et>(freg(Fs1_bits))));
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x1b: vmflt_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            flt<et>(ftype<et>(Vs2_vu[i]),
+                                    ftype_freg<et>(freg(Fs1_bits))));
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x1c: vmfne_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            !feq<et>(ftype<et>(Vs2_vu[i]),
+                                     ftype_freg<et>(freg(Fs1_bits))));
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x1d: vmfgt_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            flt<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                    ftype<et>(Vs2_vu[i])));
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x1f: vmfge_vf({{
+                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
+                            fle<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                    ftype<et>(Vs2_vu[i])));
+                    }}, OPFVF, VectorFloatArithOp);
+                }
+                format VectorFloatFormat{
+                    0x20: vfdiv_vf({{
+                        auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x21: vfrdiv_vf({{
+                        auto fd = fdiv<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                           ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x24: vfmul_vf({{
+                        auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
+                                           ftype_freg<et>(freg(Fs1_bits)));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x27: vfrsub_vf({{
+                        auto fd = fsub<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                           ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x28: vfmadd_vf({{
+                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
+                                            ftype_freg<et>(freg(Fs1_bits)),
+                                            ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x29: vfnmadd_vf({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
+                                            ftype_freg<et>(freg(Fs1_bits)),
+                                            fneg(ftype<et>(Vs2_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2a: vfmsub_vf({{
+                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
+                                            ftype_freg<et>(freg(Fs1_bits)),
+                                            fneg(ftype<et>(Vs2_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2b: vfnmsub_vf({{
+                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
+                                            ftype_freg<et>(freg(Fs1_bits)),
+                                            ftype<et>(Vs2_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2c: vfmacc_vf({{
+                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                            ftype<et>(Vs2_vu[i]),
+                                            ftype<et>(Vs3_vu[i]));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2d: vfnmacc_vf({{
+                        auto fd = fmadd<et>(
+                            fneg(ftype_freg<et>(freg(Fs1_bits))),
+                            ftype<et>(Vs2_vu[i]),
+                            fneg(ftype<et>(Vs3_vu[i]))
+                        );
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2e: vfmsac_vf({{
+                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
+                                            ftype<et>(Vs2_vu[i]),
+                                            fneg(ftype<et>(Vs3_vu[i])));
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x2f: vfnmsac_vf({{
+                        auto fd = fmadd<et>(
+                            fneg(ftype_freg<et>(freg(Fs1_bits))),
+                            ftype<et>(Vs2_vu[i]),
+                            ftype<et>(Vs3_vu[i])
+                        );
+                        Vd_vu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                }
+                format VectorFloatWideningFormat {
+                    0x30: vfwadd_vf({{
+                        auto fd = fadd<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x32: vfwsub_vf({{
+                        auto fd = fsub<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x34: vfwadd_wf({{
+                        auto fd = fadd<ewt>(
+                            ftype<ewt>(Vs2_vwu[i]),
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x36: vfwsub_wf({{
+                        auto fd = fsub<ewt>(
+                            ftype<ewt>(Vs2_vwu[i]),
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x38: vfwmul_vf({{
+                        auto fd = fmul<ewt>(
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x3c: vfwmacc_vf({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            ftype<ewt>(Vs3_vwu[i]));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x3d: vfwnmacc_vf({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fneg(ftype<ewt>(Vs3_vwu[i])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x3e: vfwmsac_vf({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            fneg(ftype<ewt>(Vs3_vwu[i])));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                    0x3f: vfwnmsac_vf({{
+                        auto fd = fmadd<ewt>(
+                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
+                            fwiden(ftype<et>(Vs2_vu[i + offset])),
+                            ftype<ewt>(Vs3_vwu[i]));
+                        Vd_vwu[i] = fd.v;
+                    }}, OPFVF, VectorFloatArithOp);
+                }
+            }
+            // OPMVX
+            0x6: decode VFUNCT6 {
+                format VectorIntFormat {
+                    0x08: vaaddu_vx({{
+                        __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu;
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vu[i] = res >> 1;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x09: vaadd_vx({{
+                        __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi;
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vi[i] = res >> 1;
+                    }}, OPMVX, VectorIntegerArithOp);
+                }
+                0x0e: VectorSlideUpFormat::vslide1up_vx({{
+                    const int offset = 1; // slide by one element
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true);
+                    const int vregOffset = vdIdx - vs2Idx;
+                    const int offsetInVreg = offset - vregOffset * microVlmax;
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
+                        const int upperBound = (offsetInVreg >= 0)
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg; // max copy count
+                        const int vdOffset = (offsetInVreg >= 0)
+                            ? offsetInVreg
+                            : 0; // first Vd index written
+                        const int vs2Offset = (offsetInVreg >= 0)
+                            ? 0
+                            : -offsetInVreg; // first Vs2 index read
+                        const int elemOffset = vdOffset + vdIdx * microVlmax;
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            if (this->vm || elem_mask(v0, i + elemOffset)) {
+                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                            }
+                        }
+                        // TODO: dirty code; Rs1 goes straight into tmp_d0[0]
+                        if (vdIdx == 0 && vs2Idx == 0 &&
+                                (this->vm || elem_mask(v0, 0))) {
+                            tmp_d0.as<vu>()[0] = Rs1_vu;
+                        }
+                    }
+                }}, OPIVX, VectorMiscOp);
+                0x0f: VectorSlideDownFormat::vslide1down_vx({{
+                    const int offset = 1; // slide by one element
+                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
+                        vlen, true);
+                    const int vregOffset = vs2Idx - vdIdx;
+                    const int offsetInVreg = offset - vregOffset * microVlmax;
+                    const int numVs2s = vtype_regs_per_group(vtype);
+                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
+                        const bool needZeroTail = numVs2s == vs2Idx + 1;
+                        const int upperBound = (offsetInVreg >= 0)
+                            ? microVlmax - offsetInVreg
+                            : microVlmax + offsetInVreg; // max copy count
+                        const int vdOffset = (offsetInVreg >= 0)
+                            ? 0
+                            : -offsetInVreg; // first res index written
+                        const int vs2Offset = (offsetInVreg >= 0)
+                            ? offsetInVreg
+                            : 0; // first Vs2 index read
+                        const int elemIdxBase = vdIdx * microVlmax;
+                        vreg_t resVreg; // staging copy, masked into Vd below
+                        auto res = resVreg.as<vu>();
+                        for (int i = 0;
+                            i < upperBound && i + vdOffset < microVl;
+                            i++) {
+                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
+                        }
+                        if (needZeroTail) { // vs2Idx is the group's last reg
+                            for (int i = upperBound + vdOffset;
+                                i < microVlmax; i++) {
+                                res[i] = 0;
+                            }
+                        }
+                        for (int i = vdOffset; i < microVl ; i++) {
+                            if (vm || elem_mask(v0, i + elemIdxBase)) {
+                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
+                                    ? res[i]
+                                    : Rs1_vu; // element vl-1 takes the scalar
+                            }
+                        }
+                    }
+                }}, OPIVX, VectorMiscOp);
+                // VRXUNARY0
+                0x10: decode VS2 {
+                    0x00: decode VM {
+                        // The encodings corresponding to the masked versions
+                        // (vm=0) of vmv.s.x are reserved.
+                        0x1: VectorNonSplitFormat::vmv_s_x({{
+                            Vd_vu[0] = Rs1_vu;
+                        }}, OPMVX, VectorMiscOp);
+                    }
+                }
+                format VectorIntFormat {
+                    0x0a: vasubu_vx({{
+                        __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu;
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vu[i] = res >> 1; // halve the rounded difference
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x0b: vasub_vx({{
+                        __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi;
+                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
+                        Vd_vi[i] = res >> 1; // halve the rounded difference
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x20: vdivu_vx({{
+                        if (Rs1_vu == 0)
+                            Vd_vu[i] = (vu)-1; // x / 0: all bits set
+                        else
+                            Vd_vu[i] = Vs2_vu[i] / Rs1_vu;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x21: vdiv_vx({{
+                        if (Rs1_vi == 0)
+                            Vd_vi[i] = -1; // x / 0: -1
+                        else if (Vs2_vi[i] == std::numeric_limits<vi>::min()
+                                && Rs1_vi == -1)
+                            Vd_vi[i] = Vs2_vi[i]; // min / -1: the dividend
+                        else
+                            Vd_vi[i] = Vs2_vi[i] / Rs1_vi;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x22: vremu_vx({{
+                        if (Rs1_vu == 0)
+                            Vd_vu[i] = Vs2_vu[i]; // x % 0: the dividend
+                        else
+                            Vd_vu[i] = Vs2_vu[i] % Rs1_vu;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x23: vrem_vx({{
+                        if (Rs1_vi == 0)
+                            Vd_vi[i] = Vs2_vi[i]; // x % 0: the dividend
+                        else if (Vs2_vi[i] == std::numeric_limits<vi>::min()
+                                && Rs1_vi == -1)
+                            Vd_vi[i] = 0; // min % -1: zero
+                        else
+                            Vd_vi[i] = Vs2_vi[i] % Rs1_vi;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x24: vmulhu_vx({{
+                        if (sew < 64)
+                            Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Rs1_vu)
+                                        >> sew; // drop the low sew bits
+                        else
+                            Vd_vu[i] = mulhu_64(Vs2_vu[i], Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x25: vmul_vx({{
+                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi;
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x26: vmulhsu_vx({{
+                        if (sew < 64)
+                            Vd_vi[i] = ((int64_t)Vs2_vi[i] *
+                                        (uint64_t)Rs1_vu)
+                                        >> sew; // drop the low sew bits
+                        else
+                            Vd_vi[i] = mulhsu_64(Vs2_vi[i], Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x27: vmulh_vx({{
+                        if (sew < 64)
+                            Vd_vi[i] = ((int64_t)Vs2_vi[i] * Rs1_vi)
+                                        >> sew; // drop the low sew bits
+                        else
+                            Vd_vi[i] = mulh_64(Vs2_vi[i], Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x29: vmadd_vx({{
+                        Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x2b: vnmsub_vx({{
+                        Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x2d: vmacc_vx({{
+                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x2f: vnmsac_vx({{
+                        Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                }
+                format VectorIntWideningFormat {
+                    0x30: vwaddu_vx({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x31: vwadd_vx({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x32: vwsubu_vx({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x33: vwsub_vx({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x34: vwaddu_wx({{
+                        Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x35: vwadd_wx({{
+                        Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x36: vwsubu_wx({{
+                        Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x37: vwsub_wx({{
+                        Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x38: vwmulu_vx({{
+                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3a: vwmulsu_vx({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3b: vwmul_vx({{
+                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi);
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3c: vwmaccu_vx({{
+                        Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset])
+                                + Vs3_vwu[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3d: vwmacc_vx({{
+                        Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset])
+                                + Vs3_vwi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3e: vwmaccus_vx({{
+                        Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset])
+                                + Vs3_vwi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                    0x3f: vwmaccsu_vx({{
+                        Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset])
+                                + Vs3_vwi[i];
+                    }}, OPMVX, VectorIntegerArithOp);
+                }
+            }
+            0x7: decode BIT31 {
+                format VConfOp {
+                    0x0: vsetvli({{
+                        uint64_t rd_bits = RD;
+                        uint64_t rs1_bits = RS1;
+                        uint64_t requested_vl = Rs1_ud;
+                        uint64_t requested_vtype = zimm11;
+                        uint32_t vlen = VlenbBits * 8;
+                        uint32_t vlmax = getVlmax(Vtype, vlen);
+                        uint32_t current_vl = VL;
+                    }}, {{
+                        Rd_ud = new_vl; // new_vl: set outside this block
+                        VL = new_vl;
+                        Vtype = new_vtype;
+                    }}, VSetVlDeclare, VSetVliBranchTarget
+                      , VectorConfigOp, IsUncondControl
+                      , IsIndirectControl);
+                    0x1: decode BIT30 {
+                        0x0: vsetvl({{
+                            uint64_t rd_bits = RD;
+                            uint64_t rs1_bits = RS1;
+                            uint64_t requested_vl = Rs1_ud;
+                            uint64_t requested_vtype = Rs2_ud;
+                            uint32_t vlen = VlenbBits * 8;
+                            uint32_t vlmax = getVlmax(Vtype, vlen);
+                            uint32_t current_vl = VL;
+                        }}, {{
+                            Rd_ud = new_vl; // new_vl: set outside this block
+                            VL = new_vl;
+                            Vtype = new_vtype;
+                        }}, VSetVlDeclare, VSetVlBranchTarget
+                          , VectorConfigOp, IsUncondControl
+                          , IsIndirectControl);
+                        0x1: vsetivli({{
+                            uint64_t rd_bits = RD;
+                            uint64_t rs1_bits = -1; // vl request is uimm
+                            uint64_t requested_vl = uimm;
+                            uint64_t requested_vtype = zimm10;
+                            uint32_t vlen = VlenbBits * 8;
+                            uint32_t vlmax = getVlmax(Vtype, vlen);
+                            uint32_t current_vl = VL;
+                        }}, {{
+                            Rd_ud = new_vl; // new_vl: set outside this block
+                            VL = new_vl;
+                            Vtype = new_vtype;
+                        }}, VSetiVliDeclare, VSetiVliBranchTarget
+                          , VectorConfigOp, IsUncondControl
+                          , IsDirectControl);
+                    }
+                }
+            }
+        }
+
         0x18: decode FUNCT3 {
             format BOp {
                 0x0: beq({{
diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa
index 3d89ec38a6..7a9fd634c8 100644
--- a/src/arch/riscv/isa/formats/compressed.isa
+++ b/src/arch/riscv/isa/formats/compressed.isa
@@ -150,8 +150,9 @@ def template CBasicExecute {{
         std::vector<RegId> indices = {%(regs)s};
         std::stringstream ss;
         ss << mnemonic << ' ';
-        ss << registerName(indices[0]) << ", ";
-        ss << registerName(indices[1]);
+        ss << registerName(indices[0]);
+        if (_numSrcRegs >= 2)
+            ss << ", " << registerName(indices[1]);
         return ss.str();
     }
 }};
diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa
index 19749438a8..0102df17d7 100644
--- a/src/arch/riscv/isa/formats/formats.isa
+++ b/src/arch/riscv/isa/formats/formats.isa
@@ -37,6 +37,9 @@
 ##include "fp.isa"
 ##include "amo.isa"
 ##include "bs.isa"
+##include "vector_conf.isa"
+##include "vector_arith.isa"
+##include "vector_mem.isa"
 
 // Include formats for nonstandard extensions
 ##include "compressed.isa"
diff --git a/src/arch/riscv/isa/formats/mem.isa b/src/arch/riscv/isa/formats/mem.isa
index 0d80260a25..53de4af8b4 100644
--- a/src/arch/riscv/isa/formats/mem.isa
+++ b/src/arch/riscv/isa/formats/mem.isa
@@ -228,6 +228,69 @@ def template StoreCompleteAcc {{
     }
 }};
 
+def template CacheBlockBasedStoreExecute {{
+    Fault
+    %(class_name)s::execute(ExecContext *xc,
+        trace::InstRecord *traceData) const
+    {
+        Addr EA; // effective address, assigned by ea_code below
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        Addr cacheBlockSize = xc->tcBase()->getCpuPtr()->cacheLineSize();
+        uint64_t numOffsetBits = floorLog2(cacheBlockSize);
+        EA = (EA >> numOffsetBits) << numOffsetBits; // clear offset bits
+
+        {
+            Fault fault = // whole-block atomic access at the aligned EA
+                writeMemAtomic(xc, nullptr, EA, cacheBlockSize, memAccessFlags,
+                    nullptr, std::vector<bool>(cacheBlockSize, true));
+            if (fault != NoFault)
+                return fault;
+        }
+
+        return NoFault;
+    }
+}};
+
+def template CacheBlockBasedStoreInitiateAcc {{
+    Fault
+    %(class_name)s::initiateAcc(ExecContext *xc,
+        trace::InstRecord *traceData) const
+    {
+        Addr EA; // effective address, assigned by ea_code below
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        Addr cacheBlockSize = xc->tcBase()->getCpuPtr()->cacheLineSize();
+        uint64_t numOffsetBits = floorLog2(cacheBlockSize);
+        EA = (EA >> numOffsetBits) << numOffsetBits; // clear offset bits
+
+        {
+            Fault fault = // whole-block timing access at the aligned EA
+                writeMemTiming(xc, nullptr, EA, cacheBlockSize, memAccessFlags,
+                    nullptr, std::vector<bool>(cacheBlockSize, true));
+            if (fault != NoFault)
+                return fault;
+        }
+
+        return NoFault;
+    }
+}};
+
+def template CacheBlockBasedStoreCompleteAcc {{
+    Fault
+    %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
+        trace::InstRecord *traceData) const
+    {
+        return NoFault; // pkt is unused: nothing to do on completion
+    }
+}};
+
 def format Load(memacc_code, ea_code = {{EA = rvZext(Rs1 + offset);}},
         offset_code={{offset = sext<12>(IMM12);}},
         mem_flags=[], inst_flags=[]) {{
@@ -243,3 +306,10 @@ def format Store(memacc_code, ea_code={{EA = rvZext(Rs1 + offset);}},
         LoadStoreBase(name, Name, offset_code, ea_code, memacc_code, mem_flags,
         inst_flags, 'Store', exec_template_base='Store')
 }};
+
+def format CBMOp(memacc_code, ea_code={{EA = rvZext(Rs1);}},
+        offset_code={{;}}, mem_flags=[], inst_flags=[]) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        LoadStoreBase(name, Name, offset_code, ea_code, memacc_code, mem_flags,
+        inst_flags, 'Store', exec_template_base='CacheBlockBasedStore')
+}};
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index bb500f5f49..98b0af1e2d 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -267,7 +267,7 @@ def template JumpConstructor {{
                 flags[IsCall] = true;
 
             // Handle "Jalr" instruction
-            if (FUNCT3 == 0x0) {
+            if (FUNCT3 == 0x0 && OPCODE5 == 0x19) {
                 // If RD is not link and RS1 is link, then pop RAS
                 if (!rd_link && rs1_link) flags[IsReturn] = true;
                 else if (rd_link) {
diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
new file mode 100644
index 0000000000..1ddf323f04
--- /dev/null
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -0,0 +1,1557 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+let {{
+    def setVlen():  # C++ snippet: vlen = VlenbBits * 8
+        return "uint32_t vlen = VlenbBits * 8;\n"
+    def setVlenb():  # C++ snippet: vlenb = VlenbBits
+        return "uint32_t vlenb = VlenbBits;\n"
+    def setDestWrapper(destRegId):  # registers one VecRegClass destination
+        return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \
+               "_numTypedDestRegs[VecRegClass]++;\n"
+    def setSrcWrapper(srcRegId):  # registers one source operand
+        return "setSrcRegIdx(_numSrcRegs++, " + srcRegId + ");\n"
+    def setSrcVm():  # adds vecRegClass[0] as a source when vm is clear
+        return "if (!this->vm)\n" + \
+               "    setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);"
+    def vmDeclAndReadData():  # reads the last source register as v0 if !vm
+        return '''
+            [[maybe_unused]] RiscvISA::vreg_t tmp_v0;
+            [[maybe_unused]] uint8_t* v0;
+            if(!machInst.vm) {
+                xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+                v0 = tmp_v0.as<uint8_t>();
+            }
+        '''
+    def copyOldVd(vd_idx):  # emits the COPY_OLD_VD(<vd_idx>); call
+        return 'COPY_OLD_VD(%d);' % vd_idx
+    def loopWrapper(code, micro_inst = True):  # per-element loop around code
+        if micro_inst:
+            upper_bound = "this->microVl"
+        else:
+            upper_bound = "(uint32_t)machInst.vl"
+        return '''
+            for (uint32_t i = 0; i < %s; i++) {
+                %s
+            }
+        ''' % (upper_bound, code)
+    def maskCondWrapper(code):  # guards code with vm / elem_mask(v0, ei)
+        return "if (this->vm || elem_mask(v0, ei)) {\n" + \
+               code + "}\n"
+    def eiDeclarePrefix(code, widening = False):  # declares ei before code
+        if widening:
+            return '''
+            uint32_t ei = i + micro_vlmax * this->microIdx;
+            ''' + code
+        else:
+            return '''
+            uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx;
+            ''' + code
+
+    def wideningOpRegisterConstraintChecks(code):  # prepends vd/vs2 checks
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned Vd group in Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if the destination EEW is greater than the source
+                // EEW, the source EMUL is at least 1, and the overlap is in the
+                // highest-numbered part of the destination register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            ''' + code
+
+    def narrowingOpRegisterConstraintChecks(code):  # prepends vs2/vd checks
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned VS2 group in Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if the destination EEW is smaller than the source
+                // EEW and the overlap is in the lowest-numbered part of the source
+                // register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+        ''' + code
+
+    def fflags_wrapper(code):  # ORs softfloat exception flags into FFLAGS
+        return '''
+        RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS);
+        std::feclearexcept(FE_ALL_EXCEPT);
+        ''' + code + '''
+        FFLAGS |= softfloat_exceptionFlags;
+        softfloat_exceptionFlags = 0;
+        xc->setMiscReg(MISCREG_FFLAGS, FFLAGS);
+        '''
+
+    def declareVArithTemplate(
+        class_name, type_name='uint', min_size=8, max_size=64):
+        sizes = [8, 16, 32, 64]
+        code = ''  # one explicit instantiation per size in [min_size, max_size]
+        for size in sizes:
+            if size < min_size or size > max_size:
+                continue
+            code += f'template class {class_name}<{type_name}{size}_t>;\n'
+        return code
+
+    def declareGatherTemplate(class_name, index_type):
+        sizes = [8, 16, 32, 64]
+        code = ''  # one explicit <uintN_t, idx_type> instantiation per size
+        for size in sizes:
+            if index_type == 'elem_type':
+                idx_type = f'uint{size}_t'
+            else:
+                idx_type = index_type
+            code += ('template class'
+                     f' {class_name}<uint{size}_t, {idx_type}>;\n')
+        return code
+}};
+
+
+def format VectorIntFormat(code, category, *flags) {{
+    macroop_class_name = 'VectorArithMacroInst'
+    microop_class_name = 'VectorArithMicroInst'
+
+    if name == "vid_v":  # vid_v overrides both the macro and micro base class
+        macroop_class_name = 'VectorVMUNARY0MacroInst'
+        microop_class_name = 'VectorVMUNARY0MicroInst'
+
+    iop = InstObjParams(
+        name,
+        Name,
+        macroop_class_name,
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = inst_name not in ["vmv"]
+    mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim'])
+    need_elem_idx = mask_cond or code.find("ei") != -1
+
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+
+    num_src_regs = 0
+
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    num_src_regs += 1
+
+    src1_reg_id = ""
+    if category in ["OPIVV", "OPMVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+        num_src_regs += 1
+    elif category in ["OPIVX", "OPMVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+        num_src_regs += 1
+    elif category == "OPIVI":
+        pass
+    else:
+        error("not supported category for VectorIntFormat: %s" % category)
+
+    old_vd_idx = num_src_regs  # old vd is registered right after src1/src2
+    src3_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+
+    # wrap the element code: mask guard, then ei declaration, then the loop
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        microop_class_name,
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb' : set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        VectorIntMicroDeclare.subst(microiop) + \
+        VectorIntMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntMicroConstructor.subst(microiop) + \
+        VectorIntMacroConstructor.subst(iop)
+    exec_output = VectorIntMicroExecute.subst(microiop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+}};
+
+
+def format VectorIntExtFormat(code, category, *flags) {{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    ext_div = int(inst_suffix[-1])  # last character of the suffix, as an int
+
+    old_vd_idx = 1
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / " + \
+                      str(ext_div) + "]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'ext_div': ext_div,
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        VectorIntExtMicroDeclare.subst(microiop) + \
+        VectorIntExtMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntMicroConstructor.subst(microiop) + \
+        VectorIntMacroConstructor.subst(iop)
+    exec_output = \
+        VectorIntExtMicroExecute.subst(microiop) + \
+        VectorIntExtMacroExecute.subst(iop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+}};
+
+def format VectorIntWideningFormat(code, category, *flags) {{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, max_size=32)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = True
+    mask_cond = v0_required
+    need_elem_idx = mask_cond or code.find("ei") != -1
+    old_vd_idx = 2
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src1_reg_id = ""
+    if category in ["OPIVV", "OPMVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+    elif category in ["OPIVX", "OPMVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+    else:
+        error("not supported category for VectorIntWideningFormat: %s" % category)
+    src2_reg_id = ""
+    if inst_suffix in ["vv", "vx"]:
+        src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
+    elif inst_suffix in ["wv", "wx"]:
+        src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+
+    # wrap the element code: mask guard, then ei declaration, then the loop
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code, widening=True)
+    code = loopWrapper(code)
+
+    code = wideningOpRegisterConstraintChecks(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorIntWideningMicroDeclare.subst(microiop) + \
+        VectorIntWideningMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntWideningMicroConstructor.subst(microiop) + \
+        VectorIntWideningMacroConstructor.subst(iop)
+    exec_output = VectorIntWideningMicroExecute.subst(microiop)
+    decode_block = VectorIntWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorIntNarrowingFormat(code, category, *flags) {{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, max_size=32)},
+        flags
+    )
+    mask_cond = True
+    need_elem_idx = True
+
+    old_vd_idx = 2
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]"
+    if category in ["OPIVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+    elif category in ["OPIVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+    elif category == "OPIVI":
+        old_vd_idx = 1  # no src1 is registered, so old vd moves up one slot
+    else:
+        error("not supported category for VectorIntNarrowingFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    old_dest_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    # wrap the element code, then prepend the register constraint checks
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code, widening=True)
+    code = loopWrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare
+        },
+        flags)
+
+    header_output = \
+        VectorIntWideningMicroDeclare.subst(microiop) + \
+        VectorIntWideningMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntWideningMicroConstructor.subst(microiop) + \
+        VectorIntWideningMacroConstructor.subst(iop)
+    exec_output = VectorIntNarrowingMicroExecute.subst(microiop)
+    decode_block = VectorIntWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorIntMaskFormat(code, category, *flags) {{
+    iop = InstObjParams(name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags)
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = not (inst_name in ["vmadc", "vmsbc"] \
+        and inst_suffix in ["vv", "vx", "vi"])
+    mask_cond = inst_name not in ['vmadc', 'vmsbc']
+    need_elem_idx = mask_cond or code.find("ei") != -1
+
+    old_vd_idx = 2
+    dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]"
+    src1_reg_id = ""
+    if category == "OPIVV":
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+    elif category == "OPIVX":
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+    elif category == "OPIVI":
+        old_vd_idx = 1  # no src1 is registered, so old vd moves up one slot
+    else:
+        error("not supported category for VectorIntMaskFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+
+    # wrap the element code: mask guard, then ei declaration, then the loop
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        VectorIntMaskMicroDeclare.subst(microiop) + \
+        VectorIntMaskMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntMaskMicroConstructor.subst(microiop) + \
+        VectorIntMaskMacroConstructor.subst(iop)
+    exec_output = VectorIntMaskMicroExecute.subst(microiop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+}};
+
+def format VectorGatherFormat(code, category, *flags) {{
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    if inst_name == "vrgatherei16":
+        idx_type = "uint16_t"
+    else:
+        idx_type = "elem_type"
+    iop = InstObjParams(name, Name, 'VectorArithMacroInst',
+        {'idx_type': idx_type,
+         'code': code,
+         'declare_varith_template': declareGatherTemplate(Name, idx_type)},
+        flags)
+    old_vd_idx = 2
+    dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]"
+    src1_reg_id = ""
+    if category in ["OPIVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + vs1_idx]"
+    elif category in ["OPIVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+    elif category == "OPIVI":
+        old_vd_idx = 1  # no src1 is registered, so old vd moves up one slot
+    else:
+        error("not supported category for VectorGatherFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + vd_idx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+
+    # code is handed to the micro-op as-is: no mask/ei/loop wrappers here
+
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareGatherTemplate(Name + "Micro", idx_type)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'idx_type': idx_type,
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorGatherMicroDeclare.subst(microiop) + \
+        VectorGatherMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorGatherMicroConstructor.subst(microiop) + \
+        VectorGatherMacroConstructor.subst(iop)
+    exec_output = VectorGatherMicroExecute.subst(microiop)
+    decode_block = VectorGatherDecodeBlock.subst(iop)
+
+}};
+
+def format VectorFloatFormat(code, category, *flags) {{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, 'float', 32)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = inst_name not in ["vfmv"]
+    mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm'])
+    need_elem_idx = mask_cond or code.find("ei") != -1
+
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src1_reg_id = ""
+    if category == "OPFVV":
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+    elif category == "OPFVF":
+        src1_reg_id = "floatRegClass[_machInst.rs1]"
+    else:
+        error("not supported category for VectorFloatFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+    # wrap the element code: mask guard, ei, loop, then FFLAGS bookkeeping
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(2),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorFloatMicroDeclare.subst(microiop) + \
+        VectorFloatMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorFloatMicroConstructor.subst(microiop) + \
+        VectorFloatMacroConstructor.subst(iop)
+    exec_output = VectorFloatMicroExecute.subst(microiop)
+    decode_block = VectorFloatDecodeBlock.subst(iop)
+}};
+
+def format VectorFloatCvtFormat(code, category, *flags) {{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, 'float', 32)},
+        flags
+    )
+
+    old_vd_idx = 1  # sources are registered as vs2, then old vd
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorFloatCvtMicroDeclare.subst(microiop) + \
+        VectorFloatCvtMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorFloatMicroConstructor.subst(microiop) + \
+        VectorFloatMacroConstructor.subst(iop)
+    exec_output = VectorFloatMicroExecute.subst(microiop)
+    decode_block = VectorFloatDecodeBlock.subst(iop)
+}};
+
+def format VectorFloatWideningFormat(code, category, *flags) {{
+    varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32)
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': varith_macro_declare},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = True
+    mask_cond = v0_required
+    need_elem_idx = mask_cond or code.find("ei") != -1
+
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src1_reg_id = ""
+    if category in ["OPFVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+    elif category in ["OPFVF"]:
+        src1_reg_id = "floatRegClass[_machInst.rs1]"
+    else:
+        error("not supported category for VectorFloatWideningFormat: %s" % category)
+    src2_reg_id = ""
+    if inst_suffix in ["vv", "vf"]:
+        src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
+    elif inst_suffix in ["wv", "wf"]:
+        src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+
+    # wrap the element code: mask guard, ei, loop, then FFLAGS bookkeeping
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code, widening=True)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    code = wideningOpRegisterConstraintChecks(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareVArithTemplate(
+        Name + "Micro", 'float', 32, 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(2),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorIntWideningMicroDeclare.subst(microiop) + \
+        VectorIntWideningMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntWideningMicroConstructor.subst(microiop) + \
+        VectorIntWideningMacroConstructor.subst(iop)
+    exec_output = VectorFloatWideningMicroExecute.subst(microiop)
+    decode_block = VectorFloatWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorFloatWideningCvtFormat(code, category, *flags) {{
+    varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32)
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': varith_macro_declare},
+        flags
+    )
+
+    old_vd_idx = 1  # sources are registered as vs2, then old vd
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareVArithTemplate(
+        Name + "Micro", 'float', 32, 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorFloatCvtMicroDeclare.subst(microiop) + \
+        VectorFloatCvtMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorFloatMicroConstructor.subst(microiop) + \
+        VectorIntWideningMacroConstructor.subst(iop)
+    exec_output = VectorFloatWideningMicroExecute.subst(microiop)
+    decode_block = VectorFloatWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
+    varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32)
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': varith_macro_declare},
+        flags
+    )
+
+    old_vd_idx = 1  # sources are registered as vs2, then old vd
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
+
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+
+    varith_micro_declare = declareVArithTemplate(
+        Name + "Micro", 'float', 32, 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorFloatCvtMicroDeclare.subst(microiop) + \
+        VectorFloatCvtMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorFloatMicroConstructor.subst(microiop) + \
+        VectorIntWideningMacroConstructor.subst(iop)
+    exec_output = VectorFloatNarrowingMicroExecute.subst(microiop)
+    decode_block = VectorFloatWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorFloatMaskFormat(code, category, *flags) {{
+    iop = InstObjParams(name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, 'float', 32)},
+        flags
+    )
+    dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]"
+    src1_reg_id = ""
+    if category == "OPFVV":
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+    elif category == "OPFVF":
+        src1_reg_id = "floatRegClass[_machInst.rs1]"
+    else:
+        error("not supported category for VectorFloatFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+    set_vlenb = setVlenb()
+
+    code = maskCondWrapper(code)
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(2),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorFloatMaskMicroDeclare.subst(microiop) + \
+        VectorFloatMaskMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorFloatMaskMicroConstructor.subst(microiop) + \
+        VectorFloatMaskMacroConstructor.subst(iop)
+    exec_output = VectorFloatMaskMicroExecute.subst(microiop)
+    decode_block = VectorFloatDecodeBlock.subst(iop)
+}};
+
+def format VMvWholeFormat(code, category, *flags) {{
+    iop = InstObjParams(name, Name, 'VMvWholeMacroInst', {'code': code}, flags)
+    # Macro- and micro-op share the same code; only the micro-op gets set_vlen.
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VMvWholeMicroInst',
+        {'code': code,
+         'set_vlen': setVlen()},
+        flags)
+    # `category` is not consulted by this format.
+    header_output = \
+        VMvWholeMacroDeclare.subst(iop) + \
+        VMvWholeMicroDeclare.subst(microiop)
+    decoder_output = \
+        VMvWholeMacroConstructor.subst(iop) + \
+        VMvWholeMicroConstructor.subst(microiop)
+    exec_output = VMvWholeMicroExecute.subst(microiop)
+    decode_block = BasicDecode.subst(iop)
+}};
+
+def format ViotaFormat(code, category, *flags){{
+    iop = InstObjParams(
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags
+    )
+    # Emits a macro-op (iop) plus a micro-op (microiop) sharing the same code.
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    # The tail of a vector mask inst should be treated as tail-agnostic.
+    # We treat it with the tail-undisturbed policy instead, since
+    # the test suites only support the undisturbed policy.
+    old_dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)  # second source added
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    vm_decl_rd = vmDeclAndReadData()
+    set_vm_idx = setSrcVm()  # passed as its own key, not appended to the srcs
+    set_vlenb = setVlenb()
+
+    microiop = InstObjParams(name+"_micro",
+        Name+"Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'set_vm_idx': set_vm_idx,
+         'copy_old_vd': copyOldVd(1),
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        ViotaMicroDeclare.subst(microiop) + \
+        ViotaMacroDeclare.subst(iop)
+    decoder_output = \
+        ViotaMicroConstructor.subst(microiop) + \
+        ViotaMacroConstructor.subst(iop)
+    exec_output = ViotaMicroExecute.subst(microiop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+
+}};
+
+def format Vector1Vs1VdMaskFormat(code, category, *flags){{
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src2_reg_id = "vecRegClass[_machInst.vs2]"
+    # The tail of a vector mask inst should be treated as tail-agnostic.
+    # We treat it with the tail-undisturbed policy instead, since
+    # the test suites only support the undisturbed policy.
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)  # second source added
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    vm_decl_rd = vmDeclAndReadData()
+    set_vm_idx = setSrcVm()  # passed as its own key, not appended to the srcs
+    set_vlenb = setVlenb()
+    iop = InstObjParams(name,  # single instruction; no micro-op is generated
+        Name,
+        'VectorNonSplitInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'set_vm_idx': set_vm_idx,
+         'copy_old_vd': copyOldVd(1),
+         'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8),
+         },
+        flags)
+    # NOTE(review): the header uses the *Rd* declare template -- confirm.
+    header_output = Vector1Vs1RdMaskDeclare.subst(iop)
+    decoder_output = Vector1Vs1VdMaskConstructor.subst(iop)
+    exec_output = Vector1Vs1VdMaskExecute.subst(iop)
+    decode_block = VectorMaskDecodeBlock.subst(iop)
+}};
+
+def format Vector1Vs1RdMaskFormat(code, category, *flags){{
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    vm_decl_rd = vmDeclAndReadData()
+    set_vm_idx = setSrcVm()
+    iop = InstObjParams(name,  # single instruction; no micro-op is generated
+        Name,
+        'VectorNonSplitInst',
+        {'code': code,
+         'vm_decl_rd': vm_decl_rd,
+         'set_vm_idx': set_vm_idx,
+         'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8)
+        },
+        flags)
+    # No dest/src register snippets are built here, unlike the Vd variant.
+    header_output = Vector1Vs1RdMaskDeclare.subst(iop)
+    decoder_output = Vector1Vs1RdMaskConstructor.subst(iop)
+    exec_output = Vector1Vs1RdMaskExecute.subst(iop)
+    decode_block = VectorMaskDecodeBlock.subst(iop)
+}};
+
+def format VectorNonSplitFormat(code, category, *flags) {{
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # e.g. "vfmv" / "vmv"
+    vm_decl_rd = ""
+    # Both mask-related snippets are substituted as empty strings here.
+    set_vm_idx = ""
+
+    if inst_name == "vfmv" :
+        code = fflags_wrapper(code)
+    # NOTE(review): same test as above; the two `if`s could be merged.
+    if inst_name == "vfmv" :
+        varith_template = declareVArithTemplate(Name, 'float', 32)
+        iop = InstObjParams(name,
+            Name,
+            'VectorNonSplitInst',
+            {'code': code,
+             'vm_decl_rd': vm_decl_rd,
+             'set_vm_idx': set_vm_idx,
+             'declare_varith_template': varith_template},
+            flags)
+        header_output = VectorNonSplitDeclare.subst(iop)
+        decoder_output = VectorNonSplitConstructor.subst(iop)
+        exec_output = VectorFloatNonSplitExecute.subst(iop)
+        decode_block = VectorFloatNonSplitDecodeBlock.subst(iop)
+    elif inst_name == "vmv" :
+        iop = InstObjParams(name,
+            Name,
+            'VectorNonSplitInst',
+            {'code': code,
+             'vm_decl_rd': vm_decl_rd,
+             'set_vm_idx': set_vm_idx,
+             'declare_varith_template': declareVArithTemplate(Name)},
+            flags)
+        header_output = VectorNonSplitDeclare.subst(iop)
+        decoder_output = VectorNonSplitConstructor.subst(iop)
+        exec_output = VectorIntNonSplitExecute.subst(iop)
+        decode_block = VectorIntNonSplitDecodeBlock.subst(iop)
+    else :
+        error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name)
+
+}};
+
+def format VectorMaskFormat(code, category, *flags) {{
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    old_vd_idx = 2
+    if category not in ["OPMVV"]:
+        error("not supported category for VectorIntFormat: %s" % category)
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src1_reg_id = "vecRegClass[_machInst.vs1]"
+    src2_reg_id = "vecRegClass[_machInst.vs2]"
+
+    # The tail of vector mask inst should be treated as tail-agnostic.
+    # We treat it with tail-undisturbed policy, since
+    # the test suits only support undisturbed policy.
+    # TODO: remove it
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+
+    set_src_reg_idx = ""
+    set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_vlenb = setVlenb()
+
+    code = loopWrapper(code, micro_inst = False)
+
+    iop = InstObjParams(name,
+        Name,
+        'VectorNonSplitInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': declareVArithTemplate(Name, 'uint', 8, 8)
+        },
+        flags)
+
+    header_output = VectorMaskDeclare.subst(iop)
+    decoder_output = VectorMaskConstructor.subst(iop)
+    exec_output = VectorMaskExecute.subst(iop)
+    decode_block = VectorMaskDecodeBlock.subst(iop)
+}};
+
+def format VectorReduceIntFormat(code, category, *flags) {{
+    iop = InstObjParams(  # macro-op; the micro-op (microiop) is built below
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src1_reg_id = "vecRegClass[_machInst.vs1]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"  # only vs2 is offset
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    # Treat tail undisturbed/agnostic as the same:
+    # the old vd is always added as a source vreg.
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+    # C++ aliases handed to the micro-op under the 'type_def' key.
+    type_def = '''
+        using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+        using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    '''
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb' : set_vlenb,
+         'set_vlen' : set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'type_def': type_def,
+         'copy_old_vd': copyOldVd(2),  # old vd was the third source added
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        VectorReduceMicroDeclare.subst(microiop) + \
+        VectorReduceMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorReduceMicroConstructor.subst(microiop) + \
+        VectorReduceMacroConstructor.subst(iop)
+    exec_output = VectorReduceIntMicroExecute.subst(microiop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+}};
+
+def format VectorReduceFloatFormat(code, category, *flags) {{
+    iop = InstObjParams(  # macro-op; built before code is fflags-wrapped
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, 'float', 32)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src1_reg_id = "vecRegClass[_machInst.vs1]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"  # only vs2 is offset
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    # Treat tail undisturbed/agnostic as the same:
+    # the old vd is always added as a source vreg.
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+    # C++ aliases handed to the micro-op under the 'type_def' key.
+    type_def = '''
+        using et = ElemType;
+        using vu = decltype(et::v);
+    '''
+
+    code = fflags_wrapper(code)  # affects the micro-op only
+
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", 'float', 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'type_def': type_def,
+         'copy_old_vd': copyOldVd(2),  # old vd was the third source added
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorReduceMicroDeclare.subst(microiop) + \
+        VectorReduceMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorReduceMicroConstructor.subst(microiop) + \
+        VectorReduceMacroConstructor.subst(iop)
+    exec_output = VectorReduceFloatMicroExecute.subst(microiop)
+    decode_block = VectorFloatDecodeBlock.subst(iop)
+}};
+
+def format VectorReduceFloatWideningFormat(code, category, *flags) {{
+    varith_macro_declare = declareVArithTemplate(Name, 'float', 32, 32)
+    iop = InstObjParams(  # macro-op; the micro-op (microiop) is built below
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': varith_macro_declare},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src1_reg_id = "vecRegClass[_machInst.vs1]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"  # only vs2 is offset
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    # Treat tail undisturbed/agnostic as the same:
+    # the old vd is always added as a source vreg.
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+    type_def = '''
+        using et = ElemType;
+        using vu [[maybe_unused]] = decltype(et::v);
+        using ewt = typename double_width<et>::type;
+        using vwu = decltype(ewt::v);
+    '''
+    # NOTE(review): unlike VectorReduceFloatFormat, no fflags_wrapper() here.
+    varith_micro_declare = declareVArithTemplate(
+        Name + "Micro", 'float', 32, 32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'type_def': type_def,
+         'copy_old_vd': copyOldVd(2),  # old vd was the third source added
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorReduceMicroDeclare.subst(microiop) + \
+        VectorReduceMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorReduceMicroConstructor.subst(microiop) + \
+        VectorReduceMacroConstructor.subst(iop)
+    exec_output = VectorReduceFloatWideningMicroExecute.subst(microiop)
+    decode_block = VectorFloatWideningDecodeBlock.subst(iop)
+}};
+
+def format VectorIntVxsatFormat(code, category, *flags) {{
+    iop = InstObjParams(  # macro-op; receives the raw, unwrapped code
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    old_vd_idx = 2  # lowered to 1 for OPIVI, which adds no src1 register
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    src1_reg_id = ""
+    if category in ["OPIVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+    elif category in ["OPIVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+    elif category == "OPIVI":
+        old_vd_idx = 1
+    else:
+        error("not supported category for VectorIntVxsatFormat: %s" % category)
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    # Source order: [src1 unless OPIVI], vs2, vs3, then the mask register.
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+
+    set_vlenb = setVlenb()
+
+    code = maskCondWrapper(code)  # wrappers are applied innermost-first
+    code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': declareVArithTemplate(Name + "Micro")},
+        flags)
+
+    header_output = \
+        VectorIntVxsatMicroDeclare.subst(microiop) + \
+        VectorIntVxsatMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorIntVxsatMicroConstructor.subst(microiop) + \
+        VectorIntVxsatMacroConstructor.subst(iop)
+    exec_output = VectorIntMicroExecute.subst(microiop)
+    decode_block = VectorIntDecodeBlock.subst(iop)
+}};
+
+def format VectorReduceIntWideningFormat(code, category, *flags) {{
+    iop = InstObjParams(  # macro-op; the micro-op (microiop) is built below
+        name,
+        Name,
+        'VectorArithMacroInst',
+        {'code': code,
+         'declare_varith_template': declareVArithTemplate(Name, max_size=32)},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd]"
+    src1_reg_id = "vecRegClass[_machInst.vs1]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"  # only vs2 is offset
+    old_dest_reg_id = "vecRegClass[_machInst.vd]"
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    set_src_reg_idx = setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    # Treat tail undisturbed/agnostic as the same:
+    # the old vd is always added as a source vreg.
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_src_reg_idx += setSrcVm()
+    vm_decl_rd = vmDeclAndReadData()
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+    # Macro and micro declarations both pass max_size=32.
+    varith_micro_declare = declareVArithTemplate(Name + "Micro", max_size=32)
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        'VectorArithMicroInst',
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(2),  # old vd was the third source added
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorReduceMicroDeclare.subst(microiop) + \
+        VectorReduceMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorReduceMicroConstructor.subst(microiop) + \
+        VectorReduceMacroConstructor.subst(iop)
+    exec_output = VectorReduceIntWideningMicroExecute.subst(microiop)
+    decode_block = VectorIntWideningDecodeBlock.subst(iop)
+}};
+
+let {{
+# Shared builder used by the four Vector*Slide*Format formats in this file.
+def VectorSlideBase(name, Name, category, code, flags, macro_construtor,
+        decode_template, micro_execute_template):
+    macroop_class_name = 'VectorSlideMacroInst'
+    microop_class_name = 'VectorSlideMicroInst'
+    # Make sure flags are in lists (convert to lists if not).
+    flags = makeList(flags)
+    # NOTE: any other decode_template leaves varith_macro_declare unbound.
+    if decode_template is VectorIntDecodeBlock:
+        varith_macro_declare = declareVArithTemplate(Name)
+    elif decode_template is VectorFloatDecodeBlock:
+        varith_macro_declare = declareVArithTemplate(Name, 'float', 32)
+
+    iop = InstObjParams(
+        name,
+        Name,
+        macroop_class_name,
+       {'code': code,
+        'declare_varith_template': varith_macro_declare},
+        flags
+    )
+    inst_name, inst_suffix = name.split("_", maxsplit=1)  # results unused here
+    dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]"
+    src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]"
+    src1_ireg_id = "intRegClass[_machInst.rs1]"
+    src1_freg_id = "floatRegClass[_machInst.rs1]"
+
+    # The tail of a vector mask inst should be treated as tail-agnostic.
+    # We treat it with the tail-undisturbed policy instead, since
+    # the test suites only support the undisturbed policy.
+    num_src_regs = 0  # counts the sources added before the old vd
+
+    old_dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]"
+    set_src_reg_idx = ""
+    if category in ["OPIVX", "OPMVX"]:
+        set_src_reg_idx += setSrcWrapper(src1_ireg_id)
+        num_src_regs += 1
+    elif category in ["OPFVF"]:
+        set_src_reg_idx += setSrcWrapper(src1_freg_id)
+        num_src_regs += 1
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    num_src_regs += 1
+    old_vd_idx = num_src_regs  # position at which the old vd is added next
+    set_src_reg_idx += setSrcWrapper(old_dest_reg_id)
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+    vm_decl_rd = vmDeclAndReadData()
+    set_src_reg_idx += setSrcVm()
+    set_vlenb = setVlenb()
+    set_vlen = setVlen()
+    # NOTE: any other decode_template leaves varith_micro_declare unbound.
+    if decode_template is VectorIntDecodeBlock:
+        varith_micro_declare = declareVArithTemplate(Name + "Micro")
+    elif decode_template is VectorFloatDecodeBlock:
+        varith_micro_declare = declareVArithTemplate(
+            Name + "Micro", 'float', 32)
+
+    microiop = InstObjParams(name + "_micro",
+        Name + "Micro",
+        microop_class_name,
+        {'code': code,
+         'set_dest_reg_idx': set_dest_reg_idx,
+         'set_src_reg_idx': set_src_reg_idx,
+         'set_vlenb': set_vlenb,
+         'set_vlen': set_vlen,
+         'vm_decl_rd': vm_decl_rd,
+         'copy_old_vd': copyOldVd(old_vd_idx),
+         'declare_varith_template': varith_micro_declare},
+        flags)
+
+    header_output = \
+        VectorSlideMicroDeclare.subst(microiop) + \
+        VectorSlideMacroDeclare.subst(iop)
+    decoder_output = \
+        VectorSlideMicroConstructor.subst(microiop) + \
+        macro_construtor.subst(iop)
+    exec_output = micro_execute_template.subst(microiop)
+    decode_block = decode_template.subst(iop)
+    return (header_output, decoder_output, decode_block, exec_output)
+
+}};
+
+def format VectorSlideUpFormat(code, category, *flags) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VectorSlideBase(name, Name, category, code,
+            flags,  # VectorSlideBase passes this through makeList()
+            macro_construtor = VectorSlideUpMacroConstructor,  # keyword (sic)
+            decode_template = VectorIntDecodeBlock,
+            micro_execute_template = VectorSlideMicroExecute)
+}};
+
+def format VectorSlideDownFormat(code, category, *flags) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VectorSlideBase(name, Name, category, code,
+            flags,  # VectorSlideBase passes this through makeList()
+            macro_construtor = VectorSlideDownMacroConstructor,  # keyword (sic)
+            decode_template = VectorIntDecodeBlock,
+            micro_execute_template = VectorSlideMicroExecute)
+}};
+
+def format VectorFloatSlideUpFormat(code, category, *flags) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VectorSlideBase(name, Name, category, code,
+            flags,  # VectorSlideBase passes this through makeList()
+            macro_construtor = VectorSlideUpMacroConstructor,  # keyword (sic)
+            decode_template = VectorFloatDecodeBlock,  # selects 'float', 32
+            micro_execute_template = VectorFloatSlideMicroExecute)
+}};
+
+def format VectorFloatSlideDownFormat(code, category, *flags) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VectorSlideBase(name, Name, category, code,
+            flags,  # VectorSlideBase passes this through makeList()
+            macro_construtor = VectorSlideDownMacroConstructor,  # keyword (sic)
+            decode_template = VectorFloatDecodeBlock,  # selects 'float', 32
+            micro_execute_template = VectorFloatSlideMicroExecute)
+}};
diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa
new file mode 100644
index 0000000000..6280e5679b
--- /dev/null
+++ b/src/arch/riscv/isa/formats/vector_conf.isa
@@ -0,0 +1,261 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+def format VConfOp(code, write_code, declare_class, branch_class, *flags) {{
+    iop = InstObjParams(  # one instruction object; no micro-op is generated
+        name,
+        Name,
+        'VConfOp',
+        {
+            'code': code,
+            'write_code': write_code,
+        },
+        flags
+    )
+    declareTemplate = eval(declare_class)  # template name given as a string
+    branchTargetTemplate = eval(branch_class)  # NOTE: eval() of decoder text
+    # exec_output is VConfExecute followed by the chosen branchTarget template.
+    header_output = declareTemplate.subst(iop)
+    decoder_output = VConfConstructor.subst(iop)
+    decode_block = VConfDecodeBlock.subst(iop)
+    exec_output = VConfExecute.subst(iop) + branchTargetTemplate.subst(iop)
+}};
+
+def template VSetVlDeclare {{
+    //
+    // Static instruction class for "%(mnemonic)s".
+    // This variant overrides the ThreadContext overload of branchTarget().
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+        VTYPE getNewVtype(VTYPE, VTYPE, uint32_t) const; // old, requested, vlen
+        uint32_t getNewVL( // current vl, requested vl, vlmax, rd bits, rs1 bits
+            uint32_t, uint32_t, uint32_t, uint64_t, uint64_t) const;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint32_t elen);
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        std::unique_ptr<PCStateBase> branchTarget(
+                ThreadContext *tc) const override;
+
+        using StaticInst::branchTarget; // keep the other overloads visible
+        using %(base_class)s::generateDisassembly;
+
+    };
+}};
+
+def template VSetiVliDeclare {{
+    //
+    // Static instruction class for "%(mnemonic)s".
+    // This variant overrides the PCStateBase overload of branchTarget().
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+        VTYPE getNewVtype(VTYPE, VTYPE, uint32_t) const; // old, requested, vlen
+        uint32_t getNewVL( // current vl, requested vl, vlmax, rd bits, rs1 bits
+            uint32_t, uint32_t, uint32_t, uint64_t, uint64_t) const;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint32_t elen);
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        std::unique_ptr<PCStateBase> branchTarget(
+                const PCStateBase &branch_pc) const override;
+
+        using StaticInst::branchTarget; // keep the other overloads visible
+        using %(base_class)s::generateDisassembly;
+
+    };
+}};
+
+def template VConfConstructor {{
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen)
+    : %(base_class)s("%(mnemonic)s", _machInst, _elen, %(op_class)s)
+    { // body consists solely of the two substituted snippets below
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+}};
+
+def template VConfDecodeBlock {{
+    return new %(class_name)s(machInst,elen); // args match VConfConstructor
+}};
+
+def template VConfExecute {{
+    VTYPE
+    %(class_name)s::getNewVtype(
+        VTYPE oldVtype, VTYPE reqVtype, uint32_t vlen) const // vlen is unused
+    {
+        VTYPE newVtype = oldVtype;
+        if (oldVtype != reqVtype) { // validity is only checked on a change
+            newVtype = reqVtype;
+
+            float vflmul = getVflmul(newVtype.vlmul);
+
+            uint32_t sew = getSew(newVtype.vsew);
+            // Illegal if LMUL is outside [1/8, 8], SEW is too wide for elen,
+            uint32_t newVill = // or any of bits 62..8 of the request is set.
+                !(vflmul >= 0.125 && vflmul <= 8) ||
+                    sew > std::min(vflmul, 1.0f) * elen ||
+                    bits(reqVtype, 62, 8) != 0;
+            if (newVill) {
+                newVtype = 0; // clear every field, then flag vill
+                newVtype.vill = 1;
+            }
+        }
+        return newVtype;
+    }
+
+    uint32_t
+    %(class_name)s::getNewVL(uint32_t currentVl, uint32_t reqVl,
+        uint32_t vlmax, uint64_t rdBits, uint64_t rs1Bits) const
+    {
+        uint32_t newVl = 0;
+        if (vlmax == 0) {
+            newVl = 0;
+        } else if (rdBits == 0 && rs1Bits == 0) {
+            newVl = currentVl > vlmax ? vlmax : currentVl; // keep vl, clamped
+        } else if (rdBits != 0 && rs1Bits == 0) {
+            newVl = vlmax;
+        } else if (rs1Bits != 0) {
+            newVl = reqVl > vlmax ? vlmax : reqVl; // requested vl, clamped
+        }
+        return newVl;
+    }
+
+    Fault
+    %(class_name)s::execute(ExecContext *xc,
+        trace::InstRecord *traceData) const
+    {
+        auto tc = xc->tcBase();
+        MISA misa = xc->readMiscReg(MISCREG_ISA);
+        STATUS status = xc->readMiscReg(MISCREG_STATUS);
+        if (!misa.rvv || status.vs == VPUStatus::OFF) {
+            return std::make_shared<IllegalInstFault>(
+                "RVV is disabled or VPU is off", machInst);
+        }
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+        // vstart is reset through the thread context, not through xc.
+        tc->setMiscReg(MISCREG_VSTART, 0);
+        // requested_vtype, vlen, vlmax etc. presumably come from the code above.
+        VTYPE new_vtype = getNewVtype(Vtype, requested_vtype,
+            vlen);
+        vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen);
+        uint32_t new_vl = getNewVL(
+            current_vl, requested_vl, vlmax, rd_bits, rs1_bits);
+
+        // new_vtype / new_vl are presumably consumed by the write snippet.
+
+        %(write_code)s;
+
+        %(op_wb)s;
+        return NoFault;
+    }
+}};
+
+def template VSetiVliBranchTarget {{
+    std::unique_ptr<PCStateBase>
+    %(class_name)s::branchTarget(const PCStateBase &branch_pc) const
+    {
+        auto &rpc = branch_pc.as<RiscvISA::PCState>();
+        // Every input is an immediate or a PC-state field; no register reads.
+        uint64_t rd_bits = machInst.rd;
+        uint64_t rs1_bits = -1; // non-zero, so getNewVL() uses requested_vl
+        uint64_t requested_vl = uimm;
+        uint64_t requested_vtype = zimm10;
+
+        uint32_t vlen = rpc.vlenb() * 8;
+
+        VTYPE new_vtype = getNewVtype(rpc.vtype(), requested_vtype, vlen);
+        uint32_t vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen);
+        uint32_t new_vl = getNewVL(
+            rpc.vl(), requested_vl, vlmax, rd_bits, rs1_bits);
+        // The result is a clone of branch_pc with vtype and vl replaced.
+        std::unique_ptr<PCState> npc(dynamic_cast<PCState*>(rpc.clone()));
+        npc->vtype(new_vtype);
+        npc->vl(new_vl);
+        return npc;
+    }
+}};
+
+def template VSetVliBranchTarget {{
+    std::unique_ptr<PCStateBase>
+    %(class_name)s::branchTarget(ThreadContext *tc) const
+    {
+        PCStateBase *pc_ptr = tc->pcState().clone(); // owned by the return value
+
+        uint64_t rd_bits = machInst.rd;
+        uint64_t rs1_bits = machInst.rs1;
+        uint64_t requested_vl = tc->getReg(srcRegIdx(0)); // read from a register
+        uint64_t requested_vtype = zimm11; // immediate
+
+        uint32_t vlen = pc_ptr->as<PCState>().vlenb() * 8;
+
+        VTYPE new_vtype = getNewVtype(
+            pc_ptr->as<PCState>().vtype(), requested_vtype, vlen);
+        uint32_t vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen);
+        uint32_t new_vl = getNewVL(
+            pc_ptr->as<PCState>().vl(), requested_vl, vlmax, rd_bits, rs1_bits);
+        // The cloned PC state is updated in place with the new vtype and vl.
+        pc_ptr->as<PCState>().vtype(new_vtype);
+        pc_ptr->as<PCState>().vl(new_vl);
+        return std::unique_ptr<PCStateBase>{pc_ptr};
+    }
+}};
+
+def template VSetVlBranchTarget {{
+    std::unique_ptr<PCStateBase>
+    %(class_name)s::branchTarget(ThreadContext *tc) const
+    {
+        PCStateBase *pc_ptr = tc->pcState().clone(); // owned by the return value
+
+        uint64_t rd_bits = machInst.rd;
+        uint64_t rs1_bits = machInst.rs1;
+        uint64_t requested_vl = tc->getReg(srcRegIdx(0)); // read from a register
+        uint64_t requested_vtype = tc->getReg(srcRegIdx(1)); // also a register
+
+        uint32_t vlen = pc_ptr->as<PCState>().vlenb() * 8;
+
+        VTYPE new_vtype = getNewVtype(
+            pc_ptr->as<PCState>().vtype(), requested_vtype, vlen);
+        uint32_t vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen);
+        uint32_t new_vl = getNewVL(
+            pc_ptr->as<PCState>().vl(), requested_vl, vlmax, rd_bits, rs1_bits);
+        // The cloned PC state is updated in place with the new vtype and vl.
+        pc_ptr->as<PCState>().vtype(new_vtype);
+        pc_ptr->as<PCState>().vl(new_vl);
+        return std::unique_ptr<PCStateBase>{pc_ptr};
+    }
+}};
diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa
new file mode 100644
index 0000000000..3b3309797c
--- /dev/null
+++ b/src/arch/riscv/isa/formats/vector_mem.isa
@@ -0,0 +1,230 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+let {{
+
+def setVlen():
+        return "uint32_t vlen = VlenbBits * 8;\n"
+def setVlenb():
+        return "uint32_t vlenb = VlenbBits;\n"
+
+def declareVMemTemplate(class_name):  # explicit uint8..64_t instantiations
+    return f'''
+    template class {class_name}<uint8_t>;
+    template class {class_name}<uint16_t>;
+    template class {class_name}<uint32_t>;
+    template class {class_name}<uint64_t>;
+    '''
+
+def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
+                   inst_flags, base_class, postacc_code='',
+                   declare_template_base=VMemMacroDeclare,
+                   decode_template=VMemBaseDecodeBlock, exec_template_base='',
+                   # If it's a macroop, the corresponding microops will be
+                   # generated.
+                   is_macroop=True):
+    # Make sure flags are in lists (convert to lists if not).
+    mem_flags = makeList(mem_flags)
+    inst_flags = makeList(inst_flags)
+    iop = InstObjParams(name, Name, base_class,
+        {'ea_code': ea_code,
+         'memacc_code': memacc_code,
+         'postacc_code': postacc_code,
+         'declare_vmem_template': declareVMemTemplate(Name)},
+        inst_flags)
+
+    constructTemplate = eval(exec_template_base + 'Constructor')
+
+    header_output   = declare_template_base.subst(iop)
+    decoder_output  = constructTemplate.subst(iop)
+    decode_block    = decode_template.subst(iop)
+    exec_output     = ''
+    if not is_macroop:
+        return (header_output, decoder_output, decode_block, exec_output)
+
+    micro_class_name = exec_template_base + 'MicroInst'
+    microiop = InstObjParams(name + '_micro',
+        Name + 'Micro',
+        exec_template_base + 'MicroInst',
+        {'ea_code': ea_code,
+         'memacc_code': memacc_code,
+         'postacc_code': postacc_code,
+         'set_vlenb': setVlenb(),
+         'set_vlen': setVlen(),
+         'declare_vmem_template': declareVMemTemplate(Name + 'Micro')},
+        inst_flags)
+
+    if mem_flags:
+        mem_flags = [ 'Request::%s' % flag for flag in mem_flags ]
+        s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';'
+        microiop.constructor += s
+
+    microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare')
+    microConsTemplate = eval(exec_template_base + 'Micro' + 'Constructor')
+    microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute')
+    microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc')
+    microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc')
+    header_output = microDeclTemplate.subst(microiop) + header_output
+    decoder_output = microConsTemplate.subst(microiop) + decoder_output
+    micro_exec_output = (microExecTemplate.subst(microiop) +
+        microInitTemplate.subst(microiop) +
+        microCompTemplate.subst(microiop))
+    exec_output += micro_exec_output
+
+    return (header_output, decoder_output, decode_block, exec_output)
+
+}};
+
+def format VleOp(  // Vle templates; default EA advances by vlenb per micro-op
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + vlenb * microIdx;
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VleMacroInst', exec_template_base='Vle')
+}};
+
+def format VseOp(  // Vse templates; default EA advances by vlenb per micro-op
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + vlenb * microIdx;
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VseMacroInst', exec_template_base='Vse')
+}};
+
+def format VlmOp(  // Vlm templates only, no micro-op templates; EA = Rs1
+    memacc_code,
+    ea_code={{ EA = Rs1; }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VleMacroInst', exec_template_base='Vlm', is_macroop=False)
+}};
+
+def format VsmOp(  // Vsm templates only, no micro-op templates; EA = Rs1
+    memacc_code,
+    ea_code={{ EA = Rs1; }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VseMacroInst', exec_template_base='Vsm', is_macroop=False)
+}};
+
+def format VlWholeOp(  // VlWhole templates; default EA advances by vlenb
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + vlenb * microIdx;
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VlWholeMacroInst', exec_template_base='VlWhole')
+}};
+
+def format VsWholeOp(  // VsWhole templates; default EA advances by vlenb
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + vlenb * microIdx;
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VsWholeMacroInst', exec_template_base='VsWhole')
+}};
+
+def format VlStrideOp(  // VlStride templates; default EA = Rs1 + Rs2 * index
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx);
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VlStrideMacroInst', exec_template_base='VlStride')
+}};
+
+def format VsStrideOp(  // VsStride templates; default EA = Rs1 + Rs2 * index
+    memacc_code,
+    ea_code={{
+        EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx);
+    }},
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VsStrideMacroInst', exec_template_base='VsStride')
+}};
+
+def format VlIndexOp(  // VlIndex templates; ea_code has no default here
+    memacc_code,
+    ea_code,
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VlIndexMacroInst', exec_template_base='VlIndex',
+                 declare_template_base=VMemTemplateMacroDeclare,
+                 decode_template=VMemSplitTemplateDecodeBlock
+                 )
+}};
+
+def format VsIndexOp(  // VsIndex templates; ea_code has no default here
+    memacc_code,
+    ea_code,
+    mem_flags=[],
+    inst_flags=[]
+) {{
+    (header_output, decoder_output, decode_block, exec_output) = \
+        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
+                 'VsIndexMacroInst', exec_template_base='VsIndex',
+                 declare_template_base=VMemTemplateMacroDeclare,
+                 decode_template=VMemSplitTemplateDecodeBlock
+                 )
+}};
diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa
index 8dddc2fb59..b37e62bca8 100644
--- a/src/arch/riscv/isa/includes.isa
+++ b/src/arch/riscv/isa/includes.isa
@@ -34,6 +34,7 @@
 //
 
 output header {{
+#include <functional>
 #include <iomanip>
 #include <sstream>
 #include <string>
@@ -53,6 +54,7 @@ output header {{
 #include "arch/riscv/insts/standard.hh"
 #include "arch/riscv/insts/static_inst.hh"
 #include "arch/riscv/insts/unknown.hh"
+#include "arch/riscv/insts/vector.hh"
 #include "arch/riscv/interrupts.hh"
 #include "cpu/static_inst.hh"
 #include "mem/packet.hh"
@@ -66,9 +68,15 @@ output decoder {{
 #include <limits>
 #include <string>
 
+/* riscv softfloat library */
+#include <internals.h>
+#include <softfloat.h>
+#include <specialize.h>
+
 #include "arch/riscv/decoder.hh"
 #include "arch/riscv/faults.hh"
 #include "arch/riscv/mmu.hh"
+#include "arch/riscv/regs/float.hh"
 #include "base/cprintf.hh"
 #include "base/loader/symtab.hh"
 #include "cpu/thread_context.hh"
@@ -95,6 +103,7 @@ output exec {{
 #include "arch/riscv/reg_abi.hh"
 #include "arch/riscv/regs/float.hh"
 #include "arch/riscv/regs/misc.hh"
+#include "arch/riscv/regs/vector.hh"
 #include "arch/riscv/utility.hh"
 #include "base/condcodes.hh"
 #include "cpu/base.hh"
diff --git a/src/arch/riscv/isa/main.isa b/src/arch/riscv/isa/main.isa
index 24f366b00c..2923a965da 100644
--- a/src/arch/riscv/isa/main.isa
+++ b/src/arch/riscv/isa/main.isa
@@ -50,6 +50,9 @@ namespace RiscvISA;
 //Include the operand_types and operand definitions
 ##include "operands.isa"
 
+//Include the definitions for the instruction templates
+##include "templates/templates.isa"
+
 //Include the definitions for the instruction formats
 ##include "formats/formats.isa"
 
diff --git a/src/arch/riscv/isa/operands.isa b/src/arch/riscv/isa/operands.isa
index 72d8f81bca..3a16e0994c 100644
--- a/src/arch/riscv/isa/operands.isa
+++ b/src/arch/riscv/isa/operands.isa
@@ -38,7 +38,15 @@ def operand_types {{
     'sd' : 'int64_t',
     'ud' : 'uint64_t',
     'sf' : 'float',
-    'df' : 'double'
+    'df' : 'double',
+
+    'vi'    : 'vi',
+    'vu'    : 'vu',
+    'vwi'   : 'vwi',
+    'vwu'   : 'vwu',
+    'vext'  : 'vext',
+    'vextu' : 'vextu',
+    'vc'    : 'RiscvISA::VecRegContainer'
 }};
 
 let {{
@@ -79,10 +87,21 @@ def operands {{
     'Fp2': FloatRegOp('df', 'FP2 + 8', 'IsFloating', 2),
     'Fp2_bits': FloatRegOp('ud', 'FP2 + 8', 'IsFloating', 2),
 
+    'Vd':  VecRegOp('vc', 'VD', 'IsVector', 1),
+    'Vs1': VecRegOp('vc', 'VS1', 'IsVector', 2),
+    'Vs2': VecRegOp('vc', 'VS2', 'IsVector', 3),
+    'Vs3': VecRegOp('vc', 'VS3', 'IsVector', 4),
+
 #Memory Operand
     'Mem': MemOp('ud', None, (None, 'IsLoad', 'IsStore'), 5),
 
 #Program Counter Operands
     'PC': PCStateOp('ud', 'pc', (None, None, 'IsControl'), 7),
     'NPC': PCStateOp('ud', 'npc', (None, None, 'IsControl'), 8),
+
+# VL and VTYPE
+    'Vtype': PCStateOp('ud', 'vtype', (None, None, 'IsControl'), 10),
+    'VL': PCStateOp('uw', 'vl', (None, None, 'IsControl'), 11),
+# VLENB (the underlying CSR is read-only).
+    'VlenbBits': PCStateOp('ud', 'vlenb', (None, None, 'IsControl'), 12),
 }};
diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa
new file mode 100644
index 0000000000..ed3f5287c0
--- /dev/null
+++ b/src/arch/riscv/isa/templates/templates.isa
@@ -0,0 +1,32 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Include the vector instruction templates
+##include "vector_mem.isa"
+##include "vector_arith.isa"
diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa
new file mode 100644
index 0000000000..364639a716
--- /dev/null
+++ b/src/arch/riscv/isa/templates/vector_arith.isa
@@ -0,0 +1,2409 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+output header {{
+// Yields byte Vd[idx/8] with bit (idx mod 8) replaced by bit; no store.
+#define ASSIGN_VD_BIT(idx, bit) \
+    ((Vd[(idx)/8] & ~(1 << (idx)%8)) | ((bit) << (idx)%8))
+// Reads source operand idx as the old Vd and copies vlenb bytes into Vd.
+#define COPY_OLD_VD(idx)                                             \
+    [[maybe_unused]] RiscvISA::vreg_t old_vd;                        \
+    [[maybe_unused]] decltype(Vd) old_Vd = nullptr;                  \
+    xc->getRegOperand(this, (idx), &old_vd);                           \
+    old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >();    \
+    memcpy(Vd, old_Vd, vlenb);
+// Loads FRM into softfloat_roundingMode; returns a fault if FRM is above 4.
+#define VRM_REQUIRED                                                         \
+        uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM);                     \
+        if (frm > 4)                                                         \
+            return std::make_shared<IllegalInstFault>("RM fault", machInst); \
+        softfloat_roundingMode = frm;
+// True if a + b + carry_in, taken as unsigned, wraps past the TypeU range.
+template<typename Type>
+bool inline
+carry_out(Type a, Type b, bool carry_in = false) {
+    using TypeU = std::make_unsigned_t<Type>;
+    TypeU s = *reinterpret_cast<TypeU*>(&a)
+            + *reinterpret_cast<TypeU*>(&b) + carry_in;
+    return carry_in
+        ? (s <= *reinterpret_cast<TypeU*>(&a))
+        : (s <  *reinterpret_cast<TypeU*>(&a));
+}
+// True if a - b - borrow_in, taken as unsigned, would need a borrow.
+template<typename Type>
+bool inline
+borrow_out(Type a, Type b, bool borrow_in = false) {
+    using TypeU = std::make_unsigned_t<Type>;
+    return borrow_in
+        ? (*reinterpret_cast<TypeU*>(&a) <= *reinterpret_cast<TypeU*>(&b))
+        : (*reinterpret_cast<TypeU*>(&a) <  *reinterpret_cast<TypeU*>(&b));
+}
+
+}};
+
+def template VectorIntMacroDeclare {{
+// Macro-op class declaration; disassembly is inherited from the base class.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntMacroConstructor {{
+// Emits up to vtype_regs_per_group(vtype) micro-ops, or a lone nop.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl;
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // A zero micro_vl gets a single nop micro-op instead of real ones.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    // NOTE(review): the last micro-op also carries the delayed-commit flag.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntMicroDeclare {{
+// Micro-op class declaration: up to 4 sources, 1 destination and the vm bit.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx
+    // vs2, (old_vd), vm for *.vi
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntMicroConstructor {{
+// Micro-op constructor: latches vm, then registers dest and source indices.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+                                         uint32_t _microVl, uint8_t _microIdx)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm;
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntMicroExecute {{
+// Checks RVV/VPU/vill, marks VS dirty, then runs the instruction code.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntExtMacroDeclare {{
+// Macro-op class declaration that overrides generateDisassembly.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    std::string generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const override;
+};
+
+}};
+
+def template VectorIntExtMicroDeclare {{
+// Micro-op class declaration (3 sources) overriding generateDisassembly.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[3];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    std::string generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const override;
+};
+
+}};
+
+def template VectorIntExtMicroExecute {{
+// Runs the code with vext/vextu sized to SEW / ext_div (8, 16 or 32 bits).
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    auto SEW = vtype_SEW(vtype);
+    auto index = (microIdx % %(ext_div)d);
+    // Source widths other than 8/16/32 fall to default and skip the code.
+    switch (SEW / %(ext_div)d) {
+      case 8: {
+        using vext  [[maybe_unused]] = int8_t;
+        using vextu [[maybe_unused]] = uint8_t;
+        %(op_decl)s;
+        %(op_rd)s;
+        %(set_vlenb)s;
+        %(set_vlen)s;
+        %(vm_decl_rd)s;
+        %(copy_old_vd)s;
+        %(code)s;
+        %(op_wb)s;
+        break;
+      }
+      case 16: {
+        using vext  [[maybe_unused]] = int16_t;
+        using vextu [[maybe_unused]] = uint16_t;
+        %(op_decl)s;
+        %(op_rd)s;
+        %(set_vlenb)s;
+        %(set_vlen)s;
+        %(vm_decl_rd)s;
+        %(copy_old_vd)s;
+        %(code)s;
+        %(op_wb)s;
+        break;
+      }
+      case 32: {
+        using vext  [[maybe_unused]] = int32_t;
+        using vextu [[maybe_unused]] = uint32_t;
+        %(op_decl)s;
+        %(op_rd)s;
+        %(set_vlenb)s;
+        %(set_vlen)s;
+        %(vm_decl_rd)s;
+        %(copy_old_vd)s;
+        %(code)s;
+        %(op_wb)s;
+        break;
+      }
+      default: break;
+    }
+
+    return NoFault;
+}
+
+template <typename ElemType>
+std::string
+%(class_name)s<ElemType>::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
+        << registerName(srcRegIdx(0));
+    if (machInst.vm == 0) ss << ", v0.t";
+    return ss.str();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntExtMacroExecute {{
+// Macro-op disassembly: mnemonic, dest, first source, v0.t when vm is 0.
+template <typename ElemType>
+std::string
+%(class_name)s<ElemType>::generateDisassembly(Addr pc,
+    const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
+        << registerName(srcRegIdx(0));
+    if (machInst.vm == 0) ss << ", v0.t";
+    return ss.str();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntDecodeBlock {{
+// Maps vsew 0..3 to uint8..64_t; NOTE(review): other vsew hit UNREACHABLE.
+switch(machInst.vtype8.vsew) {
+case 0b000: return new %(class_name)s<uint8_t>(machInst, vlen);
+case 0b001: return new %(class_name)s<uint16_t>(machInst, vlen);
+case 0b010: return new %(class_name)s<uint32_t>(machInst, vlen);
+case 0b011: return new %(class_name)s<uint64_t>(machInst, vlen);
+default: GEM5_UNREACHABLE;
+}
+
+}};
+
+def template VectorIntWideningMacroDeclare {{
+// Macro-op class declaration; disassembly is inherited from the base class.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntWideningMacroConstructor {{
+// Emits up to 2^max(0, vlmul + 1) micro-ops, or a lone nop; LMUL=8 panics.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int64_t vlmul = vtype_vlmul(_machInst.vtype8);
+    // TODO: move this check to the Decode template
+    panic_if(vlmul == 3, "LMUL=8 is illegal for widening inst");
+    // when LMUL is set to m1, the inst must be split into 2 micro insts
+    const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
+
+    int32_t tmp_vl = this->vl;
+    const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // A zero micro_vl gets a single nop micro-op instead of real ones.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    // NOTE(review): the last micro-op also carries the delayed-commit flag.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntWideningMicroDeclare {{
+// Micro-op class declaration: up to 4 sources, 1 destination and the vm bit.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntWideningMicroConstructor {{
+// Micro-op constructor: latches vm, then registers dest and source indices.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+        uint32_t _microVl, uint8_t _microIdx)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm;
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntWideningMicroExecute {{
+// Checks RVV/VPU/vill, marks VS dirty; odd micro-ops get offset micro_vlmax.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    using vwu [[maybe_unused]] = typename double_width<vu>::type;
+    using vwi [[maybe_unused]] = typename double_width<vi>::type;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // Operands are read above, before the vill check below.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntNarrowingMicroExecute {{
+// Checks RVV/VPU/vill, marks VS dirty; odd micro-ops get offset micro_vlmax.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    using vwu [[maybe_unused]] = typename double_width<vu>::type;
+    using vwi [[maybe_unused]] = typename double_width<vi>::type;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // Operands are read above, before the vill check below.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntWideningDecodeBlock {{
+// Maps vsew 0..2 to uint8..32_t; NOTE(review): vsew 3 and up hit UNREACHABLE.
+switch(machInst.vtype8.vsew) {
+case 0b000: return new %(class_name)s<uint8_t>(machInst, vlen);
+case 0b001: return new %(class_name)s<uint16_t>(machInst, vlen);
+case 0b010: return new %(class_name)s<uint32_t>(machInst, vlen);
+default: GEM5_UNREACHABLE;
+}
+
+}};
+
+def template VectorFloatMacroDeclare {{
+// Macro-op class declaration; disassembly is inherited from the base class.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorFloatMacroConstructor {{
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl;
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // A zero micro_vl gets a single nop micro-op instead of real ones.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    // NOTE(review): the last micro-op also carries the delayed-commit flag.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatMicroDeclare {{
+// Micro-op class declaration: up to 4 sources, 1 destination and the vm bit.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs1, vs2, vs3(old_vd), vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorFloatMicroConstructor {{ // micro-op constructor
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+                                         uint32_t _microVl, uint8_t _microIdx)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // reset both counts before the generated setup below
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatMicroExecute {{
+// execute() for one micro-op of a floating-point vector instruction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu = decltype(et::v); // type of the v member of ElemType
+    // Illegal instruction if RVV is absent from MISA or the VPU is off.
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    VRM_REQUIRED; // NOTE(review): macro defined outside this chunk
+    // Substituted code: operand setup, the operation body, write-back.
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatDecodeBlock {{
+// vsew 0b010 selects float32_t and 0b011 selects float64_t elements.
+switch(machInst.vtype8.vsew) {
+case 0b010: return new %(class_name)s<float32_t>(machInst, vlen);
+case 0b011: return new %(class_name)s<float64_t>(machInst, vlen);
+default: GEM5_UNREACHABLE; // no case for the remaining vsew encodings
+}
+
+}};
+
+def template VectorFloatCvtMacroDeclare {{
+// Macro-op class declaration that supplies its own disassembly string.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    std::string generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const override
+    {
+        std::stringstream ss; // "<mnemonic> <dest0>, <src0>[, v0.t]"
+        ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
+            << registerName(srcRegIdx(0));
+        if (machInst.vm == 0) ss << ", v0.t"; // vm == 0: show the mask
+        return ss.str();
+    }
+};
+
+}};
+
+def template VectorFloatCvtMicroDeclare {{
+// Micro-op class declaration that supplies its own disassembly string.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[3];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    std::string generateDisassembly(Addr pc,
+        const loader::SymbolTable *symtab) const override
+    {
+        std::stringstream ss; // "<mnemonic> <dest0>, <src0>[, v0.t]"
+        ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
+            << registerName(srcRegIdx(0));
+        if (machInst.vm == 0) ss << ", v0.t"; // vm == 0: show the mask
+        return ss.str();
+    }
+};
+
+}};
+
+
+def template VectorFloatWideningMicroExecute {{
+// execute() for one micro-op of a widening floating-point vector inst.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu [[maybe_unused]] = decltype(et::v);
+    using ewt = typename double_width<et>::type; // wide element type
+    using vwu = decltype(ewt::v);
+    // Illegal instruction if RVV is absent from MISA or the VPU is off.
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    VRM_REQUIRED; // NOTE(review): macro defined outside this chunk
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // offset is 0 for even micro-ops and micro_vlmax for odd ones.
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+    // NOTE(review): offset is presumably read by the code snippet below.
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatNarrowingMicroExecute {{
+// execute() for one micro-op of a narrowing floating-point vector inst.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu [[maybe_unused]] = decltype(et::v);
+    using ewt = typename double_width<et>::type; // wide element type
+    using vwu = decltype(ewt::v);
+    // Illegal instruction if RVV is absent from MISA or the VPU is off.
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    VRM_REQUIRED; // NOTE(review): macro defined outside this chunk
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // offset is 0 for even micro-ops and micro_vlmax for odd ones.
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+    // NOTE(review): offset is presumably read by the code snippet below.
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatWideningDecodeBlock {{
+// Only vsew 0b010 (float32_t elements) is decoded by this block.
+switch(machInst.vtype8.vsew) {
+case 0b010: return new %(class_name)s<float32_t>(machInst, vlen);
+default: GEM5_UNREACHABLE; // every other vsew encoding ends up here
+}
+
+}};
+
+def template ViotaMacroDeclare {{
+// Class declaration for the viota-style macro-op.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    int cnt = 0; // its address is passed to the micro-ops by the constructor
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+
+def template ViotaMacroConstructor {{
+// Macro-op constructor: creates the micro-ops and hands each one &cnt.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl; // elements not yet given to a micro-op
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax); // next chunk size
+
+    StaticInstPtr microop;
+
+    // Allow one empty micro op to hold IsLastMicroop flag
+    for (int i = 0; i < num_microops && micro_vl >= 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i,
+            &cnt);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    // Tag the ends of the micro-op sequence.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template ViotaMicroDeclare {{
+// Class declaration for one micro-op of the viota-style instruction.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;
+    int* cnt; // pointer received through the constructor's cnt argument
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx, int* cnt);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template ViotaMicroConstructor {{
+// Micro-op constructor: stores vm and cnt, then sets up the registers.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+    uint32_t _microVl, uint8_t _microIdx, int* cnt)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm;
+    this->cnt = cnt;
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // reset both counts before the generated setup below
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+    // vs2 is appended as one more source after the generated ones.
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]);
+}
+
+%(declare_varith_template)s;
+}};
+
+def template ViotaMicroExecute {{
+// execute() for one micro-op of the viota-style instruction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    // Illegal instruction if RVV is absent from MISA or the VPU is off.
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // NOTE(review): cnt is not used here; presumably the code snippet is.
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+
+def template Vector1Vs1VdMaskConstructor {{
+// Constructor: stores vm, then runs the generated register setup.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+    %(set_vm_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template Vector1Vs1VdMaskExecute {{
+// execute() for this instruction class; vu is fixed to uint8_t.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu = uint8_t;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // Substituted code: operand setup, the operation body, write-back.
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template Vector1Vs1RdMaskDeclare {{
+// Class declaration: two source register slots and one destination slot.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    RegId srcRegIdxArr[2];
+    RegId destRegIdxArr[1];
+    bool vm; // assigned from _machInst.vm by the constructor template
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template Vector1Vs1RdMaskConstructor {{
+// Constructor: stores vm, then runs the generated constructor code.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    %(set_vm_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template Vector1Vs1RdMaskExecute {{
+// execute() for this instruction class; Rd is a local that starts at 0.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // Rd is declared by hand below; this template has no op_decl slot.
+    %(op_rd)s;
+    uint64_t Rd = 0;
+    %(vm_decl_rd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntMaskMacroDeclare {{
+// Class declaration for an integer mask-producing vector macro-op.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorIntMaskMacroConstructor {{
+// Macro-op constructor: per-chunk micro-ops plus a final merge micro-op.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl; // elements not yet given to a micro-op
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax); // next chunk size
+    StaticInstPtr microop;
+    // An empty first chunk gets one no-op; otherwise one micro-op per chunk.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    microop = new VMaskMergeMicroInst(_machInst, _machInst.vd,
+        this->microops.size(), _vlen, sizeof(ElemType));
+    this->microops.push_back(microop); // the merge micro-op goes last
+    // Tag the ends of the micro-op sequence.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntMaskMicroDeclare {{
+// Class declaration for one micro-op of an integer mask-producing inst.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs1(rs1), vs2, old_vd, v0 for *.vv[m] or *.vx[m]
+    // vs2, old_vd, v0 for *.vi[m]
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm; // assigned from _machInst.vm by the constructor template
+public:
+    %(class_name)s(ExtMachInst _machInst,
+                   uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorIntMaskMicroConstructor {{
+// Micro-op constructor: stores vm, then runs the generated register setup.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+                                         uint32_t _microVl, uint8_t _microIdx)
+: %(base_class)s("%(mnemonic)s", _machInst,
+                 %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // reset both counts before the generated setup below
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntMaskMicroExecute {{
+// execute() for one micro-op of an integer mask-producing instruction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // offset = microIdx * (vlenb / sizeof(ElemType)), for the code below.
+    const uint16_t bit_offset = vlenb / sizeof(ElemType);
+    const uint16_t offset = bit_offset * microIdx;
+
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatMaskMacroDeclare {{
+// Class declaration for a floating-point mask-producing vector macro-op.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorFloatMaskMacroConstructor {{
+// Macro-op constructor: per-chunk micro-ops plus a final merge micro-op.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl; // elements not yet given to a micro-op
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax); // next chunk size
+    StaticInstPtr microop;
+    // An empty first chunk gets one no-op; otherwise one micro-op per chunk.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    microop = new VMaskMergeMicroInst(_machInst, _machInst.vd,
+        this->microops.size(), _vlen, sizeof(ElemType));
+    this->microops.push_back(microop); // the merge micro-op goes last
+    // Tag the ends of the micro-op sequence.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatMaskMicroDeclare {{
+// Class declaration for one micro-op of a float mask-producing inst.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs1(rs1), vs2, old_vd, v0 for *.vv or *.vf
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm; // assigned from _machInst.vm by the constructor template
+public:
+    %(class_name)s(ExtMachInst _machInst,
+                   uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorFloatMaskMicroConstructor {{
+// Micro-op constructor: stores vm, then runs the generated register setup.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+                                         uint32_t _microVl, uint8_t _microIdx)
+: %(base_class)s("%(mnemonic)s", _machInst,
+                 %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // reset both counts before the generated setup below
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatMaskMicroExecute {{
+// execute() for one micro-op of a float mask-producing instruction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu = decltype(et::v); // type of the v member of ElemType
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // offset = microIdx * (vlenb / sizeof(ElemType)), for the code below.
+    const uint16_t bit_offset = vlenb / sizeof(ElemType);
+    const uint16_t offset = bit_offset * microIdx;
+
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VMvWholeMacroDeclare {{
+// Class declaration for the whole-register move macro-op (not a template).
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VMvWholeMacroConstructor {{
+// Macro-op constructor: creates simm3 + 1 micro-ops, each with microVl 0.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = _machInst.simm3 + 1;
+    StaticInstPtr microop;
+
+    for (int i = 0; i < num_microops; ++i) {
+        microop = new %(class_name)sMicro(_machInst, 0, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+    }
+    // Tag the ends of the micro-op sequence.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VMvWholeMicroDeclare {{
+// Class declaration for one micro-op of the whole-register move.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[1];
+    RegId destRegIdxArr[1];
+    bool vm; // NOTE(review): not assigned by the constructor template
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VMvWholeMicroConstructor {{
+// Micro-op constructor: dest is vd + _microIdx, source is vs2 + _microIdx.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst,
+                               uint32_t _microVl, uint8_t _microIdx)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // counts are rebuilt by the explicit calls below
+    _numDestRegs = 0;
+    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
+    _numTypedDestRegs[VecRegClass]++;
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _microIdx]);
+}
+
+}};
+
+def template VMvWholeMicroExecute {{
+// execute() for one micro-op of the whole-register move.
+Fault
+%(class_name)s::execute(ExecContext* xc, trace::InstRecord* traceData) const
+{
+    // TODO: Check register alignment.
+    // TODO: If vd is equal to vs2 the instruction is an architectural NOP.
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    // Note: unlike sibling templates, machInst.vill is not checked here.
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlen)s;
+    for (size_t i = 0; i < (vlen / 64); i++) { // one pass per 64 bits of vlen
+        %(code)s;
+    }
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VectorMaskDeclare {{
+// Class declaration: three source register slots and one destination slot.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    RegId srcRegIdxArr[3];
+    RegId destRegIdxArr[1];
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorMaskConstructor {{
+// Constructor: only runs the generated register setup code.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    %(set_reg_idx_arr)s;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorMaskExecute {{
+// execute() for this instruction class; vu is fixed to uint8_t.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu = uint8_t;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    // TODO: remove it
+    %(set_vlenb)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorMaskDecodeBlock {{
+// Always instantiated with uint8_t; vsew is not consulted here.
+return new %(class_name)s<uint8_t>(machInst);
+
+}};
+
+def template VectorNonSplitDeclare {{
+// Class declaration: two source register slots and one destination slot.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    RegId srcRegIdxArr[2];
+    RegId destRegIdxArr[1];
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorNonSplitConstructor {{
+// Constructor: only runs the generated setup code; no micro-ops are built.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    %(set_vm_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntNonSplitExecute {{
+// execute() for an integer instruction handled by a single class.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                    trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // Substituted code: operand setup, the operation body, write-back.
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatNonSplitExecute {{
+// execute() for a floating-point instruction handled by a single class.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                    trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu = decltype(et::v); // type of the v member of ElemType
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // Substituted code: operand setup, the operation body, write-back.
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatNonSplitDecodeBlock {{
+// vsew 0b010 selects float32_t and 0b011 selects float64_t elements.
+switch(machInst.vtype8.vsew) {
+case 0b010: return new %(class_name)s<float32_t>(machInst);
+case 0b011: return new %(class_name)s<float64_t>(machInst);
+default: GEM5_UNREACHABLE; // no case for the remaining vsew encodings
+}
+
+}};
+
+def template VectorIntNonSplitDecodeBlock {{
+// vsew 0b000..0b011 select uint8_t, uint16_t, uint32_t and uint64_t.
+switch(machInst.vtype8.vsew) {
+case 0b000: return new %(class_name)s<uint8_t>(machInst);
+case 0b001: return new %(class_name)s<uint16_t>(machInst);
+case 0b010: return new %(class_name)s<uint32_t>(machInst);
+case 0b011: return new %(class_name)s<uint64_t>(machInst);
+default: GEM5_UNREACHABLE; // no case for the remaining vsew encodings
+}
+
+}};
+
+def template VectorReduceMacroDeclare {{
+// Class declaration for a vector reduction macro-op.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorReduceMacroConstructor {{
+// Macro-op constructor: builds the micro-op list for a reduction.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl; // elements not yet given to a micro-op
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax); // next chunk size
+    StaticInstPtr microop;
+    // An empty first chunk gets one no-op; otherwise one micro-op per chunk.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro<ElemType>(_machInst, micro_vl, i);
+        microop->setDelayedCommit();
+        this->microops.push_back(microop);
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    this->microops.front()->setFirstMicroop(); // tag both ends of the list
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorReduceMicroDeclare {{
+// Class declaration for one micro-op of a vector reduction.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs2, vs1, vd, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm; // assigned from _machInst.vm by the constructor template
+public:
+    %(class_name)s(ExtMachInst _machInst,
+                   uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly; // keep the base disassembly
+};
+
+}};
+
+def template VectorReduceMicroConstructor {{
+// Micro-op constructor: stores vm, then runs the generated register setup.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+                                         uint32_t _microVl, uint8_t _microIdx)
+: %(base_class)s("%(mnemonic)s", _machInst,
+                 %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm; // cache the vm field of the encoding
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // reset both counts before the generated setup below
+    _numDestRegs = 0;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorReduceIntMicroExecute {{
+// execute() for one micro-op of an integer vector reduction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    %(type_def)s;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // Helper for the code below: fold f over the elements enabled by vm/v0.
+    auto reduce_loop =
+        [&, this](const auto& f, const auto* _, const auto* vs2) {
+            ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
+            for (uint32_t i = 0; i < this->microVl; i++) {
+                uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
+                    this->microIdx; // index used for the v0 mask lookup
+                if (this->vm || elem_mask(v0, ei)) {
+                    microop_result = f(microop_result, Vs2[i]);
+                }
+            }
+            return microop_result;
+        };
+    // NOTE(review): the lambda reads the captured Vs2; vs2 and _ are unused.
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorReduceFloatMicroExecute {{
+// execute() for one micro-op of a floating-point vector reduction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    %(type_def)s;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // A set vill bit also makes the instruction illegal.
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Record in MISCREG_STATUS that the vector state is now dirty.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // Seed Vd[0]: Vs1[0] when microIdx is 0, old_Vd[0] otherwise.
+    Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
+    // Helper for the code below: fold f over the elements enabled by vm/v0.
+    auto reduce_loop =
+        [&, this](const auto& f, const auto* _, const auto* vs2) {
+            vu tmp_val = Vd[0];
+            for (uint32_t i = 0; i < this->microVl; i++) {
+                uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) *
+                    this->microIdx; // index used for the v0 mask lookup
+                if (this->vm || elem_mask(v0, ei)) {
+                    tmp_val = f(tmp_val, Vs2[i]).v;
+                }
+            }
+            return tmp_val;
+        };
+    // NOTE(review): the lambda reads the captured Vs2; vs2 and _ are unused.
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorReduceFloatWideningMicroExecute {{
+// execute() of one micro-op of a widening floating-point reduction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    %(type_def)s;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // Micro-op 0 seeds from Vs1[0]; later ones continue from old_Vd[0].
+    Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
+    // Folds f over the active elements; `_` and `vs2` are not read here.
+    auto reduce_loop =
+        [&, this](const auto& f, const auto* _, const auto* vs2) {
+            const uint32_t ei_base =
+                vtype_VLMAX(vtype, vlen, true) * this->microIdx;
+            vwu tmp_val = Vd[0];
+            for (uint32_t i = 0; i < this->microVl; i++) {
+                if (this->vm || elem_mask(v0, ei_base + i)) {
+                    tmp_val = f(tmp_val, Vs2[i]).v;
+                }
+            }
+            return tmp_val;
+        };
+
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorGatherMacroDeclare {{
+// Macro-op class of a vector gather, typed by data and index element.
+template<typename ElemType, typename IndexType>
+class %(class_name)s : public %(base_class)s{
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorGatherMacroConstructor {{
+// Splits a vector gather into micro-ops: one per (vd/vs1 chunk, vs2 reg).
+template<typename ElemType, typename IndexType>
+%(class_name)s<ElemType, IndexType>::%(class_name)s(ExtMachInst _machInst,
+    uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    constexpr uint32_t vd_eewb = sizeof(ElemType);
+    constexpr uint32_t vs2_eewb = sizeof(ElemType);
+    constexpr uint32_t vs1_eewb = sizeof(IndexType);
+    const int8_t lmul = vtype_vlmul(vtype);
+    int8_t vs1_emul = lmul;  // log2(EMUL) = lmul + log2(vs1 EEW / SEW)
+    for (uint32_t w = vs2_eewb; w < vs1_eewb; w <<= 1) ++vs1_emul;
+    for (uint32_t w = vs1_eewb; w < vs2_eewb; w <<= 1) --vs1_emul;
+    const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul;
+    const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul;
+    const uint8_t vd_vregs = vs2_vregs;
+    uint32_t vlenb = vlen >> 3;
+    const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb);
+    int32_t remaining_vl = this->vl;
+    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // vl == 0 still gets one nop so front()/back() below stay valid.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (uint8_t i = 0; i < std::max(vs1_vregs, vd_vregs) && micro_vl > 0;
+            i++) {
+        for (uint8_t j = 0; j < vs2_vregs; j++) {
+            microop = new %(class_name)sMicro<ElemType, IndexType>(
+                _machInst, micro_vl, i * vs2_vregs + j);
+            microop->setDelayedCommit();
+            this->microops.push_back(microop);
+        }
+        micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
+    }
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorGatherMicroDeclare {{
+// Micro-op class of a vector gather, typed by data and index element.
+template<typename ElemType, typename IndexType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // Room for up to four sources: vs2, vs1, vd, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;  // copied from _machInst.vm by the constructor
+public:
+    %(class_name)s(ExtMachInst _machInst,
+                   uint32_t _microVl, uint8_t _microIdx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorGatherMicroConstructor {{
+// Constructor of a gather micro-op; decodes _microIdx into register indices.
+template<typename ElemType, typename IndexType>
+%(class_name)s<ElemType, IndexType>::%(class_name)s(ExtMachInst _machInst,
+    uint32_t _microVl, uint8_t _microIdx)
+: %(base_class)s("%(mnemonic)s", _machInst,
+                 %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm;
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;  // presumably re-counted by set_src_reg_idx below
+    _numDestRegs = 0;  // presumably re-counted by set_dest_reg_idx below
+    [[maybe_unused]] constexpr uint32_t vd_eewb = sizeof(ElemType);
+    [[maybe_unused]] constexpr uint32_t vs2_eewb = sizeof(ElemType);
+    [[maybe_unused]] constexpr uint32_t vs1_eewb = sizeof(IndexType);
+    constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb;
+    constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb;
+    const int8_t lmul = vtype_vlmul(vtype);  // negative => one register
+    const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul;
+    [[maybe_unused]] const uint8_t vs2_idx = _microIdx % vs2_vregs;
+    [[maybe_unused]] const uint8_t vs1_idx =
+        _microIdx / vs2_vregs / vs1_split_num;
+    [[maybe_unused]] const uint8_t vd_idx =
+        _microIdx / vs2_vregs / vd_split_num;
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorGatherMicroExecute {{
+// execute() of one vector-gather micro-op (data ElemType, index IndexType).
+template <typename ElemType, typename IndexType>
+Fault
+%(class_name)s<ElemType, IndexType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    const uint32_t vlmax = vtype_VLMAX(vtype,vlen);
+    constexpr uint8_t vd_eewb = sizeof(ElemType);
+    constexpr uint8_t vs1_eewb = sizeof(IndexType);
+    constexpr uint8_t vs2_eewb = sizeof(ElemType);
+    constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb;
+    constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb;
+    [[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb;
+    [[maybe_unused]] const uint16_t vs1_elems = vlenb / vs1_eewb;
+    [[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb;
+    [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype);
+    [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul;
+    [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs;
+    [[maybe_unused]] const uint8_t vs1_idx =
+        microIdx / vs2_vregs / vs1_split_num;
+    [[maybe_unused]] const uint8_t vd_idx =
+        microIdx / vs2_vregs / vd_split_num;
+    [[maybe_unused]] const uint16_t vs1_bias =
+        vs1_elems * (vd_idx % vs1_split_num) / vs1_split_num;
+    [[maybe_unused]] const uint16_t vd_bias =
+        vd_elems * (vs1_idx % vd_split_num) / vd_split_num;
+    // The locals above are presumably consumed by the substituted code.
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorGatherDecodeBlock {{
+// Picks the data element type from vsew and builds the matching macro-op.
+switch (machInst.vtype8.vsew) {
+    case 0b000: {
+        using elem_type [[maybe_unused]] = uint8_t;
+        return new %(class_name)s<elem_type, %(idx_type)s>(machInst, vlen);
+    }
+    case 0b001: {
+        using elem_type [[maybe_unused]] = uint16_t;
+        return new %(class_name)s<elem_type, %(idx_type)s>(machInst, vlen);
+    }
+    case 0b010: {
+        using elem_type [[maybe_unused]] = uint32_t;
+        return new %(class_name)s<elem_type, %(idx_type)s>(machInst, vlen);
+    }
+    case 0b011: {
+        using elem_type [[maybe_unused]] = uint64_t;
+        return new %(class_name)s<elem_type, %(idx_type)s>(machInst, vlen);
+    }
+    default: GEM5_UNREACHABLE;
+}
+
+}};
+
+def template VectorIntVxsatMacroDeclare {{
+// Macro-op class for integer vector ops that carry a vxsat flag.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s{
+private:
+    %(reg_idx_arr_decl)s;
+    bool vxsat = false;  // its address is handed to every micro-op
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntVxsatMacroConstructor {{
+// One data micro-op per register, then a trailing VxsatMicroInst.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int32_t elems_per_reg = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    const uint32_t reg_count = vtype_regs_per_group(vtype);
+    int32_t elems_left = this->vl;
+    int32_t chunk_vl = std::min(elems_left, elems_per_reg);
+    StaticInstPtr uop;
+    if (chunk_vl == 0) {
+        uop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(uop);
+    }
+    for (uint32_t idx = 0; idx < reg_count && chunk_vl > 0; ++idx) {
+        uop = new %(class_name)sMicro<ElemType>(_machInst,
+            chunk_vl, idx, &vxsat);
+        uop->setDelayedCommit();
+        this->microops.push_back(uop);
+        elems_left -= elems_per_reg;
+        chunk_vl = std::min(elems_left, elems_per_reg);
+    }
+    // The trailing micro-op gets the same vxsat pointer as the data ones.
+    uop = new VxsatMicroInst(&vxsat, _machInst);
+    uop->setFlag(StaticInst::IsSerializeAfter);
+    uop->setFlag(StaticInst::IsNonSpeculative);
+    this->microops.push_back(uop);
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorIntVxsatMicroDeclare {{
+// Micro-op class for integer vector ops that carry a vxsat flag pointer.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;  // copied from _machInst.vm by the constructor
+    bool* vxsatptr;  // supplied by the macro-op; not owned here
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+                   uint8_t _microIdx, bool* vxsatptr);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorIntVxsatMicroConstructor {{
+// Stores the mask bit and vxsat pointer, then registers the operands.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+    uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr)
+    : %(base_class)s("%(mnemonic)s", _machInst,
+                     %(op_class)s, _microVl, _microIdx)
+{
+    this->vm = _machInst.vm;
+    this->vxsatptr = vxsatptr;  // the parameter shadows the member
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;  // presumably re-counted by set_src_reg_idx below
+    _numDestRegs = 0;  // presumably re-counted by set_dest_reg_idx below
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorReduceIntWideningMicroExecute {{
+// execute() of one micro-op of a widening integer vector reduction.
+template <typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    using vwu [[maybe_unused]] = typename double_width<vu>::type;
+    using vwi [[maybe_unused]] = typename double_width<vi>::type;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    // Micro-op 0 seeds from Vs1[0]; later ones continue from old_Vd[0].
+    Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
+    // Folds f over the active elements; `_` and `vs2` are not read here.
+    auto reduce_loop =
+        [&, this](const auto& f, const auto* _, const auto* vs2) {
+            const uint32_t ei_base =
+                vtype_VLMAX(vtype, vlen, true) * this->microIdx;
+            vwu tmp_val = Vd[0];
+            for (uint32_t i = 0; i < this->microVl; i++) {
+                if (this->vm || elem_mask(v0, ei_base + i)) {
+                    tmp_val = f(tmp_val, Vs2[i]);
+                }
+            }
+            return tmp_val;
+        };
+
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorSlideMacroDeclare {{
+// Macro-op class shared by the slide-up and slide-down constructors.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorSlideUpMacroConstructor {{
+// Slide-up: destination register i gets a micro-op per source j <= i.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int32_t elems_per_reg = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    const uint32_t reg_count = vtype_regs_per_group(vtype);
+    int32_t elems_left = this->vl;
+    int32_t chunk_vl = std::min(elems_left, elems_per_reg);
+    StaticInstPtr uop;
+    if (chunk_vl == 0) {
+        uop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(uop);
+    }
+    // TODO: statically filter out micro-ops that are useless.
+    int next_idx = 0;
+    for (uint32_t vd_idx = 0; vd_idx < reg_count && chunk_vl > 0; ++vd_idx) {
+        for (uint32_t vs2_idx = 0; vs2_idx <= vd_idx; ++vs2_idx) {
+            uop = new %(class_name)sMicro<ElemType>(
+                _machInst, chunk_vl, next_idx++, vd_idx, vs2_idx);
+            uop->setDelayedCommit();
+            this->microops.push_back(uop);
+        }
+        elems_left -= elems_per_reg;
+        chunk_vl = std::min(elems_left, elems_per_reg);
+    }
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorSlideDownMacroConstructor {{
+// Slide-down: destination register i gets a micro-op per source j >= i.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int32_t elems_per_reg = vtype_VLMAX(_machInst.vtype8, vlen, true);
+    const uint32_t reg_count = vtype_regs_per_group(vtype);
+    int32_t elems_left = this->vl;
+    int32_t chunk_vl = std::min(elems_left, elems_per_reg);
+    StaticInstPtr uop;
+    if (chunk_vl == 0) {
+        uop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(uop);
+    }
+    // TODO: statically filter out micro-ops that are useless.
+    int next_idx = 0;
+    for (uint32_t vd_idx = 0; vd_idx < reg_count && chunk_vl > 0; ++vd_idx) {
+        for (uint32_t vs2_idx = vd_idx; vs2_idx < reg_count; ++vs2_idx) {
+            uop = new %(class_name)sMicro<ElemType>(
+                _machInst, chunk_vl, next_idx++, vd_idx, vs2_idx);
+            uop->setDelayedCommit();
+            this->microops.push_back(uop);
+        }
+        elems_left -= elems_per_reg;
+        chunk_vl = std::min(elems_left, elems_per_reg);
+    }
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorSlideMicroDeclare {{
+// Micro-op class of a vector slide for one (vd, vs2) register pair.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs2, vs1, vs3(old_vd), vm for *.vv, *.vx
+    // vs2, (old_vd), vm for *.vi
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;  // copied from _machInst.vm by the constructor
+public:
+    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
+        uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx);
+    Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorSlideMicroConstructor {{
+// Forwards the indices to the base class and registers the operands.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+        uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
+        _microIdx, _vdIdx, _vs2Idx)
+{
+    this->vm = _machInst.vm;
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;  // presumably re-counted by set_src_reg_idx below
+    _numDestRegs = 0;  // presumably re-counted by set_dest_reg_idx below
+    %(set_dest_reg_idx)s;
+    %(set_src_reg_idx)s;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorSlideMicroExecute {{
+// execute() of one integer vector-slide micro-op.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // vlmax is presumably read by the substituted code below.
+    [[maybe_unused]] const uint32_t vlmax = vtype_VLMAX(vtype, vlen);
+
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
+
+def template VectorFloatSlideMicroExecute {{
+// execute() of one floating-point vector-slide micro-op.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext* xc,
+                                  trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu = decltype(et::v);  // type of ElemType's `v` member
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    // vlmax is presumably read by the substituted code below.
+    [[maybe_unused]] const uint32_t vlmax = vtype_VLMAX(vtype, vlen);
+
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+
+    return NoFault;
+}
+
+%(declare_varith_template)s;
+
+}};
diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa
new file mode 100644
index 0000000000..84cee9af73
--- /dev/null
+++ b/src/arch/riscv/isa/templates/vector_mem.isa
@@ -0,0 +1,1671 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2022 PLCT Lab
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+def template VMemMacroDeclare {{
+// Macro-op class declaration for non-templated vector memory instructions.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VMemTemplateMacroDeclare {{
+// Same declaration as VMemMacroDeclare, but templated on ElemType.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    %(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VleConstructor {{
+// Splits a vector load into one load micro-op per vector register.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+    int32_t remaining_vl = this->vl;
+    // Integer ceiling division; no round trip through float and ceil().
+    const uint32_t num_microops =
+        (remaining_vl + micro_vlmax - 1) / micro_vlmax;
+    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
+    StaticInstPtr microop;
+
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (uint32_t i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsLoad);
+        this->microops.push_back(microop);
+        micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
+    }
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VleMicroDeclare {{
+// Micro-op class of a vector load covering one vector register.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[3];  // rs1, old vd, and v0 when masked
+    RegId destRegIdxArr[1];
+public:
+    %(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
+        uint8_t _microIdx, uint32_t _vlen);
+    // NOTE(review): VleConstructor passes an int32_t into uint8_t _microVl.
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+
+};
+
+}};
+
+def template VleMicroConstructor {{
+// Registers operands; micro-op k reads and writes vector register vd + k.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
+    uint8_t _microIdx, uint32_t _vlen)
+  : %(base_class)s(
+        "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    const RegId vd_reg = vecRegClass[_machInst.vd + _microIdx];
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    setDestRegIdx(_numDestRegs++, vd_reg);
+    ++_numTypedDestRegs[VecRegClass];
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, vd_reg);
+    if (!_machInst.vm)  // masked: v0 becomes the last source
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
+}
+
+}};
+
+def template VleMicroExecute {{
+// execute(): one contiguous xc->readMem, then per-element memacc_code.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Addr EA;
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(ea_code)s;
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // left unset when the instruction is unmasked
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Legality checks passed: flag the vector state as modified.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // When masked, v0 is the last source operand (see VleMicroConstructor).
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // Bytes to read: microVl elements of EEW / 8 bytes each.
+    uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl;
+
+    const std::vector<bool> byte_enable(mem_size, true);
+    Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
+                              byte_enable);
+    if (fault != NoFault)
+        return fault;
+
+    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const size_t micro_elems = vlen / width_EEW(machInst.width);
+
+    size_t ei;
+    // Visits every element slot of the register, not only microVl of them.
+    for (size_t i = 0; i < micro_elems; i++) {
+        ei = i + micro_vlmax * microIdx;
+        %(memacc_code)s;
+    }
+
+    %(op_wb)s;
+    return fault;
+}
+
+}};
+
+def template VleMicroInitiateAcc {{
+// initiateAcc(): validates the instruction and issues the memory read.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+
+    Addr EA;
+
+    %(op_src_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // Bytes to read: microVl elements of EEW / 8 bytes each.
+    uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;
+    // All bytes enabled here; nothing is written back in this function.
+    const std::vector<bool> byte_enable(mem_size, true);
+    Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
+                                  byte_enable);
+    return fault;
+}
+
+}};
+
+def template VleMicroCompleteAcc {{
+// completeAcc(): copies the packet payload and runs memacc_code on it.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
+                            trace::InstRecord *traceData) const
+{
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlen)s;
+    // No rvv/vill check here; presumably initiateAcc has already done it.
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // left unset when the instruction is unmasked
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+
+    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const size_t micro_elems = vlen / width_EEW(machInst.width);
+    // Visits every element slot of the register, not only microVl of them.
+    size_t ei;
+    for (size_t i = 0; i < micro_elems; i++) {
+        ei = i + micro_vlmax * microIdx;
+        %(memacc_code)s;
+    }
+
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VseConstructor {{
+// Splits a vector store into one store micro-op per vector register.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+    int32_t remaining_vl = this->vl;
+    // Integer ceiling division; no round trip through float and ceil().
+    const uint32_t num_microops =
+        (remaining_vl + micro_vlmax - 1) / micro_vlmax;
+    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
+    StaticInstPtr microop;
+
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (uint32_t i = 0; i < num_microops && micro_vl > 0; ++i) {
+        microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsStore);
+        this->microops.push_back(microop);
+        micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
+    }
+    // Same helpers the other macro-op constructors in this diff use.
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VseMicroDeclare {{
+// Micro-op class of a vector store covering one vector register.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    RegId srcRegIdxArr[3];  // rs1, vs3, and v0 when masked
+    RegId destRegIdxArr[0];  // the constructor registers no destination
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VseMicroConstructor {{
+// Registers the sources of one store micro-op; it has no destination.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst,
+    uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
+  : %(base_class)s(
+        "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    // Source order: base address, store data, then v0 when masked.
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
+    if (!_machInst.vm)
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
+    setFlag(IsVector);
+    setFlag(IsStore);
+}
+
+}};
+
+def template VseMicroExecute {{
+// execute(): writes the active elements of one register with xc->writeMem.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Addr EA;
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // left unset when the instruction is unmasked
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(ea_code)s;
+    // byte_enable starts all-false; only active elements are switched on.
+    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const size_t eewb = width_EEW(machInst.width) / 8;
+    const size_t mem_size = eewb * microVl;
+    std::vector<bool> byte_enable(mem_size, false);
+    size_t ei;
+    for (size_t i = 0; i < microVl; i++) {
+        ei = i + micro_vlmax * microIdx;
+        if (machInst.vm || elem_mask(v0, ei)) {
+            %(memacc_code)s;
+            auto it = byte_enable.begin() + i * eewb;
+            std::fill(it, it + eewb, true);
+        }
+    }
+
+    Fault fault;
+    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
+                         nullptr, byte_enable);
+    return fault;
+}
+
+}};
+
+def template VseMicroInitiateAcc {{
+// initiateAcc(): same steps as VseMicroExecute, ending in xc->writeMem.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    Addr EA;
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // left unset when the instruction is unmasked
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(ea_code)s;
+    // byte_enable starts all-false; only active elements are switched on.
+    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
+    const size_t eewb = width_EEW(machInst.width) / 8;
+    const size_t mem_size = eewb * microVl;
+    std::vector<bool> byte_enable(mem_size, false);
+    size_t ei;
+    for (size_t i = 0; i < microVl; i++) {
+        ei = i + micro_vlmax * microIdx;
+        if (machInst.vm || elem_mask(v0, ei)) {
+            %(memacc_code)s;
+            auto it = byte_enable.begin() + i * eewb;
+            std::fill(it, it + eewb, true);
+        }
+    }
+
+    Fault fault;
+    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
+                         nullptr, byte_enable);
+    return fault;
+}
+
+}};
+
+def template VseMicroCompleteAcc {{
+// completeAcc(): performs no work for this store and returns NoFault.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    return NoFault;
+}
+
+}};
+
+def template VlmConstructor {{
+// Constructor: builds exactly one micro-op (Vle8_vMicro, or a vector nop).
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // micro_vl is min(vl, micro_vlmax) divided by 8, rounded up.
+    const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+    int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8;
+    StaticInstPtr microop;
+
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst); // nothing to load
+    } else {
+        microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsLoad);
+    }
+    this->microops.push_back(microop);
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VsmConstructor {{
+// Constructor: builds exactly one micro-op (Vse8_vMicro, or a vector nop).
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // micro_vl is min(vl, micro_vlmax) divided by 8, rounded up.
+    const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+    int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8;
+
+    StaticInstPtr microop;
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst); // nothing to store
+    } else {
+        microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsStore);
+    }
+    this->microops.push_back(microop);
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VsWholeConstructor {{
+// Constructor: emits nf + 1 store micro-ops; micro-op i is given index i.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+
+    size_t NFIELDS = machInst.nf + 1;
+    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+    // NOTE(review): micro_vlmax may exceed a uint8_t ctor parameter - confirm.
+    StaticInstPtr microop;
+    for (int i = 0; i < NFIELDS; ++i) {
+        microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsStore);
+        this->microops.push_back(microop);
+    }
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VsWholeMicroDeclare {{
+// Declaration of the store micro-op class used by the VsWhole templates.
+class %(class_name)s: public %(base_class)s
+{
+private:
+    RegId destRegIdxArr[0]; // no destination registers
+    RegId srcRegIdxArr[2];  // two source register slots
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                        trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VsWholeMicroConstructor {{
+// Constructor: registers rs1 and vs3 + _microIdx as the two sources.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst,
+    uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
+  : %(base_class)s(
+        "%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // operand counts are rebuilt by hand below
+    _numDestRegs = 0;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
+    this->flags[IsVector] = true;
+    this->flags[IsStore] = true;
+}
+
+}};
+
+def template VsWholeMicroExecute {{
+// execute(): checks RVV/VS state, fills Mem, then writes vlenb bytes at EA.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    // NOTE(review): size precedes address here (vlenb, EA), whereas the
+    // initiateAcc template passes (EA, vlenb) - confirm helper signatures.
+    for (size_t i = 0; i < vlenb; i++) { // snippet runs once per byte index
+        %(memacc_code)s;
+    }
+
+    Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem),
+                                   vlenb, EA, memAccessFlags, nullptr);
+    return fault;
+}
+
+}};
+
+def template VsWholeMicroInitiateAcc {{
+// initiateAcc(): checks RVV/VS state, fills Mem, then issues the write.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+        trace::InstRecord* traceData) const
+{
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    // NOTE(review): address precedes size here (EA, vlenb), whereas the
+    // execute template passes (vlenb, EA) - confirm helper signatures.
+    for (size_t i = 0; i < vlenb; i++) { // snippet runs once per byte index
+        %(memacc_code)s;
+    }
+
+    Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem),
+                                   EA, vlenb, memAccessFlags, nullptr);
+    return fault;
+}
+
+}};
+
+def template VsWholeMicroCompleteAcc {{
+// completeAcc(): performs no work for this store and returns NoFault.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    return NoFault;
+}
+
+}};
+
+def template VlWholeConstructor {{
+// Constructor: emits nf + 1 load micro-ops; micro-op i is given index i.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+
+    size_t NFIELDS = machInst.nf + 1;
+    // NOTE(review): micro_vlmax may exceed a uint8_t ctor parameter - confirm.
+    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
+
+    StaticInstPtr microop;
+    for (int i = 0; i < NFIELDS; ++i) {
+        microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen);
+        microop->setDelayedCommit();
+        microop->setFlag(IsLoad);
+        this->microops.push_back(microop);
+    }
+
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VlWholeMicroDeclare {{
+// Declaration of the load micro-op class used by the VlWhole templates.
+class %(class_name)s: public %(base_class)s
+{
+private:
+    RegId destRegIdxArr[1]; // one destination register slot
+    RegId srcRegIdxArr[1];  // one source register slot
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                        trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VlWholeMicroConstructor {{
+// Constructor: destination is vd + _microIdx, the only source is rs1.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst,
+    uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
+  : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, _microVl,
+      _microIdx, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // operand counts are rebuilt by hand below
+    _numDestRegs = 0;
+    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
+    _numTypedDestRegs[VecRegClass]++;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    this->flags[IsVector] = true;
+    this->flags[IsLoad] = true;
+}
+
+}};
+
+def template VlWholeMicroExecute {{
+// execute(): reads vlenb bytes at EA into Mem, then fills the destination.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    // VS is marked dirty before the read, so it stays dirty on a read fault.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(set_vlen)s;
+    %(ea_code)s;
+
+    Fault fault = readMemAtomicLE(xc, traceData, EA,
+                            *(vreg_t::Container*)(&Mem), vlenb,
+                            memAccessFlags);
+    if (fault != NoFault)
+        return fault; // destination is not written back on a fault
+
+    size_t elem_per_reg = vlen / width_EEW(machInst.width);
+    for (size_t i = 0; i < elem_per_reg; i++) {
+        %(memacc_code)s;
+    }
+
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VlWholeMicroInitiateAcc {{
+// initiateAcc(): checks RVV/VS state and starts a read of vlenb bytes at EA.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    %(op_src_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    // Every byte of the request is enabled: there is no masking here.
+    const std::vector<bool> byte_enable(vlenb, true);
+    Fault fault = initiateMemRead(xc, EA, vlenb, memAccessFlags, byte_enable);
+    return fault;
+}
+
+}};
+
+def template VlWholeMicroCompleteAcc {{
+// completeAcc(): copies the packet payload into Mem and fills the dest reg.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
+        trace::InstRecord* traceData) const
+{
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlen)s;
+
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+    // The number of bytes copied is taken from the packet itself.
+    memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+
+    size_t elem_per_reg = vlen / width_EEW(machInst.width);
+    for (size_t i = 0; i < elem_per_reg; ++i) {
+        %(memacc_code)s;
+    }
+
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VlStrideConstructor {{
+// Constructor: emits one load micro-op per element, or one nop if vl is 0.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // NOTE(review): if the Micro ctor takes uint8_t indices, j above 255 wraps.
+    const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
+    int32_t remaining_vl = this->vl;
+    // Num of elems in one vreg
+    int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
+    StaticInstPtr microop;
+    // Outer index i counts registers, inner index j counts elements in one.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; micro_vl > 0; ++i) {
+        for (int j = 0; j < micro_vl; ++j) {
+            microop = new %(class_name)sMicro(machInst, i, j, micro_vl);
+            microop->setFlag(IsDelayedCommit);
+            microop->setFlag(IsLoad);
+            this->microops.push_back(microop);
+        }
+        remaining_vl -= num_elems_per_vreg; // may go negative; ends the loop
+        micro_vl = std::min(remaining_vl, num_elems_per_vreg);
+    }
+
+    this->microops.front()->setFlag(IsFirstMicroop);
+    this->microops.back()->setFlag(IsLastMicroop);
+    this->flags[IsVector] = true;
+}
+
+}};
+
+def template VlStrideMicroDeclare {{
+// Declaration of the per-element load micro-op for the VlStride templates.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // rs1, rs2, vd, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1]; // one destination register slot
+public:
+    %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
+        uint32_t _microVl);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VlStrideMicroConstructor {{
+// Constructor: dest vd + _regIdx; sources rs1, rs2, old vd, and v0 if masked.
+%(class_name)s::%(class_name)s(
+    ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
+    uint32_t _microVl)
+  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
+        _regIdx, _microIdx, _microVl)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // operand counts are rebuilt by hand below
+    _numDestRegs = 0;
+    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]);
+    _numTypedDestRegs[VecRegClass]++;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
+    // We treat agnostic as undisturbed, so the old vd is also a source
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]);
+    if (!_machInst.vm) {
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); // v0, added last
+    }
+    this->flags[IsLoad] = true;
+}
+
+}};
+
+def template VlStrideMicroExecute {{
+// execute(): reads one element from EA into the destination if it is active.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // VS is marked dirty before the memory access is attempted.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+    %(ea_code)s; // ea_code depends on elem_size
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    uint32_t mem_size = elem_size;
+    const std::vector<bool> byte_enable(mem_size, true);
+    // ei: regIdx times elements-per-register plus microIdx; used for masking.
+    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
+                                memAccessFlags, byte_enable);
+        if (fault != NoFault)
+            return fault;
+        %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
+    }
+
+    %(op_wb)s;
+    return fault;
+}
+
+}};
+
+def template VlStrideMicroInitiateAcc {{
+// initiateAcc(): starts a one-element read at EA after the legality checks.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    %(op_src_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+    %(ea_code)s; // ea_code depends on elem_size
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // A masked-off element still issues the read, with every byte disabled.
+    uint32_t mem_size = elem_size;
+    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
+    bool need_load = machInst.vm || elem_mask(v0, ei);
+    const std::vector<bool> byte_enable(mem_size, need_load);
+    fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
+    return fault;
+}
+
+}};
+
+def template VlStrideMicroCompleteAcc {{
+// completeAcc(): starts from the old vd, then overlays the loaded element.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
+                            trace::InstRecord *traceData) const
+{
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+
+    RiscvISA::vreg_t old_vd;
+    decltype(Vd) old_Vd = nullptr;
+    // We treat agnostic as undisturbed; source slot 2 holds the old vd
+    xc->getRegOperand(this, 2, &old_vd);
+    old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >();
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // As written, both branches below copy all vlenb bytes of the old vd.
+    if (microIdx == 0) {
+        // treat vma as vmu
+        // if (machInst.vtype8.vma == 0)
+        memcpy(Vd, old_Vd, microVl * elem_size);
+        // treat vta as vtu
+        // if (machInst.vtype8.vta == 0)
+        memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size);
+    } else {
+        memcpy(Vd, old_Vd, vlenb);
+    }
+
+    size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+        %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */
+    }
+
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VsStrideConstructor {{
+// Constructor: emits one store micro-op per element, or one nop if vl is 0.
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // NOTE(review): if the Micro ctor takes uint8_t indices, j above 255 wraps.
+    const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width);
+    int32_t remaining_vl = this->vl;
+    // Num of elems in one vreg
+    int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg);
+    StaticInstPtr microop;
+    // Outer index i counts registers, inner index j counts elements in one.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (int i = 0; micro_vl > 0; ++i) {
+        for (int j = 0; j < micro_vl; ++j) {
+            microop = new %(class_name)sMicro(machInst, i, j, micro_vl);
+            microop->setFlag(IsDelayedCommit);
+            microop->setFlag(IsStore);
+            this->microops.push_back(microop);
+        }
+        remaining_vl -= num_elems_per_vreg; // may go negative; ends the loop
+        micro_vl = std::min(remaining_vl, num_elems_per_vreg);
+    }
+
+    this->microops.front()->setFlag(IsFirstMicroop);
+    this->microops.back()->setFlag(IsLastMicroop);
+    this->flags[IsVector] = true;
+}
+
+}};
+
+def template VsStrideMicroDeclare {{
+// Declaration of the per-element store micro-op for the VsStride templates.
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // rs1, rs2, vs3, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[0]; // no destination registers
+public:
+    %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
+            uint32_t _microVl);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VsStrideMicroConstructor {{
+// Constructor: sources are rs1, rs2, vs3 + _regIdx, and v0 when masked.
+%(class_name)s::%(class_name)s(
+    ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
+    uint32_t _microVl)
+  : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s,
+      _regIdx, _microIdx, _microVl)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // operand counts are rebuilt by hand below
+    _numDestRegs = 0;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]);
+    if (!_machInst.vm) {
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); // v0, added last
+    }
+    this->flags[IsStore] = true;
+}
+
+}};
+
+def template VsStrideMicroExecute {{
+// execute(): writes one element to EA if it is active; otherwise does nothing.
+Fault
+%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
+{
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    constexpr uint8_t elem_size = sizeof(Vs3[0]);
+    %(ea_code)s;
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    uint32_t mem_size = elem_size;
+    const std::vector<bool> byte_enable(mem_size, true);
+    // ei: regIdx times elements-per-register plus microIdx; used for masking.
+    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        %(memacc_code)s;
+        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
+                             memAccessFlags, nullptr, byte_enable);
+    }
+    return fault;
+}
+
+}};
+
+def template VsStrideMicroInitiateAcc {{
+// initiateAcc(): issues a one-element write at EA only if it is active.
+Fault
+%(class_name)s::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if(!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    constexpr uint8_t elem_size = sizeof(Vs3[0]);
+    %(ea_code)s;
+
+    uint32_t mem_size = elem_size;
+    // ei: regIdx times elements-per-register plus microIdx; used for masking.
+    size_t ei = this->regIdx * vlenb / elem_size + this->microIdx;
+    bool need_store = machInst.vm || elem_mask(v0, ei);
+    if (need_store) {
+        const std::vector<bool> byte_enable(mem_size, need_store); // all true
+        %(memacc_code)s;
+        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
+                            memAccessFlags, nullptr, byte_enable);
+    }
+    return fault;
+}
+
+}};
+
+def template VsStrideMicroCompleteAcc {{
+// completeAcc(): performs no work for this store and returns NoFault.
+Fault
+%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    return NoFault;
+}
+
+}};
+
+def template VlIndexConstructor {{
+// Constructor: emits one load micro-op per element, or one nop if vl is 0.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // Element sizes in bytes: vd from ElemType, vs2 from the width field.
+    const uint32_t vd_eewb = sizeof(ElemType);
+    const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8;
+    const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb;
+    const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb;
+    uint32_t vlenb = vlen >> 3;
+    const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb);
+    int32_t remaining_vl = this->vl;
+    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // NOTE(review): j is uint8_t; micro_vl above 255 would never end the loop.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (uint8_t i = 0; micro_vl > 0; i++) {
+        for (uint8_t j = 0; j < micro_vl; ++j) {
+            uint8_t vdRegIdx = i / vd_split_num;
+            uint8_t vs2RegIdx = i / vs2_split_num;
+            uint8_t vdElemIdx = j + micro_vlmax * (i % vd_split_num);
+            uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num);
+            microop = new %(class_name)sMicro<ElemType>(machInst,
+                vdRegIdx, vdElemIdx, vs2RegIdx, vs2ElemIdx);
+            microop->setFlag(IsDelayedCommit);
+            microop->setFlag(IsLoad);
+            this->microops.push_back(microop);
+        }
+        remaining_vl -= micro_vlmax; // may go negative; ends the outer loop
+        micro_vl = std::min(remaining_vl, micro_vlmax);
+    }
+
+    this->microops.front()->setFlag(IsFirstMicroop);
+    this->microops.back()->setFlag(IsLastMicroop);
+    this->flags[IsVector] = true;
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VlIndexMicroDeclare {{
+// Declaration of the per-element load micro-op for the VlIndex templates.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // rs1, vs2, vd, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1]; // one destination register slot
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint8_t _vdRegIdx, uint8_t _vdElemIdx,
+        uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VlIndexMicroConstructor {{
+// Constructor: dest vd; sources rs1, vs2, old vd, and v0 when masked.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(
+    ExtMachInst _machInst,uint8_t _vdRegIdx, uint8_t _vdElemIdx,
+    uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
+  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
+      _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0; // operand counts are rebuilt by hand below
+    _numDestRegs = 0;
+    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
+    _numTypedDestRegs[VecRegClass]++;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
+    // We treat agnostic as undisturbed, so the old vd is also a source
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
+    if (!_machInst.vm) {
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); // v0, added last
+    }
+    this->flags[IsLoad] = true;
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VlIndexMicroExecute {{
+// execute(): reads one element from EA into the destination if it is active.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext *xc,
+    trace::InstRecord *traceData)const
+{
+    using vu = std::make_unsigned_t<ElemType>;
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    // VS is marked dirty before the memory access is attempted.
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    uint32_t mem_size = elem_size;
+    const std::vector<bool> byte_enable(mem_size, true);
+    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size,
+                                memAccessFlags, byte_enable);
+        if (fault != NoFault)
+            return fault;
+        %(memacc_code)s; /* Vd[this->vdElemIdx] = Mem[0]; */
+    }
+
+    %(op_wb)s;
+    return fault;
+}
+
+}};
+
+def template VlIndexMicroInitiateAcc {{
+// initiateAcc(): starts a one-element read at EA after the legality checks.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    using vu = std::make_unsigned_t<ElemType>;
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    %(op_src_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+    %(ea_code)s; // ea_code depends on elem_size
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+
+    uint32_t mem_size = elem_size;
+    // A masked-off element still issues the read, with every byte disabled.
+    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
+    bool need_load = machInst.vm || elem_mask(v0, ei);
+    const std::vector<bool> byte_enable(mem_size, need_load);
+    fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable);
+    return fault;
+}
+
+}};
+
+def template VlIndexMicroCompleteAcc {{
+// completeAcc(): starts from the old vd, then overlays the loaded element.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::completeAcc(PacketPtr pkt, ExecContext *xc,
+                            trace::InstRecord *traceData) const
+{
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    status.vs = VPUStatus::DIRTY;
+    xc->setMiscReg(MISCREG_STATUS, status);
+
+    using vu = std::make_unsigned_t<ElemType>;
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+
+    constexpr uint8_t elem_size = sizeof(Vd[0]);
+
+    RiscvISA::vreg_t old_vd;
+    decltype(Vd) old_Vd = nullptr;
+    // We treat agnostic as undisturbed; source slot 2 holds the old vd
+    xc->getRegOperand(this, 2, &old_vd);
+    old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >();
+
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0; // assigned and read only when machInst.vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // Seed the whole destination with the old value before the overlay.
+    memcpy(Vd, old_Vd, vlenb);
+
+    size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
+        %(memacc_code)s; /* presumably Vd[this->vdElemIdx] = Mem[0]; */
+    }
+
+    %(op_wb)s;
+    return NoFault;
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VsIndexConstructor {{
+// Constructor: emits one store micro-op per element, or one nop if vl is 0.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    // Element sizes in bytes: vs3 from ElemType, vs2 from the width field.
+    const uint32_t vs3_eewb = sizeof(ElemType);
+    const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8;
+    const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb;
+    const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb;
+    uint32_t vlenb = vlen >> 3;
+    const int32_t micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb);
+    int32_t remaining_vl = this->vl;
+    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
+    StaticInstPtr microop;
+    // NOTE(review): j is uint8_t; micro_vl above 255 would never end the loop.
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    for (uint8_t i = 0; micro_vl > 0; i++) {
+        for (uint8_t j = 0; j < micro_vl; ++j) {
+            uint8_t vs3RegIdx = i / vs3_split_num;
+            uint8_t vs2RegIdx = i / vs2_split_num;
+            uint8_t vs3ElemIdx = j + micro_vlmax * (i % vs3_split_num);
+            uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num);
+            microop = new %(class_name)sMicro<ElemType>(machInst,
+                vs3RegIdx, vs3ElemIdx, vs2RegIdx, vs2ElemIdx);
+            microop->setFlag(IsDelayedCommit);
+            microop->setFlag(IsStore);
+            this->microops.push_back(microop);
+        }
+        remaining_vl -= micro_vlmax; // may go negative; ends the outer loop
+        micro_vl = std::min(remaining_vl, micro_vlmax);
+    }
+
+    this->microops.front()->setFlag(IsFirstMicroop);
+    this->microops.back()->setFlag(IsLastMicroop);
+    this->flags[IsVector] = true;
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VsIndexMicroDeclare {{
+// Declaration of the per-element store micro-op for the VsIndex templates.
+template<typename ElemType>
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // rs1, vs2, vs3, vm
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[0]; // no destination registers
+public:
+    %(class_name)s(ExtMachInst _machInst,
+        uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx,
+        uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+    Fault completeAcc(PacketPtr, ExecContext *,
+                      trace::InstRecord *) const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VsIndexMicroConstructor {{
+// Registers the source operands and flags the micro-op as a store.
+template<typename ElemType>
+%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
+    uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx,
+    uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
+  : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
+      _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx)
+{
+    %(set_reg_idx_arr)s;
+    _numSrcRegs = 0;
+    _numDestRegs = 0;
+    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
+    // We treat agnostic as undisturbed
+    setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]);
+    if (!_machInst.vm) {
+        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);  // v0, added last
+    }
+    this->flags[IsStore] = true;
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VsIndexMicroExecute {{
+// execute(): store one vs3 element to EA unless the element is masked off.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::execute(ExecContext *xc,
+    trace::InstRecord *traceData)const
+{
+    using vu = std::make_unsigned_t<ElemType>;
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    %(op_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    constexpr uint8_t elem_size = sizeof(Vs3[0]);
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // assigned and read only when vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);  // last source
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // Exactly one element is written, with every byte enabled.
+    uint32_t mem_size = elem_size;
+    const std::vector<bool> byte_enable(mem_size, true);
+    // ei: vs3RegIdx whole registers of elements, plus vs3ElemIdx.
+    size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
+        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
+                             memAccessFlags, nullptr, byte_enable);
+    }
+    return fault;
+}
+
+}};
+
+def template VsIndexMicroInitiateAcc {{
+// initiateAcc(): issue the write of one vs3 element unless it is masked off.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::initiateAcc(ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    using vu = std::make_unsigned_t<ElemType>;
+    Fault fault = NoFault;
+    Addr EA;
+    MISA misa = xc->readMiscReg(MISCREG_ISA);
+    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+    if (!misa.rvv || status.vs == VPUStatus::OFF) {
+        return std::make_shared<IllegalInstFault>(
+            "RVV is disabled or VPU is off", machInst);
+    }
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    %(op_src_decl)s;
+    %(op_rd)s;
+    %(set_vlenb)s;
+    %(ea_code)s;
+    constexpr uint8_t elem_size = sizeof(Vs3[0]);
+    RiscvISA::vreg_t tmp_v0;
+    uint8_t *v0;  // assigned and read only when vm is clear
+    if (!machInst.vm) {
+        xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0);  // last source
+        v0 = tmp_v0.as<uint8_t>();
+    }
+    // Exactly one element is written, with every byte enabled.
+    constexpr uint8_t mem_size = elem_size;
+    const std::vector<bool> byte_enable(mem_size, true);
+    // ei: vs3RegIdx whole registers of elements, plus vs3ElemIdx.
+    size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx;
+    if (machInst.vm || elem_mask(v0, ei)) {
+        %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */
+        fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA,
+                             memAccessFlags, nullptr, byte_enable);
+    }
+    return fault;
+}
+
+}};
+
+def template VsIndexMicroCompleteAcc {{
+// completeAcc(): nothing is done with the response packet.
+template<typename ElemType>
+Fault
+%(class_name)s<ElemType>::completeAcc(PacketPtr pkt, ExecContext* xc,
+                            trace::InstRecord* traceData) const
+{
+    return NoFault;  // pkt, xc and traceData are unused
+}
+
+%(declare_vmem_template)s;
+
+}};
+
+def template VMemBaseDecodeBlock {{
+    return new %(class_name)s(machInst, vlen);  // no element-type dispatch
+}};
+
+def template VMemTemplateDecodeBlock {{
+// vtype8.vsew picks the element type the instruction is instantiated with.
+switch(machInst.vtype8.vsew) {
+    case 0b000: {  // 8-bit elements
+        return new %(class_name)s<uint8_t>(machInst);
+    }
+    case 0b001: {  // 16-bit elements
+        return new %(class_name)s<uint16_t>(machInst);
+    }
+    case 0b010: {  // 32-bit elements
+        return new %(class_name)s<uint32_t>(machInst);
+    }
+    case 0b011: {  // 64-bit elements
+        return new %(class_name)s<uint64_t>(machInst);
+    }
+    default: GEM5_UNREACHABLE;  // any other vsew value is not handled
+}
+
+}};
+
+def template VMemSplitTemplateDecodeBlock {{
+// Same vsew dispatch as above, but vlen is also passed to the constructor.
+switch(machInst.vtype8.vsew) {
+    case 0b000: {  // 8-bit elements
+        return new %(class_name)s<uint8_t>(machInst, vlen);
+    }
+    case 0b001: {  // 16-bit elements
+        return new %(class_name)s<uint16_t>(machInst, vlen);
+    }
+    case 0b010: {  // 32-bit elements
+        return new %(class_name)s<uint32_t>(machInst, vlen);
+    }
+    case 0b011: {  // 64-bit elements
+        return new %(class_name)s<uint64_t>(machInst, vlen);
+    }
+    default: GEM5_UNREACHABLE;  // any other vsew value is not handled
+}
+
+}};
diff --git a/src/arch/riscv/linux/fs_workload.cc b/src/arch/riscv/linux/fs_workload.cc
index 4a4a3812ec..2946d6c324 100644
--- a/src/arch/riscv/linux/fs_workload.cc
+++ b/src/arch/riscv/linux/fs_workload.cc
@@ -75,5 +75,126 @@ FsLinux::initState()
     }
 }
 
+void
+BootloaderKernelWorkload::loadBootloaderSymbolTable()
+{
+    if (params().bootloader_filename != "") {  // bootloader is optional
+        Addr bootloader_paddr_offset = params().bootloader_addr;
+        bootloader = loader::createObjectFile(params().bootloader_filename);
+        bootloaderSymbolTable = bootloader->symtab();  // kept unprefixed
+        auto renamedBootloaderSymbolTable = \
+            bootloaderSymbolTable.offset(
+                bootloader_paddr_offset
+            )->functionSymbols()->rename(
+                [](std::string &name) {
+                    name = "bootloader." + name;  // prefix every symbol
+                }
+            );  // offset by bootloader_addr, function symbols only
+        loader::debugSymbolTable.insert(*renamedBootloaderSymbolTable);
+    }
+}
+
+void
+BootloaderKernelWorkload::loadKernelSymbolTable()
+{
+    if (params().kernel_filename != "") {  // kernel is optional
+        kernel = loader::createObjectFile(params().kernel_filename);
+        kernelSymbolTable = kernel->symtab();  // kept unprefixed
+        auto renamedKernelSymbolTable = \
+            kernelSymbolTable.functionSymbols()->rename(
+                [](std::string &name) {
+                    name = "kernel." + name;  // prefix every symbol
+                }
+            );  // NOTE(review): no offset here, unlike the bootloader table
+        loader::debugSymbolTable.insert(*renamedKernelSymbolTable);
+    }
+}
+
+void
+BootloaderKernelWorkload::loadBootloader()
+{
+    if (params().bootloader_filename != "") {
+        Addr bootloader_addr_offset = params().bootloader_addr;
+        bootloader->buildImage().offset(bootloader_addr_offset).write(
+            system->physProxy
+        );  // image goes to physical memory at bootloader_addr
+        delete bootloader;  // NOTE(review): pointer is left dangling
+
+        inform("Loaded bootloader \'%s\' at 0x%llx\n",
+               params().bootloader_filename,
+               bootloader_addr_offset);
+    } else {
+        inform("Bootloader is not specified.\n");
+    }
+}
+
+void
+BootloaderKernelWorkload::loadKernel()
+{
+    if (params().kernel_filename != "") {
+        Addr kernel_paddr_offset = params().kernel_addr;
+        kernel->buildImage().offset(kernel_paddr_offset).write(
+            system->physProxy
+        );  // image goes to physical memory at kernel_addr
+        delete kernel;  // NOTE(review): getArch() still dereferences kernel
+
+        inform("Loaded kernel \'%s\' at 0x%llx\n",
+                params().kernel_filename,
+                kernel_paddr_offset);
+    } else {
+        inform("Kernel is not specified.\n");
+    }
+}
+
+
+void
+BootloaderKernelWorkload::loadDtb()
+{
+    if (params().dtb_filename != "") {
+        auto *dtb_file = new loader::DtbFile(params().dtb_filename);
+        // Write the DTB image to physical memory at dtb_addr.
+        dtb_file->buildImage().offset(params().dtb_addr)
+            .write(system->physProxy);
+        delete dtb_file;
+
+        inform("Loaded DTB \'%s\' at 0x%llx\n",
+                params().dtb_filename,
+                params().dtb_addr);
+        // Every thread context receives the DTB address in register A1.
+        for (auto *tc: system->threads) {
+            tc->setReg(int_reg::A1, params().dtb_addr);
+        }
+    } else {
+        inform("DTB file is not specified.\n");
+    }
+}
+
+void
+BootloaderKernelWorkload::initState()
+{
+    loadBootloader();  // each loader is a no-op if its file is unset
+    loadKernel();
+    loadDtb();
+    // With memory populated, reset and activate every thread context.
+    for (auto *tc: system->threads) {
+        RiscvISA::Reset().invoke(tc);
+        tc->activate();
+    }
+}
+
+void
+BootloaderKernelWorkload::serialize(CheckpointOut &checkpoint) const
+{  // Both symbol tables are checkpointed under fixed names.
+    bootloaderSymbolTable.serialize("bootloader_symbol_table", checkpoint);
+    kernelSymbolTable.serialize("kernel_symbol_table", checkpoint);
+}
+
+void
+BootloaderKernelWorkload::unserialize(CheckpointIn &checkpoint)
+{  // Restores both symbol tables from the names used by serialize().
+    bootloaderSymbolTable.unserialize("bootloader_symbol_table", checkpoint);
+    kernelSymbolTable.unserialize("kernel_symbol_table", checkpoint);
+}
+
 } // namespace RiscvISA
 } // namespace gem5
diff --git a/src/arch/riscv/linux/fs_workload.hh b/src/arch/riscv/linux/fs_workload.hh
index 1dc704d906..a0366a27c4 100644
--- a/src/arch/riscv/linux/fs_workload.hh
+++ b/src/arch/riscv/linux/fs_workload.hh
@@ -29,7 +29,10 @@
 #ifndef __ARCH_RISCV_LINUX_SYSTEM_HH__
 #define __ARCH_RISCV_LINUX_SYSTEM_HH__
 
+#include <string>
+
 #include "arch/riscv/remote_gdb.hh"
+#include "params/RiscvBootloaderKernelWorkload.hh"
 #include "params/RiscvLinux.hh"
 #include "sim/kernel_workload.hh"
 
@@ -58,6 +61,64 @@ class FsLinux : public KernelWorkload
     ByteOrder byteOrder() const override { return ByteOrder::little; }
 };
 
+class BootloaderKernelWorkload: public Workload
+{  // Workload built from an optional bootloader, kernel and DTB file.
+  private:
+    Addr entryPoint = 0;  // value returned by getEntry()
+    loader::ObjectFile *kernel = nullptr;  // deleted in loadKernel()
+    loader::ObjectFile *bootloader = nullptr;  // deleted in loadBootloader()
+    loader::SymbolTable kernelSymbolTable;
+    loader::SymbolTable bootloaderSymbolTable;
+    const std::string bootArgs;  // copied from p.boot_args
+    // Symbol-table loaders run in the constructor, the rest in initState().
+  private:
+    void loadBootloaderSymbolTable();
+    void loadKernelSymbolTable();
+    void loadBootloader();
+    void loadKernel();
+    void loadDtb();
+
+  public:
+    PARAMS(RiscvBootloaderKernelWorkload);
+    BootloaderKernelWorkload(const Params &p)
+        : Workload(p), entryPoint(p.entry_point), bootArgs(p.boot_args)
+    {
+        loadBootloaderSymbolTable();
+        loadKernelSymbolTable();
+    }
+
+    void initState() override;
+
+    void
+    setSystem(System *sys) override
+    {
+        Workload::setSystem(sys);
+        gdb = BaseRemoteGDB::build<RemoteGDB>(
+            params().remote_gdb_port, system);
+    }
+
+    Addr getEntry() const override { return entryPoint; }
+
+    ByteOrder byteOrder() const override { return ByteOrder::little; }
+    // NOTE(review): loadKernel() deletes kernel, yet this dereferences it.
+    loader::Arch getArch() const override { return kernel->getArch(); }
+    // Only the kernel symbol table is exposed; tc is not used.
+    const loader::SymbolTable &
+    symtab(ThreadContext *tc) override
+    {
+        return kernelSymbolTable;
+    }
+
+    bool
+    insertSymbol(const loader::Symbol &symbol) override
+    {
+        return kernelSymbolTable.insert(symbol);
+    }
+
+    void serialize(CheckpointOut &checkpoint) const override;
+    void unserialize(CheckpointIn &checkpoint) override;
+};
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh
index de07145dc3..91fb507034 100644
--- a/src/arch/riscv/pcstate.hh
+++ b/src/arch/riscv/pcstate.hh
@@ -43,6 +43,7 @@
 #define __ARCH_RISCV_PCSTATE_HH__
 
 #include "arch/generic/pcstate.hh"
+#include "arch/riscv/regs/vector.hh"
 #include "enums/RiscvType.hh"
 
 namespace gem5
@@ -56,15 +57,28 @@ constexpr enums::RiscvType RV64 = enums::RV64;
 
 class PCState : public GenericISA::UPCState<4>
 {
-  private:
+  protected:
+    typedef GenericISA::UPCState<4> Base;
+
     bool _compressed = false;
-    RiscvType _rv_type = RV64;
+    RiscvType _rvType = RV64;
+    uint64_t _vlenb = 32;
+    VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 initially
+    uint32_t _vl = 0;
 
   public:
+    PCState(const PCState &other) : Base(other),  // copies every field
+        _compressed(other._compressed), _rvType(other._rvType),
+        _vlenb(other._vlenb), _vtype(other._vtype), _vl(other._vl)
+    {}  // _compressed is included so a clone keeps the same size()
+    PCState &operator=(const PCState &other) = default;
     PCState() = default;
-    PCState(const PCState &other) = default;
-    PCState(Addr addr, RiscvType rv_type) : UPCState(addr), _rv_type(rv_type)
+    explicit PCState(Addr addr) { set(addr); }
+    explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb)
     {
+        set(addr);
+        _rvType = rvType;
+        _vlenb = vlenb;
     }
 
     PCStateBase *clone() const override { return new PCState(*this); }
@@ -75,23 +89,65 @@ class PCState : public GenericISA::UPCState<4>
         Base::update(other);
         auto &pcstate = other.as<PCState>();
         _compressed = pcstate._compressed;
-        _rv_type = pcstate._rv_type;
+        _rvType = pcstate._rvType;
+        _vlenb = pcstate._vlenb;
+        _vtype = pcstate._vtype;
+        _vl = pcstate._vl;
     }
 
     void compressed(bool c) { _compressed = c; }
     bool compressed() const { return _compressed; }
 
-    void rvType(RiscvType rv_type) { _rv_type = rv_type; }
-    RiscvType rvType() const { return _rv_type; }
+    void rvType(RiscvType rvType) { _rvType = rvType; }
+    RiscvType rvType() const { return _rvType; }
+
+    void vlenb(uint64_t v) { _vlenb = v; }
+    uint64_t vlenb() const { return _vlenb; }
+
+    void vtype(VTYPE v) { _vtype = v; }
+    VTYPE vtype() const { return _vtype; }
+
+    void vl(uint32_t v) { _vl = v; }
+    uint32_t vl() const { return _vl; }
+
+    uint64_t size() const { return _compressed ? 2 : 4; }  // added to pc()
 
     bool
     branching() const override
     {
-        if (_compressed) {
-            return npc() != pc() + 2 || nupc() != upc() + 1;
-        } else {
-            return npc() != pc() + 4 || nupc() != upc() + 1;
-        }
+        return npc() != pc() + size() || nupc() != upc() + 1;
+    }
+
+    bool
+    equals(const PCStateBase &other) const override
+    {
+        auto &opc = other.as<PCState>();  // other is viewed as a PCState
+        return Base::equals(other) &&
+            _vlenb == opc._vlenb &&
+            _vtype == opc._vtype &&
+            _vl == opc._vl;  // NOTE(review): _compressed, _rvType not compared
+    }
+
+    void
+    serialize(CheckpointOut &cp) const override
+    {
+        Base::serialize(cp);  // base-class state first
+        SERIALIZE_SCALAR(_rvType);
+        SERIALIZE_SCALAR(_vlenb);
+        SERIALIZE_SCALAR(_vtype);
+        SERIALIZE_SCALAR(_vl);
+        SERIALIZE_SCALAR(_compressed);  // all five local fields are saved
+    }
+
+    void
+    unserialize(CheckpointIn &cp) override
+    {
+        Base::unserialize(cp);  // base-class state first
+        UNSERIALIZE_SCALAR(_rvType);
+        UNSERIALIZE_SCALAR(_vlenb);
+        UNSERIALIZE_SCALAR(_vtype);
+        UNSERIALIZE_SCALAR(_vl);
+        UNSERIALIZE_SCALAR(_compressed);  // same fields as serialize()
     }
 };
 
diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh
index 4809372070..cca9e1be2f 100644
--- a/src/arch/riscv/regs/float.hh
+++ b/src/arch/riscv/regs/float.hh
@@ -211,6 +211,20 @@ const std::vector<std::string> RegNames = {
 
 } // namespace float_reg
 
+inline float32_t
+fsgnj32(float32_t a, float32_t b, bool n, bool x) {  // sign injection
+    if (n) b.v = ~b.v;  // n: use the inverted sign of b
+    else if (x) b.v = a.v ^ b.v;  // x: sign is sign(a) XOR sign(b)
+    return f32(insertBits(b.v, 30, 0, a.v));  // bits 30..0 come from a
+}
+
+inline float64_t
+fsgnj64(float64_t a, float64_t b, bool n, bool x) {  // sign injection
+    if (n) b.v = ~b.v;  // n: use the inverted sign of b
+    else if (x) b.v = a.v ^ b.v;  // x: sign is sign(a) XOR sign(b)
+    return f64(insertBits(b.v, 62, 0, a.v));  // bits 62..0 come from a
+}
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh
index 5ea3536141..d36776d7ef 100644
--- a/src/arch/riscv/regs/misc.hh
+++ b/src/arch/riscv/regs/misc.hh
@@ -191,6 +191,14 @@ enum MiscRegIndex
     MISCREG_FFLAGS,
     MISCREG_FRM,
 
+    MISCREG_VSTART,
+    MISCREG_VXSAT,
+    MISCREG_VXRM,
+    MISCREG_VCSR,
+    MISCREG_VL,
+    MISCREG_VTYPE,
+    MISCREG_VLENB,
+
     // These registers are not in the standard, hence does not exist in the
     // CSRData map. These are mainly used to provide a minimal implementation
     // for non-maskable-interrupt in our simple cpu.
@@ -476,7 +484,15 @@ enum CSRIndex
     CSR_TDATA3 = 0x7A3,
     CSR_DCSR = 0x7B0,
     CSR_DPC = 0x7B1,
-    CSR_DSCRATCH = 0x7B2
+    CSR_DSCRATCH = 0x7B2,
+
+    CSR_VSTART       = 0x008,
+    CSR_VXSAT        = 0x009,
+    CSR_VXRM         = 0x00A,
+    CSR_VCSR         = 0x00F,
+    CSR_VL           = 0xC20,
+    CSR_VTYPE        = 0xC21,
+    CSR_VLENB        = 0xC22
 };
 
 struct CSRMetadata
@@ -492,233 +508,473 @@ constexpr uint64_t rvTypeFlags(T... args) {
 }
 
 const std::unordered_map<int, CSRMetadata> CSRData = {
-    {CSR_USTATUS, {"ustatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
-    {CSR_UIE, {"uie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
-    {CSR_UTVEC, {"utvec", MISCREG_UTVEC, rvTypeFlags(RV64, RV32)}},
-    {CSR_USCRATCH, {"uscratch", MISCREG_USCRATCH, rvTypeFlags(RV64, RV32)}},
-    {CSR_UEPC, {"uepc", MISCREG_UEPC, rvTypeFlags(RV64, RV32)}},
-    {CSR_UCAUSE, {"ucause", MISCREG_UCAUSE, rvTypeFlags(RV64, RV32)}},
-    {CSR_UTVAL, {"utval", MISCREG_UTVAL, rvTypeFlags(RV64, RV32)}},
-    {CSR_UIP, {"uip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
-    {CSR_FFLAGS, {"fflags", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}},
-    {CSR_FRM, {"frm", MISCREG_FRM, rvTypeFlags(RV64, RV32)}},
-    {CSR_FCSR, {"fcsr", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}}, // Actually FRM << 5 | FFLAGS
-    {CSR_CYCLE, {"cycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
-    {CSR_TIME, {"time", MISCREG_TIME, rvTypeFlags(RV64, RV32)}},
-    {CSR_INSTRET, {"instret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER03, {"hpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER04, {"hpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER05, {"hpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER06, {"hpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER07, {"hpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER08, {"hpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER09, {"hpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER10, {"hpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER11, {"hpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER12, {"hpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER13, {"hpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER14, {"hpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER15, {"hpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER16, {"hpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER17, {"hpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER18, {"hpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER19, {"hpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER20, {"hpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER21, {"hpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER22, {"hpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER23, {"hpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER24, {"hpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER25, {"hpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER26, {"hpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER27, {"hpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER28, {"hpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER29, {"hpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER30, {"hpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
-    {CSR_HPMCOUNTER31, {"hpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
-    {CSR_CYCLEH, {"cycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
-    {CSR_TIMEH, {"timeh", MISCREG_TIMEH, rvTypeFlags(RV32)}},
-    {CSR_INSTRETH, {"instreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER03H, {"hpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER04H, {"hpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER05H, {"hpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER06H, {"hpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER07H, {"hpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER08H, {"hpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER09H, {"hpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER10H, {"hpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER11H, {"hpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER12H, {"hpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER13H, {"hpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER14H, {"hpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER15H, {"hpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER16H, {"hpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER17H, {"hpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER18H, {"hpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER19H, {"hpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER20H, {"hpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER21H, {"hpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER22H, {"hpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER23H, {"hpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER24H, {"hpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER25H, {"hpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER26H, {"hpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER27H, {"hpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER28H, {"hpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER29H, {"hpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER30H, {"hpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
-    {CSR_HPMCOUNTER31H, {"hpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
-
-    {CSR_SSTATUS, {"sstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
-    {CSR_SEDELEG, {"sedeleg", MISCREG_SEDELEG, rvTypeFlags(RV64, RV32)}},
-    {CSR_SIDELEG, {"sideleg", MISCREG_SIDELEG, rvTypeFlags(RV64, RV32)}},
-    {CSR_SIE, {"sie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
-    {CSR_STVEC, {"stvec", MISCREG_STVEC, rvTypeFlags(RV64, RV32)}},
-    {CSR_SCOUNTEREN, {"scounteren", MISCREG_SCOUNTEREN, rvTypeFlags(RV64, RV32)}},
-    {CSR_SSCRATCH, {"sscratch", MISCREG_SSCRATCH, rvTypeFlags(RV64, RV32)}},
-    {CSR_SEPC, {"sepc", MISCREG_SEPC, rvTypeFlags(RV64, RV32)}},
-    {CSR_SCAUSE, {"scause", MISCREG_SCAUSE, rvTypeFlags(RV64, RV32)}},
-    {CSR_STVAL, {"stval", MISCREG_STVAL, rvTypeFlags(RV64, RV32)}},
-    {CSR_SIP, {"sip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
-    {CSR_SATP, {"satp", MISCREG_SATP, rvTypeFlags(RV64, RV32)}},
-
-    {CSR_MVENDORID, {"mvendorid", MISCREG_VENDORID, rvTypeFlags(RV64, RV32)}},
-    {CSR_MARCHID, {"marchid", MISCREG_ARCHID, rvTypeFlags(RV64, RV32)}},
-    {CSR_MIMPID, {"mimpid", MISCREG_IMPID, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHARTID, {"mhartid", MISCREG_HARTID, rvTypeFlags(RV64, RV32)}},
-    {CSR_MSTATUS, {"mstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
-    {CSR_MISA, {"misa", MISCREG_ISA, rvTypeFlags(RV64, RV32)}},
-    {CSR_MEDELEG, {"medeleg", MISCREG_MEDELEG, rvTypeFlags(RV64, RV32)}},
-    {CSR_MIDELEG, {"mideleg", MISCREG_MIDELEG, rvTypeFlags(RV64, RV32)}},
-    {CSR_MIE, {"mie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
-    {CSR_MTVEC, {"mtvec", MISCREG_MTVEC, rvTypeFlags(RV64, RV32)}},
-    {CSR_MCOUNTEREN, {"mcounteren", MISCREG_MCOUNTEREN, rvTypeFlags(RV64, RV32)}},
-    {CSR_MSTATUSH, {"mstatush", MISCREG_MSTATUSH, rvTypeFlags(RV32)}},
-    {CSR_MSCRATCH, {"mscratch", MISCREG_MSCRATCH, rvTypeFlags(RV64, RV32)}},
-    {CSR_MEPC, {"mepc", MISCREG_MEPC, rvTypeFlags(RV64, RV32)}},
-    {CSR_MCAUSE, {"mcause", MISCREG_MCAUSE, rvTypeFlags(RV64, RV32)}},
-    {CSR_MTVAL, {"mtval", MISCREG_MTVAL, rvTypeFlags(RV64, RV32)}},
-    {CSR_MIP, {"mip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPCFG0, {"pmpcfg0", MISCREG_PMPCFG0, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPCFG1, {"pmpcfg1", MISCREG_PMPCFG1, rvTypeFlags(RV32)}},  // pmpcfg1 rv32 only
-    {CSR_PMPCFG2, {"pmpcfg2", MISCREG_PMPCFG2, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPCFG3, {"pmpcfg3", MISCREG_PMPCFG3, rvTypeFlags(RV32)}},  // pmpcfg3 rv32 only
-    {CSR_PMPADDR00, {"pmpaddr0", MISCREG_PMPADDR00, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR01, {"pmpaddr1", MISCREG_PMPADDR01, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR02, {"pmpaddr2", MISCREG_PMPADDR02, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR03, {"pmpaddr3", MISCREG_PMPADDR03, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR04, {"pmpaddr4", MISCREG_PMPADDR04, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR05, {"pmpaddr5", MISCREG_PMPADDR05, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR06, {"pmpaddr6", MISCREG_PMPADDR06, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR07, {"pmpaddr7", MISCREG_PMPADDR07, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR08, {"pmpaddr8", MISCREG_PMPADDR08, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR09, {"pmpaddr9", MISCREG_PMPADDR09, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR10, {"pmpaddr10", MISCREG_PMPADDR10, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR11, {"pmpaddr11", MISCREG_PMPADDR11, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR12, {"pmpaddr12", MISCREG_PMPADDR12, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR13, {"pmpaddr13", MISCREG_PMPADDR13, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR14, {"pmpaddr14", MISCREG_PMPADDR14, rvTypeFlags(RV64, RV32)}},
-    {CSR_PMPADDR15, {"pmpaddr15", MISCREG_PMPADDR15, rvTypeFlags(RV64, RV32)}},
-    {CSR_MCYCLE, {"mcycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
-    {CSR_MINSTRET, {"minstret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER03, {"mhpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER04, {"mhpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER05, {"mhpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER06, {"mhpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER07, {"mhpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER08, {"mhpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER09, {"mhpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER10, {"mhpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER11, {"mhpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER12, {"mhpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER13, {"mhpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER14, {"mhpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER15, {"mhpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER16, {"mhpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER17, {"mhpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER18, {"mhpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER19, {"mhpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER20, {"mhpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER21, {"mhpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER22, {"mhpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER23, {"mhpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER24, {"mhpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER25, {"mhpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER26, {"mhpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER27, {"mhpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER28, {"mhpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER29, {"mhpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER30, {"mhpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMCOUNTER31, {"mhpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
-
-    {CSR_MCYCLEH, {"mcycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
-    {CSR_MINSTRETH, {"minstreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER03H, {"mhpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER04H, {"mhpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER05H, {"mhpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER06H, {"mhpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER07H, {"mhpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER08H, {"mhpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER09H, {"mhpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER10H, {"mhpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER11H, {"mhpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER12H, {"mhpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER13H, {"mhpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER14H, {"mhpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER15H, {"mhpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER16H, {"mhpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER17H, {"mhpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER18H, {"mhpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER19H, {"mhpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER20H, {"mhpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER21H, {"mhpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER22H, {"mhpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER23H, {"mhpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER24H, {"mhpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER25H, {"mhpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER26H, {"mhpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER27H, {"mhpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER28H, {"mhpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER29H, {"mhpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER30H, {"mhpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
-    {CSR_MHPMCOUNTER31H, {"mhpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
-
-    {CSR_MHPMEVENT03, {"mhpmevent03", MISCREG_HPMEVENT03, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT04, {"mhpmevent04", MISCREG_HPMEVENT04, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT05, {"mhpmevent05", MISCREG_HPMEVENT05, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT06, {"mhpmevent06", MISCREG_HPMEVENT06, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT07, {"mhpmevent07", MISCREG_HPMEVENT07, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT08, {"mhpmevent08", MISCREG_HPMEVENT08, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT09, {"mhpmevent09", MISCREG_HPMEVENT09, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT10, {"mhpmevent10", MISCREG_HPMEVENT10, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT11, {"mhpmevent11", MISCREG_HPMEVENT11, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT12, {"mhpmevent12", MISCREG_HPMEVENT12, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT13, {"mhpmevent13", MISCREG_HPMEVENT13, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT14, {"mhpmevent14", MISCREG_HPMEVENT14, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT15, {"mhpmevent15", MISCREG_HPMEVENT15, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT16, {"mhpmevent16", MISCREG_HPMEVENT16, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT17, {"mhpmevent17", MISCREG_HPMEVENT17, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT18, {"mhpmevent18", MISCREG_HPMEVENT18, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT19, {"mhpmevent19", MISCREG_HPMEVENT19, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT20, {"mhpmevent20", MISCREG_HPMEVENT20, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT21, {"mhpmevent21", MISCREG_HPMEVENT21, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT22, {"mhpmevent22", MISCREG_HPMEVENT22, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT23, {"mhpmevent23", MISCREG_HPMEVENT23, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT24, {"mhpmevent24", MISCREG_HPMEVENT24, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT25, {"mhpmevent25", MISCREG_HPMEVENT25, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT26, {"mhpmevent26", MISCREG_HPMEVENT26, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT27, {"mhpmevent27", MISCREG_HPMEVENT27, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT28, {"mhpmevent28", MISCREG_HPMEVENT28, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT29, {"mhpmevent29", MISCREG_HPMEVENT29, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT30, {"mhpmevent30", MISCREG_HPMEVENT30, rvTypeFlags(RV64, RV32)}},
-    {CSR_MHPMEVENT31, {"mhpmevent31", MISCREG_HPMEVENT31, rvTypeFlags(RV64, RV32)}},
-
-    {CSR_TSELECT, {"tselect", MISCREG_TSELECT, rvTypeFlags(RV64, RV32)}},
-    {CSR_TDATA1, {"tdata1", MISCREG_TDATA1, rvTypeFlags(RV64, RV32)}},
-    {CSR_TDATA2, {"tdata2", MISCREG_TDATA2, rvTypeFlags(RV64, RV32)}},
-    {CSR_TDATA3, {"tdata3", MISCREG_TDATA3, rvTypeFlags(RV64, RV32)}},
-    {CSR_DCSR, {"dcsr", MISCREG_DCSR, rvTypeFlags(RV64, RV32)}},
-    {CSR_DPC, {"dpc", MISCREG_DPC, rvTypeFlags(RV64, RV32)}},
-    {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}}
+    {CSR_USTATUS,
+        {"ustatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_UIE,
+        {"uie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_UTVEC,
+        {"utvec", MISCREG_UTVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_USCRATCH,
+        {"uscratch", MISCREG_USCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_UEPC,
+        {"uepc", MISCREG_UEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_UCAUSE,
+        {"ucause", MISCREG_UCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_UTVAL,
+        {"utval", MISCREG_UTVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_UIP,
+        {"uip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_FFLAGS,
+        {"fflags", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}},
+    {CSR_FRM,
+        {"frm", MISCREG_FRM, rvTypeFlags(RV64, RV32)}},
+     // Actually FRM << 5 | FFLAGS
+    {CSR_FCSR,
+        {"fcsr", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}},
+    {CSR_CYCLE,
+        {"cycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
+    {CSR_TIME,
+        {"time", MISCREG_TIME, rvTypeFlags(RV64, RV32)}},
+    {CSR_INSTRET,
+        {"instret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER03,
+        {"hpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER04,
+        {"hpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER05,
+        {"hpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER06,
+        {"hpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER07,
+        {"hpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER08,
+        {"hpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER09,
+        {"hpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER10,
+        {"hpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER11,
+        {"hpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER12,
+        {"hpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER13,
+        {"hpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER14,
+        {"hpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER15,
+        {"hpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER16,
+        {"hpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER17,
+        {"hpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER18,
+        {"hpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER19,
+        {"hpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER20,
+        {"hpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER21,
+        {"hpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER22,
+        {"hpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER23,
+        {"hpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER24,
+        {"hpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER25,
+        {"hpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER26,
+        {"hpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER27,
+        {"hpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER28,
+        {"hpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER29,
+        {"hpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER30,
+        {"hpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER31,
+        {"hpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
+    {CSR_CYCLEH,
+        {"cycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
+    {CSR_TIMEH,
+        {"timeh", MISCREG_TIMEH, rvTypeFlags(RV32)}},
+    {CSR_INSTRETH,
+        {"instreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER03H,
+        {"hpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER04H,
+        {"hpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER05H,
+        {"hpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER06H,
+        {"hpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER07H,
+        {"hpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER08H,
+        {"hpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER09H,
+        {"hpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER10H,
+        {"hpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER11H,
+        {"hpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER12H,
+        {"hpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER13H,
+        {"hpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER14H,
+        {"hpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER15H,
+        {"hpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER16H,
+        {"hpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER17H,
+        {"hpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER18H,
+        {"hpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER19H,
+        {"hpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER20H,
+        {"hpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER21H,
+        {"hpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER22H,
+        {"hpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER23H,
+        {"hpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER24H,
+        {"hpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER25H,
+        {"hpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER26H,
+        {"hpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER27H,
+        {"hpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER28H,
+        {"hpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER29H,
+        {"hpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER30H,
+        {"hpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER31H,
+        {"hpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
+
+    {CSR_SSTATUS,
+        {"sstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_SEDELEG,
+        {"sedeleg", MISCREG_SEDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIDELEG,
+        {"sideleg", MISCREG_SIDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIE,
+        {"sie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_STVEC,
+        {"stvec", MISCREG_STVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_SCOUNTEREN,
+        {"scounteren", MISCREG_SCOUNTEREN, rvTypeFlags(RV64, RV32)}},
+    {CSR_SSCRATCH,
+        {"sscratch", MISCREG_SSCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_SEPC,
+        {"sepc", MISCREG_SEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_SCAUSE,
+        {"scause", MISCREG_SCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_STVAL,
+        {"stval", MISCREG_STVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIP,
+        {"sip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_SATP,
+        {"satp", MISCREG_SATP, rvTypeFlags(RV64, RV32)}},
+
+    {CSR_MVENDORID,
+        {"mvendorid", MISCREG_VENDORID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MARCHID,
+        {"marchid", MISCREG_ARCHID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIMPID,
+        {"mimpid", MISCREG_IMPID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHARTID,
+        {"mhartid", MISCREG_HARTID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MSTATUS,
+        {"mstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_MISA,
+        {"misa", MISCREG_ISA, rvTypeFlags(RV64, RV32)}},
+    {CSR_MEDELEG,
+        {"medeleg", MISCREG_MEDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIDELEG,
+        {"mideleg", MISCREG_MIDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIE,
+        {"mie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MTVEC,
+        {"mtvec", MISCREG_MTVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCOUNTEREN,
+        {"mcounteren", MISCREG_MCOUNTEREN, rvTypeFlags(RV64, RV32)}},
+    {CSR_MSTATUSH,
+        {"mstatush", MISCREG_MSTATUSH, rvTypeFlags(RV32)}},
+    {CSR_MSCRATCH,
+        {"mscratch", MISCREG_MSCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_MEPC,
+        {"mepc", MISCREG_MEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCAUSE,
+        {"mcause", MISCREG_MCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MTVAL,
+        {"mtval", MISCREG_MTVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIP,
+        {"mip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPCFG0,
+        {"pmpcfg0", MISCREG_PMPCFG0, rvTypeFlags(RV64, RV32)}},
+    // pmpcfg1 rv32 only
+    {CSR_PMPCFG1,
+        {"pmpcfg1", MISCREG_PMPCFG1, rvTypeFlags(RV32)}},
+    {CSR_PMPCFG2,
+        {"pmpcfg2", MISCREG_PMPCFG2, rvTypeFlags(RV64, RV32)}},
+    // pmpcfg3 rv32 only
+    {CSR_PMPCFG3,
+        {"pmpcfg3", MISCREG_PMPCFG3, rvTypeFlags(RV32)}},
+    {CSR_PMPADDR00,
+        {"pmpaddr0", MISCREG_PMPADDR00, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR01,
+        {"pmpaddr1", MISCREG_PMPADDR01, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR02,
+        {"pmpaddr2", MISCREG_PMPADDR02, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR03,
+        {"pmpaddr3", MISCREG_PMPADDR03, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR04,
+        {"pmpaddr4", MISCREG_PMPADDR04, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR05,
+        {"pmpaddr5", MISCREG_PMPADDR05, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR06,
+        {"pmpaddr6", MISCREG_PMPADDR06, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR07,
+        {"pmpaddr7", MISCREG_PMPADDR07, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR08,
+        {"pmpaddr8", MISCREG_PMPADDR08, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR09,
+        {"pmpaddr9", MISCREG_PMPADDR09, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR10,
+        {"pmpaddr10", MISCREG_PMPADDR10, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR11,
+        {"pmpaddr11", MISCREG_PMPADDR11, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR12,
+        {"pmpaddr12", MISCREG_PMPADDR12, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR13,
+        {"pmpaddr13", MISCREG_PMPADDR13, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR14,
+        {"pmpaddr14", MISCREG_PMPADDR14, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR15,
+        {"pmpaddr15", MISCREG_PMPADDR15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCYCLE,
+        {"mcycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MINSTRET,
+        {"minstret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER03,
+        {"mhpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER04,
+        {"mhpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER05,
+        {"mhpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER06,
+        {"mhpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER07,
+        {"mhpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER08,
+        {"mhpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER09,
+        {"mhpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER10,
+        {"mhpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER11,
+        {"mhpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER12,
+        {"mhpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER13,
+        {"mhpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER14,
+        {"mhpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER15,
+        {"mhpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER16,
+        {"mhpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER17,
+        {"mhpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER18,
+        {"mhpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER19,
+        {"mhpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER20,
+        {"mhpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER21,
+        {"mhpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER22,
+        {"mhpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER23,
+        {"mhpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER24,
+        {"mhpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER25,
+        {"mhpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER26,
+        {"mhpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER27,
+        {"mhpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER28,
+        {"mhpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER29,
+        {"mhpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER30,
+        {"mhpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER31,
+        {"mhpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
+
+    {CSR_MCYCLEH,
+        {"mcycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
+    {CSR_MINSTRETH,
+        {"minstreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER03H,
+        {"mhpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER04H,
+        {"mhpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER05H,
+        {"mhpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER06H,
+        {"mhpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER07H,
+        {"mhpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER08H,
+        {"mhpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER09H,
+        {"mhpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER10H,
+        {"mhpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER11H,
+        {"mhpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER12H,
+        {"mhpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER13H,
+        {"mhpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER14H,
+        {"mhpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER15H,
+        {"mhpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER16H,
+        {"mhpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER17H,
+        {"mhpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER18H,
+        {"mhpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER19H,
+        {"mhpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER20H,
+        {"mhpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER21H,
+        {"mhpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER22H,
+        {"mhpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER23H,
+        {"mhpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER24H,
+        {"mhpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER25H,
+        {"mhpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER26H,
+        {"mhpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER27H,
+        {"mhpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER28H,
+        {"mhpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER29H,
+        {"mhpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER30H,
+        {"mhpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER31H,
+        {"mhpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
+
+    {CSR_MHPMEVENT03,
+        {"mhpmevent03", MISCREG_HPMEVENT03, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT04,
+        {"mhpmevent04", MISCREG_HPMEVENT04, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT05,
+        {"mhpmevent05", MISCREG_HPMEVENT05, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT06,
+        {"mhpmevent06", MISCREG_HPMEVENT06, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT07,
+        {"mhpmevent07", MISCREG_HPMEVENT07, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT08,
+        {"mhpmevent08", MISCREG_HPMEVENT08, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT09,
+        {"mhpmevent09", MISCREG_HPMEVENT09, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT10,
+        {"mhpmevent10", MISCREG_HPMEVENT10, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT11,
+        {"mhpmevent11", MISCREG_HPMEVENT11, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT12,
+        {"mhpmevent12", MISCREG_HPMEVENT12, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT13,
+        {"mhpmevent13", MISCREG_HPMEVENT13, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT14,
+        {"mhpmevent14", MISCREG_HPMEVENT14, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT15,
+        {"mhpmevent15", MISCREG_HPMEVENT15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT16,
+        {"mhpmevent16", MISCREG_HPMEVENT16, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT17,
+        {"mhpmevent17", MISCREG_HPMEVENT17, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT18,
+        {"mhpmevent18", MISCREG_HPMEVENT18, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT19,
+        {"mhpmevent19", MISCREG_HPMEVENT19, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT20,
+        {"mhpmevent20", MISCREG_HPMEVENT20, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT21,
+        {"mhpmevent21", MISCREG_HPMEVENT21, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT22,
+        {"mhpmevent22", MISCREG_HPMEVENT22, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT23,
+        {"mhpmevent23", MISCREG_HPMEVENT23, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT24,
+        {"mhpmevent24", MISCREG_HPMEVENT24, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT25,
+        {"mhpmevent25", MISCREG_HPMEVENT25, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT26,
+        {"mhpmevent26", MISCREG_HPMEVENT26, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT27,
+        {"mhpmevent27", MISCREG_HPMEVENT27, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT28,
+        {"mhpmevent28", MISCREG_HPMEVENT28, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT29,
+        {"mhpmevent29", MISCREG_HPMEVENT29, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT30,
+        {"mhpmevent30", MISCREG_HPMEVENT30, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT31,
+        {"mhpmevent31", MISCREG_HPMEVENT31, rvTypeFlags(RV64, RV32)}},
+
+    {CSR_TSELECT,
+        {"tselect", MISCREG_TSELECT, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA1,
+        {"tdata1", MISCREG_TDATA1, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA2,
+        {"tdata2", MISCREG_TDATA2, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA3,
+        {"tdata3", MISCREG_TDATA3, rvTypeFlags(RV64, RV32)}},
+    {CSR_DCSR,
+        {"dcsr", MISCREG_DCSR, rvTypeFlags(RV64, RV32)}},
+    {CSR_DPC,
+        {"dpc", MISCREG_DPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_DSCRATCH,
+        {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}},
+
+    {CSR_VSTART,
+        {"vstart", MISCREG_VSTART, rvTypeFlags(RV64, RV32)}},
+    {CSR_VXSAT,
+        {"vxsat", MISCREG_VXSAT, rvTypeFlags(RV64, RV32)}},
+    {CSR_VXRM,
+        {"vxrm", MISCREG_VXRM, rvTypeFlags(RV64, RV32)}},
+    {CSR_VCSR,
+        {"vcsr", MISCREG_VCSR, rvTypeFlags(RV64, RV32)}},
+    {CSR_VL,
+        {"vl", MISCREG_VL, rvTypeFlags(RV64, RV32)}},
+    {CSR_VTYPE,
+        {"vtype", MISCREG_VTYPE, rvTypeFlags(RV64, RV32)}},
+    {CSR_VLENB,
+        {"VLENB", MISCREG_VLENB, rvTypeFlags(RV64, RV32)}}
 };
 
 /**
@@ -816,6 +1072,7 @@ const off_t SBE_OFFSET[enums::Num_RiscvType] = {
 const off_t SXL_OFFSET = 34;
 const off_t UXL_OFFSET = 32;
 const off_t FS_OFFSET = 13;
+const off_t VS_OFFSET = 9;
 const off_t FRM_OFFSET = 5;
 
 const RegVal ISA_MXL_MASKS[enums::Num_RiscvType] = {
@@ -853,7 +1110,7 @@ const RegVal STATUS_MPRV_MASK = 1ULL << 17;
 const RegVal STATUS_XS_MASK = 3ULL << 15;
 const RegVal STATUS_FS_MASK = 3ULL << FS_OFFSET;
 const RegVal STATUS_MPP_MASK = 3ULL << 11;
-const RegVal STATUS_VS_MASK = 3ULL << 9;
+const RegVal STATUS_VS_MASK = 3ULL << VS_OFFSET;
 const RegVal STATUS_SPP_MASK = 1ULL << 8;
 const RegVal STATUS_MPIE_MASK = 1ULL << 7;
 const RegVal STATUS_SPIE_MASK = 1ULL << 5;
diff --git a/src/arch/riscv/regs/vector.hh b/src/arch/riscv/regs/vector.hh
new file mode 100644
index 0000000000..60c840395f
--- /dev/null
+++ b/src/arch/riscv/regs/vector.hh
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 PLCT Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef __ARCH_RISCV_REGS_VECTOR_HH__
+#define __ARCH_RISCV_REGS_VECTOR_HH__
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "arch/generic/vec_pred_reg.hh"
+#include "arch/generic/vec_reg.hh"
+#include "arch/riscv/types.hh"
+#include "base/bitunion.hh"
+#include "cpu/reg_class.hh"
+#include "debug/VecRegs.hh"
+
+namespace gem5
+{
+
+namespace RiscvISA
+{
+
+using VecRegContainer = gem5::VecRegContainer<MaxVecLenInBytes>;
+using vreg_t = VecRegContainer;
+
+
+const int NumVecStandardRegs = 32; // named v0..v31 in VecRegNames
+const int NumVecInternalRegs = 8; // vtmp0..vtmp7, used by vector uop
+const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs;
+
+const std::vector<std::string> VecRegNames = { // one name per register index
+    "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",
+    "v8",   "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",
+    "v16",  "v17",  "v18",  "v19",  "v20",  "v21",  "v22",  "v23",
+    "v24",  "v25",  "v26",  "v27",  "v28",  "v29",  "v30",  "v31",
+    "vtmp0", "vtmp1", "vtmp2", "vtmp3", "vtmp4", "vtmp5", "vtmp6", "vtmp7"
+};
+
+// Index of the first internal (temporary) vector register, "vtmp0".
+const int VecMemInternalReg0 = NumVecStandardRegs;
+
+static inline TypedRegClassOps<RiscvISA::VecRegContainer> vecRegClassOps;
+// Register class for the vector file: NumVecRegs entries of VecRegContainer.
+inline constexpr RegClass vecRegClass =
+    RegClass(VecRegClass, VecRegClassName, NumVecRegs, debug::VecRegs).
+        ops(vecRegClassOps).
+        regType<VecRegContainer>();
+
+BitUnion64(VTYPE)
+    Bitfield<63> vill;      // top bit; the only bit vtype_set_vill() leaves set
+    Bitfield<7, 0> vtype8;  // low byte, overlapping vma/vta/vsew/vlmul below
+    Bitfield<7> vma;        // RVV spec: vector mask agnostic
+    Bitfield<6> vta;        // RVV spec: vector tail agnostic
+    Bitfield<5, 3> vsew;    // element width encoding, SEW = 8 << vsew
+    Bitfield<2, 0> vlmul;   // signed 3-bit log2 of the group multiplier LMUL
+EndBitUnion(VTYPE)
+
+} // namespace RiscvISA
+} // namespace gem5
+
+#endif // __ARCH_RISCV_REGS_VECTOR_HH__
diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh
index 1d501dc05f..c7edffc2f7 100644
--- a/src/arch/riscv/types.hh
+++ b/src/arch/riscv/types.hh
@@ -42,7 +42,6 @@
 #ifndef __ARCH_RISCV_TYPES_HH__
 #define __ARCH_RISCV_TYPES_HH__
 
-#include "arch/riscv/pcstate.hh"
 #include "base/bitunion.hh"
 
 namespace gem5
@@ -178,6 +177,10 @@ BitUnion64(ExtMachInst)
 
 EndBitUnion(ExtMachInst)
 
+constexpr unsigned MaxVecLenInBits = 65536; // upper bound on vector length
+constexpr unsigned MaxVecLenInBytes  = MaxVecLenInBits >> 3; // bits / 8
+
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh
index 5fccc84c79..bac499e523 100644
--- a/src/arch/riscv/utility.hh
+++ b/src/arch/riscv/utility.hh
@@ -51,6 +51,7 @@
 
 #include "arch/riscv/regs/float.hh"
 #include "arch/riscv/regs/int.hh"
+#include "arch/riscv/regs/vector.hh"
 #include "base/types.hh"
 #include "cpu/reg_class.hh"
 #include "cpu/static_inst.hh"
@@ -130,7 +131,14 @@ registerName(RegId reg)
             return str.str();
         }
         return float_reg::RegNames[reg.index()];
-    } else {
+    } else if (reg.is(VecRegClass)) {
+        if (reg.index() >= NumVecRegs) {
+            std::stringstream str;
+            str << "?? (v" << reg.index() << ')';
+            return str.str();
+        }
+        return VecRegNames[reg.index()];
+    } else  {
         /* It must be an InvalidRegClass, in RISC-V we should treat it as a
          * zero register for the disassembler to work correctly.
          */
@@ -233,6 +241,543 @@ remu(T rs1, T rs2)
     return (rs2 == 0) ? rs1 : rs1 % rs2;
 }
 
+// Vector extension functions
+inline uint64_t
+vtype_SEW(const uint64_t vtype)
+{
+    return 8 << bits(vtype, 5, 3);
+}
+
+/*
+* Encode LMUL to lmul as follows:
+*     LMUL    vlmul    lmul
+*      1       000       0
+*      2       001       1
+*      4       010       2
+*      8       011       3
+*      -       100       -
+*     1/8      101      -3
+*     1/4      110      -2
+*     1/2      111      -1
+*
+* then, we can calculate VLMAX = vlen >> (vsew + 3 - lmul)
+* e.g. vlen = 256 bits, SEW = 16, LMUL = 1/8
+*      => VLMAX = vlen >> (1 + 3 - (-3))
+*               = 256 >> 7
+*               = 2
+* Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h
+*/
+inline uint64_t
+vtype_VLMAX(const uint64_t vtype, const uint64_t vlen,
+    const bool per_reg = false)
+{
+    int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0));
+    lmul = per_reg ? std::min<int64_t>(0, lmul) : lmul;
+    int64_t vsew = bits(vtype, 5, 3);
+    return vlen >> (vsew + 3 - lmul);
+}
+
+inline int64_t
+vtype_vlmul(const uint64_t vtype)
+{
+    return (int64_t)sext<3>(bits(vtype, 2, 0));
+}
+
+inline uint64_t
+vtype_regs_per_group(const uint64_t vtype)
+{
+    int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0));
+    return 1 << std::max<int64_t>(0, lmul);
+}
+
+inline void
+vtype_set_vill(uint64_t& vtype)
+{
+    vtype = uint64_t(1) << (sizeof(RegVal) * 8 - 1); // top bit only, rest 0
+}
+
+inline uint64_t
+width_EEW(uint64_t width)
+{
+    switch (width) {
+    case 0b000: return 8;
+    case 0b101: return 16;
+    case 0b110: return 32;
+    case 0b111: return 64;
+    default: GEM5_UNREACHABLE;
+    }
+}
+
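+/*
+ * Return mask bit `index` from a mask register viewed as an array of T.
+ * RISC-V "V" spec, Section 4.5 (mask register layout). Ref:
+ * https://github.com/qemu/qemu/blob/c7d773ae/target/riscv/vector_helper.c
+ */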
+template<typename T>
+inline int
+elem_mask(const T* vs, const int index)
+{
+    static_assert(std::is_integral_v<T>);
+    int idx = index / (sizeof(T)*8);
+    int pos = index % (sizeof(T)*8);
+    return (vs[idx] >> pos) & 1;
+}
+
+template<typename Type> struct double_width;
+template<> struct double_width<uint8_t>     { using type = uint16_t;};
+template<> struct double_width<uint16_t>    { using type = uint32_t;};
+template<> struct double_width<uint32_t>    { using type = uint64_t;};
+template<> struct double_width<int8_t>      { using type = int16_t; };
+template<> struct double_width<int16_t>     { using type = int32_t; };
+template<> struct double_width<int32_t>     { using type = int64_t; };
+template<> struct double_width<float32_t>   { using type = float64_t;};
+
+template<typename Type> struct double_widthf;
+template<> struct double_widthf<uint32_t>    { using type = float64_t;};
+template<> struct double_widthf<int32_t>     { using type = float64_t;};
+
+template<typename FloatType, typename IntType = decltype(FloatType::v)> auto
+ftype(IntType a) -> FloatType
+{
+    if constexpr(std::is_same_v<uint32_t, IntType>)
+        return f32(a);
+    else if constexpr(std::is_same_v<uint64_t, IntType>)
+        return f64(a);
+    GEM5_UNREACHABLE;
+}
+
+// TODO: Consolidate ftype_freg(freg_t a) and ftype(IntType a) into a
+// single function
+template<typename FloatType, typename IntType = decltype(FloatType::v)> auto
+ftype_freg(freg_t a) -> FloatType
+{
+    if constexpr(std::is_same_v<uint32_t, IntType>)
+        return f32(a);
+    else if constexpr(std::is_same_v<uint64_t, IntType>)
+        return f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fadd(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_add(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_add(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fsub(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_sub(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_sub(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fmin(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_min(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_min(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fmax(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_max(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_max(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fdiv(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_div(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_div(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fmul(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_mul(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_mul(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fsqrt(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_sqrt(a);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_sqrt(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+frsqrte7(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_rsqrte7(a);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_rsqrte7(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+frecip7(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_recip7(a);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_recip7(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fclassify(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32(f32_classify(a));
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64(f64_classify(a));
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fsgnj(FloatType a, FloatType b, bool n, bool x)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return fsgnj32(a, b, n, x);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return fsgnj64(a, b, n, x);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> bool
+fle(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_le(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_le(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> bool
+feq(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_eq(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_eq(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> bool
+flt(FloatType a, FloatType b)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_lt(a, b);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_lt(a, b);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fmadd(FloatType a, FloatType b, FloatType c)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_mulAdd(a, b, c);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_mulAdd(a, b, c);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType> FloatType
+fneg(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32(a.v ^ uint32_t(mask(31, 31)));
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64(a.v ^ mask(63, 63));
+    GEM5_UNREACHABLE;
+}
+
+template<typename FT, typename WFT = typename double_width<FT>::type> WFT
+fwiden(FT a)
+{
+    if constexpr(std::is_same_v<float32_t, FT>)
+        return f32_to_f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType, typename IntType = decltype(FloatType::v)> IntType
+f_to_ui(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_to_ui32(a, mode, true);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_to_ui64(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename IntType = decltype(double_width<FloatType>::type::v)
+> IntType
+f_to_wui(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_to_ui64(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename IntType,
+    typename FloatType = typename double_widthf<IntType>::type
+> IntType
+f_to_nui(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_to_ui32(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType, typename IntType = decltype(FloatType::v)> IntType
+f_to_i(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return (uint32_t)f32_to_i32(a, mode, true);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return (uint64_t)f64_to_i64(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename IntType = decltype(double_width<FloatType>::type::v)
+> IntType
+f_to_wi(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return (uint64_t)f32_to_i64(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename IntType,
+    typename FloatType = typename double_widthf<IntType>::type
+> IntType
+f_to_ni(FloatType a, uint_fast8_t mode)
+{
+    if constexpr(std::is_same_v<float64_t, FloatType>)
+        return (uint32_t)f64_to_i32(a, mode, true);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType, typename IntType = decltype(FloatType::v)>
+FloatType
+ui_to_f(IntType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return ui32_to_f32(a);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return ui64_to_f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename IntType,
+    typename FloatType = typename double_widthf<IntType>::type
+> FloatType
+ui_to_wf(IntType a)
+{
+    if constexpr(std::is_same_v<float64_t, FloatType>)
+        return ui32_to_f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename IntType = decltype(double_width<FloatType>::type::v)
+> FloatType
+ui_to_nf(IntType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return ui64_to_f32(a);
+    GEM5_UNREACHABLE;
+}
+
+template<typename FloatType, typename IntType = decltype(FloatType::v)>
+FloatType
+i_to_f(IntType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return i32_to_f32((int32_t)a);
+    else if constexpr(std::is_same_v<float64_t, FloatType>)
+        return i64_to_f64((int64_t)a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename IntType,
+    typename FloatType = typename double_widthf<IntType>::type
+> FloatType
+i_to_wf(IntType a)
+{
+    if constexpr(std::is_same_v<float64_t, FloatType>)
+        return i32_to_f64((int32_t)a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename IntType = std::make_signed_t<
+        decltype(double_width<FloatType>::type::v)
+    >
+> FloatType
+i_to_nf(IntType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return i64_to_f32(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename FloatWType = typename double_width<FloatType>::type
+> FloatWType
+f_to_wf(FloatType a)
+{
+    if constexpr(std::is_same_v<float32_t, FloatType>)
+        return f32_to_f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatNType,
+    typename FloatType = typename double_width<FloatNType>::type
+> FloatNType
+f_to_nf(FloatType a)
+{
+    if constexpr(std::is_same_v<float64_t, FloatType>)
+        return f64_to_f32(a);
+    GEM5_UNREACHABLE;
+}
+
+//ref:  https://locklessinc.com/articles/sat_arithmetic/
+template<typename T> T
+sat_add(T x, T y, bool* sat)
+{
+    using UT = std::make_unsigned_t<T>;
+    UT ux = x;
+    UT uy = y;
+    UT res = ux + uy;
+
+    int sh = sizeof(T) * 8 - 1;
+
+    ux = (ux >> sh) + (((UT)0x1 << sh) - 1);
+
+    if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) {
+    res = ux;
+    *sat = true;
+    }
+    return res;
+}
+
+template<typename T> T
+sat_sub(T x, T y, bool* sat)
+{
+    using UT = std::make_unsigned_t<T>;
+    UT ux = x;
+    UT uy = y;
+    UT res = ux - uy;
+
+    int sh = sizeof(T) * 8 - 1;
+
+    ux = (ux >> sh) + (((UT)0x1 << sh) - 1);
+
+    if ((T) ((ux ^ uy) & (ux ^ res)) < 0) {
+    res = ux;
+    *sat = true;
+    }
+    return res;
+}
+
+template<typename T> T
+sat_addu(T x, T y, bool* sat)
+{
+    T res = x + y;
+
+    bool t = res < x;
+    if (false == *sat){
+    *sat = t;
+    }
+    res |= -(res < x);
+
+    return res;
+}
+
+template<typename T> T
+sat_subu(T x, T y, bool* sat)
+{
+    T res = x - y;
+
+    bool t = !(res <= x);
+    if (false == *sat){
+    *sat = t;
+    }
+
+    res &= -(res <= x);
+
+    return res;
+}
+
+/**
+ * Ref:
+ * https://github.com/riscv-software-src/riscv-isa-sim
+ */
+template<typename T> T
+int_rounding(T result, uint8_t xrm, unsigned gb) {
+    const uint64_t lsb = 1UL << gb;
+    const uint64_t lsb_half = lsb >> 1;
+    switch (xrm) {
+    case 0 /* RNU */:
+        result += lsb_half;
+        break;
+    case 1 /* RNE */:
+        if ((result & lsb_half) &&
+            ((result & (lsb_half - 1)) || (result & lsb)))
+            result += lsb;
+        break;
+    case 2 /* RDN */:
+        break;
+    case 3 /* ROD */:
+        if (result & (lsb - 1))
+            result |= lsb;
+        break;
+    default:
+        panic("Invalid xrm value %d", (int)xrm);
+    }
+
+    return result;
+}
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/x86/X86FsWorkload.py b/src/arch/x86/X86FsWorkload.py
index 294241b51c..277a37988e 100644
--- a/src/arch/x86/X86FsWorkload.py
+++ b/src/arch/x86/X86FsWorkload.py
@@ -65,6 +65,7 @@ class X86FsWorkload(KernelWorkload):
     acpi_description_table_pointer = Param.X86ACPIRSDP(
         X86ACPIRSDP(), "ACPI root description pointer structure"
     )
+    enable_osxsave = Param.Bool(False, "Enable OSXSAVE in CR4 register")
 
 
 class X86FsLinux(X86FsWorkload):
diff --git a/src/arch/x86/X86ISA.py b/src/arch/x86/X86ISA.py
index bb72c415e9..aa48d1aa6e 100644
--- a/src/arch/x86/X86ISA.py
+++ b/src/arch/x86/X86ISA.py
@@ -54,3 +54,73 @@ class X86ISA(BaseISA):
     vendor_string = Param.String(
         "HygonGenuine", "Vendor string for CPUID instruction"
     )
+    name_string = Param.String(
+        "Fake gem5 x86_64 CPU", "Processor name for CPUID instruction"
+    )
+
+    # For the functions that return numerical values we use a vector of ints.
+    # The order of the values is: EAX, EBX, EDX, ECX.
+    #
+    # If the CPU function can take an index, the index value is used as an
+    # offset into the vector and four numerical values are added for each
+    # possible index value. For example, if the function accepts 3 index
+    # values, there are 12 total ints in the vector param. In addition, the
+    # last values for functions which take an index must be all zeros. All
+    # zeros indicates to the KVM cpu / OS that there are no more index values
+    # to iterate over.
+    #
+    # A good resource for these values can be found here:
+    #     https://sandpile.org/x86/cpuid.htm
+    # 0000_0001h
+    FamilyModelStepping = VectorParam.UInt32(
+        [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x00000209],
+        "type/family/model/stepping and feature flags",
+    )
+    # 0000_0004h
+    CacheParams = VectorParam.UInt32(
+        [0x00000000, 0x00000000, 0x00000000, 0x00000000],
+        "cache configuration descriptors",
+    )
+    # 0000_0007h
+    ExtendedFeatures = VectorParam.UInt32(
+        [0x00000000, 0x01800000, 0x00000000, 0x00000000], "feature flags"
+    )
+    # 0000_000Dh - This uses ECX index, so the last entry must be all zeros
+    ExtendedState = VectorParam.UInt32(
+        [
+            0x00000000,
+            0x00000000,
+            0x00000000,
+            0x00000000,
+            0x00000000,
+            0x00000000,
+            0x00000000,
+            0x00000000,
+        ],
+        "extended state enumeration",
+    )
+    # 8000_0001h
+    FamilyModelSteppingBrandFeatures = VectorParam.UInt32(
+        [0x00020F51, 0x00000405, 0xEBD3FBFF, 0x00020001],
+        "family/model/stepping and features flags",
+    )
+    # 8000_0005h
+    L1CacheAndTLB = VectorParam.UInt32(
+        [0xFF08FF08, 0xFF20FF20, 0x40020140, 0x40020140],
+        "L1 cache and L1 TLB configuration descriptors",
+    )
+    # 8000_0006h
+    L2L3CacheAndL2TLB = VectorParam.UInt32(
+        [0x00000000, 0x42004200, 0x00000000, 0x04008140],
+        "L2/L3 cache and L2 TLB configuration descriptors",
+    )
+    # 8000_0007h
+    APMInfo = VectorParam.UInt32(
+        [0x80000018, 0x68747541, 0x69746E65, 0x444D4163],
+        "processor feedback capabilities",
+    )
+    # 8000_0008h
+    LongModeAddressSize = VectorParam.UInt32(
+        [0x00003030, 0x00000000, 0x00000000, 0x00000000],
+        "miscellaneous information",
+    )
diff --git a/src/arch/x86/bios/ACPI.py b/src/arch/x86/bios/ACPI.py
index fbbeda015c..90d0ce094c 100644
--- a/src/arch/x86/bios/ACPI.py
+++ b/src/arch/x86/bios/ACPI.py
@@ -36,6 +36,7 @@
 from m5.params import *
 from m5.SimObject import SimObject
 
+
 # ACPI description table header. Subclasses contain and handle the actual
 # contents as appropriate for that type of table.
 class X86ACPISysDescTable(SimObject):
diff --git a/src/arch/x86/cpuid.cc b/src/arch/x86/cpuid.cc
index ac4709ce0e..2ce9ec9289 100644
--- a/src/arch/x86/cpuid.cc
+++ b/src/arch/x86/cpuid.cc
@@ -31,162 +31,135 @@
 #include "arch/x86/isa.hh"
 #include "base/bitfield.hh"
 #include "cpu/thread_context.hh"
+#include "debug/X86.hh"
 
 namespace gem5
 {
 
-namespace X86ISA {
-    enum StandardCpuidFunction
-    {
-        VendorAndLargestStdFunc,
-        FamilyModelStepping,
-        CacheAndTLB,
-        SerialNumber,
-        CacheParams,
-        MonitorMwait,
-        ThermalPowerMgmt,
-        ExtendedFeatures,
-        NumStandardCpuidFuncs
-    };
-
-    enum ExtendedCpuidFunctions
-    {
-        VendorAndLargestExtFunc,
-        FamilyModelSteppingBrandFeatures,
-        NameString1,
-        NameString2,
-        NameString3,
-        L1CacheAndTLB,
-        L2L3CacheAndL2TLB,
-        APMInfo,
-        LongModeAddressSize,
-
-        /*
-         * The following are defined by the spec but not yet implemented
-         */
-/*      // Function 9 is reserved
-        SVMInfo = 10,
-        // Functions 11-24 are reserved
-        TLB1GBPageInfo = 25,
-        PerformanceInfo,*/
-
-        NumExtendedCpuidFuncs
-    };
-
-    static const int nameStringSize = 48;
-    static const char nameString[nameStringSize] = "Fake M5 x86_64 CPU";
-
-    uint64_t
-    stringToRegister(const char *str)
-    {
-        uint64_t reg = 0;
-        for (int pos = 3; pos >=0; pos--) {
-            reg <<= 8;
-            reg |= str[pos];
-        }
-        return reg;
+namespace X86ISA
+{
+
+X86CPUID::X86CPUID(const std::string& vendor, const std::string& name)
+    : vendorString(vendor), nameString(name)
+{
+    fatal_if(vendorString.size() != 12,
+             "CPUID vendor string must be 12 characters\n");
+}
+
+void
+X86CPUID::addStandardFunc(uint32_t func, std::vector<uint32_t> values)
+{
+    capabilities[func] = values;
+}
+
+void
+X86CPUID::addExtendedFunc(uint32_t func, std::vector<uint32_t> values)
+{
+    // Extended functions begin with 8000_0000h, but the enum is based from
+    // zero, so we need to add that to the function value.
+    capabilities[func | 0x80000000] = values;
+}
+
+bool
+X86CPUID::doCpuid(ThreadContext * tc, uint32_t function, uint32_t index,
+                  CpuidResult &result)
+{
+    constexpr uint32_t ext = 0x80000000;
+
+    DPRINTF(X86, "Calling CPUID function %x with index %d\n", function, index);
+
+    // Handle the string-related CPUID functions specially
+    if (function == VendorAndLargestStdFunc) {
+        result = CpuidResult(NumStandardCpuidFuncs - 1,
+                             stringToRegister(vendorString.c_str()),
+                             stringToRegister(vendorString.c_str() + 4),
+                             stringToRegister(vendorString.c_str() + 8));
+
+        return true;
+    } else if (function == (ext | VendorAndLargestExtFunc)) {
+        result = CpuidResult(0x80000000 + NumExtendedCpuidFuncs - 1,
+                             stringToRegister(vendorString.c_str()),
+                             stringToRegister(vendorString.c_str() + 4),
+                             stringToRegister(vendorString.c_str() + 8));
+
+        return true;
+    } else if ((function == (ext | NameString1)) ||
+               (function == (ext | NameString2)) ||
+               (function == (ext | NameString3))) {
+        // Zero fill anything beyond the end of the string. This
+        // should go away once the string is a vetted parameter.
+        char cleanName[nameStringSize];
+        memset(cleanName, '\0', nameStringSize);
+        strncpy(cleanName, nameString.c_str(), nameStringSize-1);
+
+        int funcNum = bits(function, 15, 0);
+        int offset = (funcNum - NameString1) * 16;
+        assert(nameStringSize >= offset + 16);
+        result = CpuidResult(
+                stringToRegister(cleanName + offset + 0),
+                stringToRegister(cleanName + offset + 4),
+                stringToRegister(cleanName + offset + 12),
+                stringToRegister(cleanName + offset + 8));
+
+        return true;
+    }
+
+    // Ignore anything not in the map of supported CPUID functions.
+    // This is checked after the string-related functions as those are not
+    // in the capabilities map.
+    if (!capabilities.count(function)) {
+        return false;
+    }
+
+    auto &cap_vec = capabilities[function];
+
+    // Ignore index values for functions that do not take index values.
+    // The index is supplied by the caller, so compute the offset in 64 bits
+    // and bounds check it at run time; an assert vanishes in NDEBUG builds.
+    const uint64_t cap_offset =
+        hasSignificantIndex(function) ? uint64_t(index) * 4 : 0;
+    if (cap_vec.size() < cap_offset + 4) {
+        return false;
+    }
+
+    result = CpuidResult(cap_vec[cap_offset + 0], cap_vec[cap_offset + 1],
+                         cap_vec[cap_offset + 2], cap_vec[cap_offset + 3]);
+    DPRINTF(X86, "CPUID function %x returning (%x, %x, %x, %x)\n",
+            function, result.rax, result.rbx, result.rdx, result.rcx);
+
+    return true;
+}
+
+uint64_t
+X86CPUID::stringToRegister(const char *str)
+{
+    uint64_t reg = 0; // Packs str[0..3] little-endian: str[0] is the LSB.
+    for (int pos = 3; pos >= 0; pos--) {
+        reg <<= 8;
+        reg |= static_cast<unsigned char>(str[pos]); // no sign extension
+    }
+    return reg;
+}
+
+// Return true if the CPUID function takes ECX index as an input AND
+// those multiple index values are supported in gem5.
+bool
+X86CPUID::hasSignificantIndex(uint32_t function)
+{
+    uint16_t family = bits(function, 31, 16);
+    uint16_t funcNum = bits(function, 15, 0);
 
-    bool
-    doCpuid(ThreadContext * tc, uint32_t function,
-            uint32_t index, CpuidResult &result)
-    {
-        uint16_t family = bits(function, 31, 16);
-        uint16_t funcNum = bits(function, 15, 0);
-        if (family == 0x8000) {
-            // The extended functions
-            switch (funcNum) {
-              case VendorAndLargestExtFunc:
-                {
-                  ISA *isa = dynamic_cast<ISA *>(tc->getIsaPtr());
-                  auto vendor_string = isa->getVendorString();
-                  result = CpuidResult(
-                          0x80000000 + NumExtendedCpuidFuncs - 1,
-                          stringToRegister(vendor_string.c_str()),
-                          stringToRegister(vendor_string.c_str() + 4),
-                          stringToRegister(vendor_string.c_str() + 8));
-                }
-                break;
-              case FamilyModelSteppingBrandFeatures:
-                result = CpuidResult(0x00020f51, 0x00000405,
-                                     0xebd3fbff, 0x00020001);
-                break;
-              case NameString1:
-              case NameString2:
-              case NameString3:
-                {
-                    // Zero fill anything beyond the end of the string. This
-                    // should go away once the string is a vetted parameter.
-                    char cleanName[nameStringSize];
-                    memset(cleanName, '\0', nameStringSize);
-                    strncpy(cleanName, nameString, nameStringSize);
-
-                    int offset = (funcNum - NameString1) * 16;
-                    assert(nameStringSize >= offset + 16);
-                    result = CpuidResult(
-                            stringToRegister(cleanName + offset + 0),
-                            stringToRegister(cleanName + offset + 4),
-                            stringToRegister(cleanName + offset + 12),
-                            stringToRegister(cleanName + offset + 8));
-                }
-                break;
-              case L1CacheAndTLB:
-                result = CpuidResult(0xff08ff08, 0xff20ff20,
-                                     0x40020140, 0x40020140);
-                break;
-              case L2L3CacheAndL2TLB:
-                result = CpuidResult(0x00000000, 0x42004200,
-                                     0x00000000, 0x04008140);
-                break;
-              case APMInfo:
-                result = CpuidResult(0x80000018, 0x68747541,
-                                     0x69746e65, 0x444d4163);
-                break;
-              case LongModeAddressSize:
-                result = CpuidResult(0x00003030, 0x00000000,
-                                     0x00000000, 0x00000000);
-                break;
-/*            case SVMInfo:
-              case TLB1GBPageInfo:
-              case PerformanceInfo:*/
-              default:
-                warn("x86 cpuid family 0x8000: unimplemented function %u",
-                    funcNum);
-                return false;
-            }
-        } else if (family == 0x0000) {
-            // The standard functions
-            switch (funcNum) {
-              case VendorAndLargestStdFunc:
-                {
-                  ISA *isa = dynamic_cast<ISA *>(tc->getIsaPtr());
-                  auto vendor_string = isa->getVendorString();
-                  result = CpuidResult(
-                          NumStandardCpuidFuncs - 1,
-                          stringToRegister(vendor_string.c_str()),
-                          stringToRegister(vendor_string.c_str() + 4),
-                          stringToRegister(vendor_string.c_str() + 8));
-                }
-                break;
-              case FamilyModelStepping:
-                result = CpuidResult(0x00020f51, 0x00000805,
-                                     0xefdbfbff, 0x00000209);
-                break;
-              case ExtendedFeatures:
-                result = CpuidResult(0x00000000, 0x01800000,
-                                     0x00000000, 0x00000000);
-                break;
-              default:
-                warn("x86 cpuid family 0x0000: unimplemented function %u",
-                    funcNum);
-                return false;
-            }
-        } else {
-            warn("x86 cpuid: unknown family %#x", family);
+    if (family == 0x0000) {
+        switch (funcNum) {
+          case ExtendedState:
+            return true;
+          default:
             return false;
         }
-
-        return true;
     }
+
+    return false;
+}
+
 } // namespace X86ISA
 } // namespace gem5
diff --git a/src/arch/x86/cpuid.hh b/src/arch/x86/cpuid.hh
index 5c1a8ccb16..1c932980d2 100644
--- a/src/arch/x86/cpuid.hh
+++ b/src/arch/x86/cpuid.hh
@@ -29,7 +29,10 @@
 #ifndef __ARCH_X86_CPUID_HH__
 #define __ARCH_X86_CPUID_HH__
 
+#include <unordered_map>
+
 #include "base/types.hh"
+#include "params/X86ISA.hh"
 
 namespace gem5
 {
@@ -38,28 +41,74 @@ class ThreadContext;
 
 namespace X86ISA
 {
-    struct CpuidResult
-    {
-        uint64_t rax;
-        uint64_t rbx;
-        uint64_t rcx;
-        uint64_t rdx;
-
-        // These are not in alphebetical order on purpose. The order reflects
-        // how the CPUID orders the registers when it returns results.
-        CpuidResult(uint64_t _rax, uint64_t _rbx,
-                    uint64_t _rdx, uint64_t _rcx) :
-            rax(_rax), rbx(_rbx), rcx(_rcx), rdx(_rdx)
-        {}
-
-        CpuidResult()
-        {}
-    };
 
-    uint64_t stringToRegister(const char *str);
+enum StandardCpuidFunction
+{
+    VendorAndLargestStdFunc,
+    FamilyModelStepping,
+    CacheAndTLB,
+    SerialNumber,
+    CacheParams,
+    MonitorMwait,
+    ThermalPowerMgmt,
+    ExtendedFeatures,
+    ExtendedState = 0xD,
+    NumStandardCpuidFuncs
+};
+
+enum ExtendedCpuidFunctions
+{
+    VendorAndLargestExtFunc,
+    FamilyModelSteppingBrandFeatures,
+    NameString1,
+    NameString2,
+    NameString3,
+    L1CacheAndTLB,
+    L2L3CacheAndL2TLB,
+    APMInfo,
+    LongModeAddressSize,
+    NumExtendedCpuidFuncs
+};
+
+constexpr int nameStringSize = 48;
+
+struct CpuidResult
+{
+    uint64_t rax;
+    uint64_t rbx;
+    uint64_t rcx;
+    uint64_t rdx;
+
+    // These are not in alphabetical order on purpose. The order reflects
+    // how the CPUID orders the registers when it returns results.
+    CpuidResult(uint64_t _rax, uint64_t _rbx,
+                uint64_t _rdx, uint64_t _rcx) :
+        rax(_rax), rbx(_rbx), rcx(_rcx), rdx(_rdx)
+    {}
+
+    CpuidResult()
+    {} // leaves all four registers uninitialized
+};
+
+class X86CPUID
+{
+  public:
+    X86CPUID(const std::string& vendor, const std::string& name);
+
+    void addStandardFunc(uint32_t func, std::vector<uint32_t> values);
+    void addExtendedFunc(uint32_t func, std::vector<uint32_t> values);
 
     bool doCpuid(ThreadContext * tc, uint32_t function,
-            uint32_t index, CpuidResult &result);
+                 uint32_t index, CpuidResult &result);
+    bool hasSignificantIndex(uint32_t function);
+
+  private:
+    const std::string vendorString;
+    const std::string nameString;
+    std::unordered_map<uint32_t, std::vector<uint32_t>> capabilities;
+
+    uint64_t stringToRegister(const char *str);
+};
 
 } // namespace X86ISA
 } // namespace gem5
diff --git a/src/arch/x86/decoder.cc b/src/arch/x86/decoder.cc
index ef87ff37c4..af2456d6ab 100644
--- a/src/arch/x86/decoder.cc
+++ b/src/arch/x86/decoder.cc
@@ -687,6 +687,8 @@ Decoder::decode(ExtMachInst mach_inst, Addr addr)
         (*instMap)[mach_inst] = si;
     }
 
+    si->size(basePC + offset - origPC);
+
     DPRINTF(Decode, "Decode: Decoded %s instruction: %#x\n",
             si->getName(), mach_inst);
     return si;
@@ -732,8 +734,7 @@ Decoder::decode(PCStateBase &next_pc)
         start = 0;
     }
 
-    si = decode(emi, origPC);
-    return si;
+    return decode(emi, origPC);
 }
 
 StaticInstPtr
diff --git a/src/arch/x86/fs_workload.cc b/src/arch/x86/fs_workload.cc
index 1a412380a6..88d7deed68 100644
--- a/src/arch/x86/fs_workload.cc
+++ b/src/arch/x86/fs_workload.cc
@@ -58,7 +58,8 @@ FsWorkload::FsWorkload(const Params &p) : KernelWorkload(p),
     smbiosTable(p.smbios_table),
     mpFloatingPointer(p.intel_mp_pointer),
     mpConfigTable(p.intel_mp_table),
-    rsdp(p.acpi_description_table_pointer)
+    rsdp(p.acpi_description_table_pointer),
+    enable_osxsave(p.enable_osxsave)
 {}
 
 void
@@ -295,6 +296,7 @@ FsWorkload::initState()
     CR4 cr4 = tc->readMiscRegNoEffect(misc_reg::Cr4);
     // Turn on pae.
     cr4.pae = 1;
+    cr4.osxsave = enable_osxsave;
     tc->setMiscReg(misc_reg::Cr4, cr4);
 
     // Point to the page tables.
diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh
index 9d14f91bb5..81db414fb2 100644
--- a/src/arch/x86/fs_workload.hh
+++ b/src/arch/x86/fs_workload.hh
@@ -106,6 +106,9 @@ class FsWorkload : public KernelWorkload
             Addr &fpSize, Addr &tableSize, Addr table=0);
 
     void writeOutACPITables(Addr begin, Addr &size);
+
+  private:
+    bool enable_osxsave;
 };
 
 } // namespace X86ISA
diff --git a/src/arch/x86/insts/macroop.hh b/src/arch/x86/insts/macroop.hh
index 36718f77fd..071037b173 100644
--- a/src/arch/x86/insts/macroop.hh
+++ b/src/arch/x86/insts/macroop.hh
@@ -103,6 +103,14 @@ class MacroopBase : public X86StaticInst
     {
         return env;
     }
+
+    void size(size_t newSize) override
+    {
+        for (int i = 0; i < numMicroops; i++) {
+            microops[i]->size(newSize);
+        }
+        _size = newSize;
+    }
 };
 
 } // namespace X86ISA
diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc
index 31efae3a43..7d401a6c59 100644
--- a/src/arch/x86/isa.cc
+++ b/src/arch/x86/isa.cc
@@ -151,10 +151,20 @@ RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 
 } // anonymous namespace
 
-ISA::ISA(const X86ISAParams &p) : BaseISA(p), vendorString(p.vendor_string)
+ISA::ISA(const X86ISAParams &p)
+    : BaseISA(p), cpuid(new X86CPUID(p.vendor_string, p.name_string))
 {
-    fatal_if(vendorString.size() != 12,
-             "CPUID vendor string must be 12 characters\n");
+    cpuid->addStandardFunc(FamilyModelStepping, p.FamilyModelStepping);
+    cpuid->addStandardFunc(CacheParams, p.CacheParams);
+    cpuid->addStandardFunc(ExtendedFeatures, p.ExtendedFeatures);
+    cpuid->addStandardFunc(ExtendedState, p.ExtendedState);
+
+    cpuid->addExtendedFunc(FamilyModelSteppingBrandFeatures,
+                          p.FamilyModelSteppingBrandFeatures);
+    cpuid->addExtendedFunc(L1CacheAndTLB, p.L1CacheAndTLB);
+    cpuid->addExtendedFunc(L2L3CacheAndL2TLB, p.L2L3CacheAndL2TLB);
+    cpuid->addExtendedFunc(APMInfo, p.APMInfo);
+    cpuid->addExtendedFunc(LongModeAddressSize, p.LongModeAddressSize);
 
     _regClasses.push_back(&flatIntRegClass);
     _regClasses.push_back(&flatFloatRegClass);
@@ -252,7 +262,7 @@ ISA::setMiscRegNoEffect(RegIndex idx, RegVal val)
         reg_width = 3;
         break;
       case misc_reg::Ftw:
-        reg_width = 8;
+        reg_width = 16;
         break;
       case misc_reg::Fsw:
       case misc_reg::Fcw:
diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh
index f7ae210f96..9c6dcf0921 100644
--- a/src/arch/x86/isa.hh
+++ b/src/arch/x86/isa.hh
@@ -33,6 +33,7 @@
 #include <string>
 
 #include "arch/generic/isa.hh"
+#include "arch/x86/cpuid.hh"
 #include "arch/x86/pcstate.hh"
 #include "arch/x86/regs/ccr.hh"
 #include "arch/x86/regs/float.hh"
@@ -93,6 +94,8 @@ class ISA : public BaseISA
     void setThreadContext(ThreadContext *_tc) override;
 
     std::string getVendorString() const;
+
+    std::unique_ptr<X86CPUID> cpuid;
 };
 
 } // namespace X86ISA
diff --git a/src/arch/x86/isa/decoder/two_byte_opcodes.isa b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
index 38937cb3e2..dac5706a06 100644
--- a/src/arch/x86/isa/decoder/two_byte_opcodes.isa
+++ b/src/arch/x86/isa/decoder/two_byte_opcodes.isa
@@ -690,8 +690,9 @@
             }
             0x2: CPUIDInst::CPUID({{
                 CpuidResult result;
-                bool success = doCpuid(xc->tcBase(), bits(Rax, 31, 0),
-                    bits(Rcx, 31, 0), result);
+                ISA *isa = dynamic_cast<ISA *>(xc->tcBase()->getIsaPtr());
+                bool success = isa->cpuid->doCpuid(xc->tcBase(),
+                    bits(Rax, 31, 0), bits(Rcx, 31, 0), result);
                 if (success) {
                     Rax = result.rax;
                     Rbx = result.rbx;
diff --git a/src/arch/x86/isa/includes.isa b/src/arch/x86/isa/includes.isa
index 6fc5f448a0..9445f2032b 100644
--- a/src/arch/x86/isa/includes.isa
+++ b/src/arch/x86/isa/includes.isa
@@ -63,6 +63,7 @@ output header {{
 #include "arch/x86/insts/microregop.hh"
 #include "arch/x86/insts/microspecop.hh"
 #include "arch/x86/insts/static_inst.hh"
+#include "arch/x86/isa.hh"
 #include "arch/x86/regs/ccr.hh"
 #include "arch/x86/regs/int.hh"
 #include "arch/x86/regs/misc.hh"
diff --git a/src/arch/x86/isa/microops/fpop.isa b/src/arch/x86/isa/microops/fpop.isa
index 5365c587ec..b0b925f679 100644
--- a/src/arch/x86/isa/microops/fpop.isa
+++ b/src/arch/x86/isa/microops/fpop.isa
@@ -430,4 +430,6 @@ let {{
     class Pop87(Fp0Op):
         code = ''
         op_class = 'IntAluOp'
+        def __init__(self):
+            super().__init__(spm=1)
 }};
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
index 599b5faef5..0b1d1fe0eb 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -393,7 +393,7 @@ let {{
 
                 // Handle saturation.
                 if (signBit) {
-                    if (overflow != mask(destBits - srcBits + 1)) {
+                    if (overflow != mask(srcBits - destBits + 1)) {
                         if (signedOp())
                             picked = (1ULL << (destBits - 1));
                         else
@@ -421,7 +421,7 @@ let {{
 
                 // Handle saturation.
                 if (signBit) {
-                    if (overflow != mask(destBits - srcBits + 1)) {
+                    if (overflow != mask(srcBits - destBits + 1)) {
                         if (signedOp())
                             picked = (1ULL << (destBits - 1));
                         else
diff --git a/src/arch/x86/isa/specialize.isa b/src/arch/x86/isa/specialize.isa
index a86d5126b6..236465dc1b 100644
--- a/src/arch/x86/isa/specialize.isa
+++ b/src/arch/x86/isa/specialize.isa
@@ -240,7 +240,7 @@ let {{
                     regFormat = \
                         "printReg(out, intRegClass[%s], regSize);\n"
                     regSuffix = "_R"
-                env.addToDisassembly(regFormat % ModRMRegIndex)
+                env.addToDisassembly(regFormat % ModRMRMIndex)
                 return doSplitDecode("MODRM_MOD",
                     {"3" : (specializeInst, Name + regSuffix,
                             copy.copy(opTypes), regEnv)},
@@ -268,7 +268,7 @@ let {{
                     regFormat = \
                         "printReg(out, intRegClass[%s], regSize);\n"
                     Name += "_R"
-                env.addToDisassembly(regFormat % ModRMRegIndex)
+                env.addToDisassembly(regFormat % ModRMRMIndex)
             elif opType.tag in ("X", "Y"):
                 # This type of memory addressing is for string instructions.
                 # They'll use the right index and segment internally.
diff --git a/src/arch/x86/kvm/x86_cpu.cc b/src/arch/x86/kvm/x86_cpu.cc
index 7faa9159ab..da6e1bb9e1 100644
--- a/src/arch/x86/kvm/x86_cpu.cc
+++ b/src/arch/x86/kvm/x86_cpu.cc
@@ -37,10 +37,12 @@
 #include "arch/x86/cpuid.hh"
 #include "arch/x86/faults.hh"
 #include "arch/x86/interrupts.hh"
+#include "arch/x86/isa.hh"
 #include "arch/x86/regs/float.hh"
 #include "arch/x86/regs/int.hh"
 #include "arch/x86/regs/msr.hh"
 #include "arch/x86/utility.hh"
+#include "base/bitunion.hh"
 #include "base/compiler.hh"
 #include "cpu/kvm/base.hh"
 #include "debug/Drain.hh"
@@ -73,6 +75,13 @@ using namespace X86ISA;
 // data) is used to indicate that a segment has been accessed.
 #define SEG_TYPE_BIT_ACCESSED 1
 
+// Some Linux distros (e.g., RHEL7) define the KVM macros using "BIT" but do
+// not include where BIT is defined, so define it here in that case.
+#ifndef BIT
+#define BIT(nr)         (1UL << (nr))
+#endif
+
+
 struct GEM5_PACKED FXSave
 {
     uint16_t fcw;
@@ -109,6 +118,32 @@ struct GEM5_PACKED FXSave
 
 static_assert(sizeof(FXSave) == 512, "Unexpected size of FXSave");
 
+BitUnion64(XStateBV)
+    Bitfield<0> fpu;
+    Bitfield<1> sse;
+    Bitfield<2> avx;
+    Bitfield<4, 3> mpx;
+    Bitfield<7, 5> avx512;
+    Bitfield<8> pt;
+    Bitfield<9> pkru;
+    Bitfield<10> pasid;
+    Bitfield<12, 11> cet;
+    Bitfield<13> hdc;
+    Bitfield<14> uintr;
+    Bitfield<15> lbr;
+    Bitfield<16> hwp;
+    Bitfield<18, 17> amx;
+    Bitfield<63, 19> reserved;
+EndBitUnion(XStateBV)
+
+struct XSaveHeader
+{
+    XStateBV xstate_bv;    // State-component bitmap (first 8 bytes).
+    uint64_t reserved[7];  // Remaining 56 bytes of the 64-byte header.
+};
+
+static_assert(sizeof(XSaveHeader) == 64, "Unexpected size of XSaveHeader");
+
 #define FOREACH_IREG() \
     do { \
         APPLY_IREG(rax, int_reg::Rax); \
@@ -904,6 +939,19 @@ X86KvmCPU::updateKvmStateFPUXSave()
 
     updateKvmStateFPUCommon(tc, xsave);
 
+    /**
+     * The xsave header (Vol. 1, Section 13.4.2 of the Intel Software
+     * Development Manual) directly follows the legacy xsave region
+     * (i.e., the FPU/SSE state). The first 8 bytes of the xsave header
+     * hold a state-component bitmap called xstate_bv. We need to set
+     * the state component bits corresponding to the FPU and SSE
+     * states.
+     */
+    XSaveHeader& xsave_hdr =
+      * (XSaveHeader *) ((char *) &kxsave + sizeof(FXSave));
+    xsave_hdr.xstate_bv.fpu = 1;
+    xsave_hdr.xstate_bv.sse = 1;
+
     if (tc->readMiscRegNoEffect(misc_reg::Fiseg))
         warn_once("misc_reg::Fiseg is non-zero.\n");
 
@@ -1419,12 +1467,12 @@ X86KvmCPU::ioctlRun()
 
 static struct kvm_cpuid_entry2
 makeKvmCpuid(uint32_t function, uint32_t index,
-             CpuidResult &result)
+             CpuidResult &result, uint32_t flags = 0)
 {
     struct kvm_cpuid_entry2 e;
     e.function = function;
     e.index = index;
-    e.flags = 0;
+    e.flags = flags;
     e.eax = (uint32_t)result.rax;
     e.ebx = (uint32_t)result.rbx;
     e.ecx = (uint32_t)result.rcx;
@@ -1437,33 +1485,76 @@ void
 X86KvmCPU::updateCPUID()
 {
     Kvm::CPUIDVector m5_supported;
-
-    /* TODO: We currently don't support any of the functions that
-     * iterate through data structures in the CPU using an index. It's
-     * currently not a problem since M5 doesn't expose any of them at
-     * the moment.
-     */
+    X86ISA::ISA *isa = dynamic_cast<X86ISA::ISA *>(tc->getIsaPtr());
 
     /* Basic features */
     CpuidResult func0;
-    X86ISA::doCpuid(tc, 0x0, 0, func0);
+    isa->cpuid->doCpuid(tc, 0x0, 0, func0);
     for (uint32_t function = 0; function <= func0.rax; ++function) {
         CpuidResult cpuid;
         uint32_t idx(0);
 
-        X86ISA::doCpuid(tc, function, idx, cpuid);
-        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
+        if (!isa->cpuid->hasSignificantIndex(function)) {
+            isa->cpuid->doCpuid(tc, function, idx, cpuid);
+            m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
+        } else {
+            while (true) {
+                [[maybe_unused]] bool rv = isa->cpuid->doCpuid(
+                    tc, function, idx, cpuid);
+                assert(rv);
+
+                if (idx &&
+                    !cpuid.rax && !cpuid.rbx && !cpuid.rdx && !cpuid.rcx) {
+                    break;
+                }
+
+                /*
+                 * For functions in family 0, this flag tells Linux to compare
+                 * the index as well as the function number rather than only
+                 * the function number. Important: Do NOT set this flag if the
+                 * function does not take an index. Doing so will break SMP.
+                 */
+                uint32_t flag = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                m5_supported.push_back(
+                    makeKvmCpuid(function, idx, cpuid, flag));
+                idx++;
+            }
+        }
     }
 
     /* Extended features */
     CpuidResult efunc0;
-    X86ISA::doCpuid(tc, 0x80000000, 0, efunc0);
+    isa->cpuid->doCpuid(tc, 0x80000000, 0, efunc0);
     for (uint32_t function = 0x80000000; function <= efunc0.rax; ++function) {
         CpuidResult cpuid;
         uint32_t idx(0);
 
-        X86ISA::doCpuid(tc, function, idx, cpuid);
-        m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
+        if (!isa->cpuid->hasSignificantIndex(function)) {
+            isa->cpuid->doCpuid(tc, function, idx, cpuid);
+            m5_supported.push_back(makeKvmCpuid(function, idx, cpuid));
+        } else {
+            while (true) {
+                [[maybe_unused]] bool rv = isa->cpuid->doCpuid(
+                    tc, function, idx, cpuid);
+                assert(rv);
+
+                if (idx &&
+                    !cpuid.rax && !cpuid.rbx && !cpuid.rdx && !cpuid.rcx) {
+                    break;
+                }
+
+                /*
+                 * For functions in family 0, this flag tells Linux to compare
+                 * the index as well as the function number rather than only
+                 * the function number. Important: Do NOT set this flag if the
+                 * function does not take an index. Doing so will break SMP.
+                 */
+                uint32_t flag = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                m5_supported.push_back(
+                    makeKvmCpuid(function, idx, cpuid, flag));
+                idx++;
+            }
+        }
     }
 
     setCPUID(m5_supported);
diff --git a/src/arch/x86/pcstate.hh b/src/arch/x86/pcstate.hh
index a0ed6ffe9f..95984d7a96 100644
--- a/src/arch/x86/pcstate.hh
+++ b/src/arch/x86/pcstate.hh
@@ -66,7 +66,7 @@ class PCState : public GenericISA::UPCState<8>
     }
 
     void
-    set(Addr val)
+    set(Addr val) override
     {
         Base::set(val);
         _size = 0;
diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc
index a195fdf888..10833783fd 100644
--- a/src/arch/x86/process.cc
+++ b/src/arch/x86/process.cc
@@ -397,6 +397,7 @@ X86_64Process::initState()
             tc->setMiscReg(misc_reg::Cr8, cr8);
 
             tc->setMiscReg(misc_reg::Mxcsr, 0x1f80);
+            tc->setMiscReg(misc_reg::Ftw, 0xffff);
 
             tc->setMiscReg(misc_reg::ApicBase, 0xfee00900);
 
@@ -482,13 +483,35 @@ X86_64Process::initState()
         physProxy.writeBlob(idtPhysAddr + 0xE0, &PFGate, sizeof(PFGate));
 
         /* System call handler */
+        // First, we write to the MMIO m5ops range (0xffffc90000007000)
+        // to trap out of the VM back into gem5 to emulate the system
+        // call. Upon re-entering the VM, we need to flush the TLB in
+        // case the system call modified existing page mappings (e.g.,
+        // munmap, mremap, brk). To do this, we can simply read/write
+        // cr3; however, doing so requires saving the value to an
+        // intermediate GPR (%rax, in this case). We save/restore the
+        // value of %rax in the scratch region syscallDataBuf.
+        const Addr syscallDataBuf = syscallCodeVirtAddr + 0x100;
         uint8_t syscallBlob[] = {
             // mov    %rax, (0xffffc90000007000)
             0x48, 0xa3, 0x00, 0x70, 0x00,
             0x00, 0x00, 0xc9, 0xff, 0xff,
+            // mov    %rax, (syscallDataBuf)
+            0x48, 0xa3, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00,
+            // mov    %cr3, %rax
+            0x0f, 0x20, 0xd8,
+            // mov    %rax, %cr3
+            0x0f, 0x22, 0xd8,
+            // mov    (syscallDataBuf), %rax
+            0x48, 0xa1, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00,
             // sysret
             0x48, 0x0f, 0x07
         };
+        assert(syscallDataBuf >= syscallCodePhysAddr + sizeof syscallBlob);
+        std::memcpy(&syscallBlob[12], &syscallDataBuf, sizeof syscallDataBuf);
+        std::memcpy(&syscallBlob[28], &syscallDataBuf, sizeof syscallDataBuf);
 
         physProxy.writeBlob(syscallCodePhysAddr,
                             syscallBlob, sizeof(syscallBlob));
@@ -593,6 +616,7 @@ X86_64Process::initState()
             tc->setMiscReg(misc_reg::Cr0, cr0);
 
             tc->setMiscReg(misc_reg::Mxcsr, 0x1f80);
+            tc->setMiscReg(misc_reg::Ftw, 0xffff);
 
             // Setting CR3 to the process pid so that concatinated
             // page addr with lower 12 bits of CR3 can be used in SE
@@ -727,6 +751,7 @@ I386Process::initState()
         tc->setMiscReg(misc_reg::Cr0, cr0);
 
         tc->setMiscReg(misc_reg::Mxcsr, 0x1f80);
+        tc->setMiscReg(misc_reg::Ftw, 0xffff);
     }
 }
 
@@ -994,7 +1019,8 @@ X86Process::argsInit(int pageSize,
     initVirtMem->write(auxv_array_end, zero);
     auxv_array_end += sizeof(zero);
 
-    initVirtMem->writeString(aux_data_base, platform.c_str());
+    initVirtMem->writeString(aux_data_base + numRandomBytes,
+                             platform.c_str());
 
     copyStringArray(envp, envp_array_base, env_data_base,
                     ByteOrder::little, *initVirtMem);
diff --git a/src/base/Graphics.py b/src/base/Graphics.py
index b0bec3b137..b58e287129 100644
--- a/src/base/Graphics.py
+++ b/src/base/Graphics.py
@@ -36,6 +36,7 @@
 from m5.SimObject import SimObject
 from m5.params import *
 
+
 # Image Formats:
 # Auto option will let gem5 to choose the image format it prefers.
 class ImageFormat(Enum):
diff --git a/src/base/SConsopts b/src/base/SConsopts
index 8e0661203f..68e40587b9 100644
--- a/src/base/SConsopts
+++ b/src/base/SConsopts
@@ -69,11 +69,13 @@ werror_env.Append(CCFLAGS=['-Werror'])
 with gem5_scons.Configure(werror_env) as conf:
 
     # Store result in the main environment
-    main['CONF']['HAVE_DEPRECATED_NAMESPACE'] = conf.TryCompile('''
-        int main() {return 0;}
-        namespace [[gnu::deprecated("Test namespace deprecation")]]
-        test_deprecated_namespace {}
-    ''', '.cc')
+    main['CONF']['HAVE_DEPRECATED_NAMESPACE'] = bool(
+        conf.TryCompile('''
+            int main() {return 0;}
+            namespace [[gnu::deprecated("Test namespace deprecation")]]
+            test_deprecated_namespace {}
+        ''', '.cc')
+    )
 
     if not main['CONF']['HAVE_DEPRECATED_NAMESPACE']:
         warning("Deprecated namespaces are not supported by this compiler.\n"
diff --git a/src/base/amo.test.cc b/src/base/amo.test.cc
index 10e5540da4..e511117eea 100644
--- a/src/base/amo.test.cc
+++ b/src/base/amo.test.cc
@@ -64,9 +64,10 @@ TEST(AmoTest, AtomicOpMin)
     std::string test_string_smaller = "apple";
     std::string test_string_bigger = "cat";
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpMin<int>(10);
-    TypedAtomicOpFunctor<std::string> *amo_op_string =
-        new AtomicOpMin<std::string>("base");
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpMin<int>>(10);
+    std::unique_ptr<TypedAtomicOpFunctor<std::string>> amo_op_string =
+        std::make_unique<AtomicOpMin<std::string>>("base");
     amo_op_int->execute(&test_int_smaller);
     amo_op_int->execute(&test_int_bigger);
     amo_op_string->execute(&test_string_smaller);
@@ -85,9 +86,10 @@ TEST(AmoTest, AtomicOpMax)
     std::string test_string_smaller = "apple";
     std::string test_string_bigger = "cat";
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpMax<int>(10);
-    TypedAtomicOpFunctor<std::string> *amo_op_string =
-        new AtomicOpMax<std::string>("base");
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpMax<int>>(10);
+    std::unique_ptr<TypedAtomicOpFunctor<std::string>> amo_op_string =
+        std::make_unique<AtomicOpMax<std::string>>("base");
     amo_op_int->execute(&test_int_smaller);
     amo_op_int->execute(&test_int_bigger);
     amo_op_string->execute(&test_string_smaller);
@@ -104,8 +106,10 @@ TEST(AmoTest, AtomicOpDec)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpDec<int>();
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpDec<char>();
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpDec<int>>();
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpDec<char>>();
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -118,8 +122,10 @@ TEST(AmoTest, AtomicOpInc)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpInc<int>();
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpInc<char>();
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpInc<int>>();
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpInc<char>>();
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -132,8 +138,10 @@ TEST(AmoTest, AtomicOpSub)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpSub<int>(2);
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpSub<char>('a');
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpSub<int>>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpSub<char>>('a');
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -146,8 +154,10 @@ TEST(AmoTest, AtomicOpAdd)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpAdd<int>(2);
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpAdd<char>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpAdd<int>>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpAdd<char>>(2);
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -160,8 +170,10 @@ TEST(AmoTest, AtomicOpExch)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpExch<int>(2);
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpExch<char>('a');
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpExch<int>>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpExch<char>>('a');
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -174,8 +186,10 @@ TEST(AmoTest, AtomicOpXor)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpXor<int>(2);
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpXor<char>('a');
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpXor<int>>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpXor<char>>('a');
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -188,8 +202,10 @@ TEST(AmoTest, AtomicOpOr)
     int test_int = 8;
     bool test_bool = true;
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpOr<int>(2);
-    TypedAtomicOpFunctor<bool> *amo_op_bool = new AtomicOpOr<bool>(false);
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpOr<int>>(2);
+    std::unique_ptr<TypedAtomicOpFunctor<bool>> amo_op_bool =
+        std::make_unique<AtomicOpOr<bool>>(false);
     amo_op_int->execute(&test_int);
     amo_op_bool->execute(&test_bool);
 
@@ -202,8 +218,10 @@ TEST(AmoTest, AtomicOpAnd)
     int test_int = 10;
     char test_char = 'c';
 
-    TypedAtomicOpFunctor<int> *amo_op_int = new AtomicOpAnd<int>(6);
-    TypedAtomicOpFunctor<char> *amo_op_char = new AtomicOpAnd<char>('a');
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicOpAnd<int>>(6);
+    std::unique_ptr<TypedAtomicOpFunctor<char>> amo_op_char =
+        std::make_unique<AtomicOpAnd<char>>('a');
     amo_op_int->execute(&test_int);
     amo_op_char->execute(&test_char);
 
@@ -215,8 +233,8 @@ TEST(AmoTest, AtomicGeneric2Op)
 {
     int test_int = 9;
 
-    TypedAtomicOpFunctor<int> *amo_op_int =
-        new AtomicGeneric2Op<int>(9, multiply2Op);
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicGeneric2Op<int>>(9, multiply2Op);
     amo_op_int->execute(&test_int);
 
     EXPECT_EQ(test_int, 81);
@@ -226,8 +244,8 @@ TEST(AmoTest, AtomicGeneric3Op)
 {
     int test_int = 2;
 
-    TypedAtomicOpFunctor<int> *amo_op_int =
-        new AtomicGeneric3Op<int>(4, 3, multiply3Op);
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+        std::make_unique<AtomicGeneric3Op<int>>(4, 3, multiply3Op);
     amo_op_int->execute(&test_int);
 
     EXPECT_EQ(test_int, 24);
@@ -239,8 +257,8 @@ TEST(AmoTest, AtomicGenericPair3Op)
 
     std::array<int, 2> a = {6, 3};
     std::array<int, 2> c = {10, 8};
-    TypedAtomicOpFunctor<int> *amo_op_int =
-         new AtomicGenericPair3Op<int>(a, c, addSubColumns);
+    std::unique_ptr<TypedAtomicOpFunctor<int>> amo_op_int =
+            std::make_unique<AtomicGenericPair3Op<int>>(a, c, addSubColumns);
     amo_op_int->execute(&test_int);
 
     EXPECT_EQ(test_int, 10);
diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh
index eecea02981..bceed60f4e 100644
--- a/src/base/bitfield.hh
+++ b/src/base/bitfield.hh
@@ -41,9 +41,12 @@
 #ifndef __BASE_BITFIELD_HH__
 #define __BASE_BITFIELD_HH__
 
+#include <bitset>
 #include <cassert>
+#include <climits>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <type_traits>
 
 namespace gem5
@@ -303,17 +306,31 @@ findMsbSet(uint64_t val)
     return msb;
 }
 
-/**
- * Returns the bit position of the LSB that is set in the input
- *
- * @ingroup api_bitfield
- */
+namespace {
+template<typename T>
+constexpr bool
+hasBuiltinCtz() {
+// Since the defined(__has_builtin) in the subsequent #if statement
+// won't short-circuit the macro expansion of
+// __has_builtin(__builtin_ctz), we must explicitly define it as zero
+// if it's undefined to avoid a preprocessor error.
+#ifndef __has_builtin
+#   define __has_builtin(foo) 0
+#endif
+#if defined(__has_builtin) && __has_builtin(__builtin_ctz)
+    return sizeof(unsigned long long) >= sizeof(T);
+#else
+    return false;
+#endif
+}
+
+[[maybe_unused]]
 constexpr int
-findLsbSet(uint64_t val)
-{
+findLsbSetFallback(uint64_t val) {
     int lsb = 0;
-    if (!val)
+    if (!val) {
         return sizeof(val) * 8;
+    }
     if (!bits(val, 31, 0)) {
         lsb += 32;
         val >>= 32;
@@ -334,10 +351,58 @@ findLsbSet(uint64_t val)
         lsb += 2;
         val >>= 2;
     }
-    if (!bits(val, 0, 0))
+    if (!bits(val, 0, 0)) {
         lsb += 1;
+    }
     return lsb;
 }
+} // anonymous namespace
+
+/**
+ * Returns the bit position of the LSB that is set in the input, or 64 if
+ * no bit is set. Uses a builtin that exploits a "count trailing zeros"
+ * instruction when available, else falls back to `findLsbSetFallback`.
+ *
+ * @ingroup api_bitfield
+ */
+constexpr int
+findLsbSet(uint64_t val) {
+    if (val == 0) return 64;
+
+    if constexpr (hasBuiltinCtz<decltype(val)>()) {
+        return __builtin_ctzll(val);
+    } else {
+        return findLsbSetFallback(val);
+    }
+}
+
+
+template<size_t N>
+constexpr int
+findLsbSet(std::bitset<N> bs)
+{
+    if constexpr (N <= 64) {
+        return findLsbSet(bs.to_ullong());
+    } else {
+        if (bs.none()) return N;
+        // Mask selecting the low 64 bits
+        constexpr std::bitset<N> mask(std::numeric_limits<uint64_t>::max());
+        // Is the LSB set in the rightmost 64 bits?
+        auto nextQword{bs & mask};
+        int i{0};
+        while (nextQword.none()) {
+            // If not, shift right by 64 bits and repeat
+            i += 64;
+            bs >>= 64;
+            nextQword = bs & mask;
+        }
+        // If yes, account for the number of 64-bit shifts and add the
+        // remainder using the uint64_t implementation. Store it in an
+        // intermediate variable for a valid ullong-to-uint64_t conversion.
+        uint64_t remaining{nextQword.to_ullong()};
+        return i + findLsbSet(remaining);
+    }
+}
 
 /**
  * Returns the number of set ones in the provided value.
diff --git a/src/base/bitfield.test.cc b/src/base/bitfield.test.cc
index 1711ea68bf..94cff8e155 100644
--- a/src/base/bitfield.test.cc
+++ b/src/base/bitfield.test.cc
@@ -316,6 +316,7 @@ TEST(BitfieldTest, FindLsb)
 {
     uint64_t val = (1ULL << 63) + (1 << 1);
     EXPECT_EQ(1, findLsbSet(val));
+    EXPECT_EQ(1, findLsbSetFallback(val));
 }
 
 TEST(BitfieldTest, FindLsbZero)
@@ -323,6 +324,23 @@ TEST(BitfieldTest, FindLsbZero)
     EXPECT_EQ(64, findLsbSet(0));
 }
 
+TEST(BitfieldTest, FindLsbGeneralized)
+{
+    static constexpr size_t N{1000};
+    std::bitset<N> bs{0};
+    EXPECT_EQ(findLsbSet(bs), N);
+    for (size_t i{0}; i < N ; ++i) {
+        bs = std::bitset<N>{1} << i;
+        ASSERT_EQ(findLsbSet(bs), i);
+    }
+
+    const auto leadingOne = std::bitset<N>{1} << (N-1);
+    for (size_t i{0}; i < N ; ++i) {
+        bs = leadingOne | (std::bitset<N>{1} << i);
+        ASSERT_EQ(findLsbSet(bs), i);
+    }
+}
+
 /*
  * The following tests "popCount(X)". popCount counts the number of bits set to
  * one.
diff --git a/src/base/cprintf_formats.hh b/src/base/cprintf_formats.hh
index 4a64780c4a..e4c1048e01 100644
--- a/src/base/cprintf_formats.hh
+++ b/src/base/cprintf_formats.hh
@@ -34,6 +34,8 @@
 #include <ostream>
 #include <sstream>
 
+#include "base/stl_helpers.hh"
+
 namespace gem5
 {
 
@@ -221,6 +223,7 @@ template <typename T>
 static inline void
 _formatString(std::ostream &out, const T &data, Format &fmt)
 {
+    using stl_helpers::operator<<;
     if (fmt.width > 0) {
         std::stringstream foo;
         foo << data;
diff --git a/src/base/inifile.cc b/src/base/inifile.cc
index 8c0662d0e3..23253e9df5 100644
--- a/src/base/inifile.cc
+++ b/src/base/inifile.cc
@@ -42,17 +42,6 @@ namespace gem5
 IniFile::IniFile()
 {}
 
-IniFile::~IniFile()
-{
-    SectionTable::iterator i = table.begin();
-    SectionTable::iterator end = table.end();
-
-    while (i != end) {
-        delete (*i).second;
-        ++i;
-    }
-}
-
 bool
 IniFile::load(const std::string &file)
 {
@@ -82,15 +71,15 @@ IniFile::Section::addEntry(const std::string &entryName,
 
     if (ei == table.end()) {
         // new entry
-        table[entryName] = new Entry(value);
+        table.emplace(entryName, value);
     }
     else if (append) {
         // append new reult to old entry
-        ei->second->appendValue(value);
+        ei->second.appendValue(value);
     }
     else {
         // override old entry
-        ei->second->setValue(value);
+        ei->second.setValue(value);
     }
 }
 
@@ -120,39 +109,42 @@ IniFile::Section::add(const std::string &assignment)
 
 
 IniFile::Entry *
+IniFile::Section::findEntry(const std::string &entryName)
+{
+    return const_cast<IniFile::Entry *>(
+        std::as_const(*this).findEntry(entryName));
+}
+
+const IniFile::Entry *
 IniFile::Section::findEntry(const std::string &entryName) const
 {
     referenced = true;
 
-    EntryTable::const_iterator ei = table.find(entryName);
+    auto ei = table.find(entryName);
 
-    return (ei == table.end()) ? NULL : ei->second;
+    return (ei == table.end()) ? nullptr : &ei->second;
 }
 
 
 IniFile::Section *
 IniFile::addSection(const std::string &sectionName)
 {
-    SectionTable::iterator i = table.find(sectionName);
-
-    if (i != table.end()) {
-        return i->second;
-    }
-    else {
-        // new entry
-        Section *sec = new Section();
-        table[sectionName] = sec;
-        return sec;
-    }
+    return &table[sectionName];
 }
 
-
 IniFile::Section *
+IniFile::findSection(const std::string &sectionName)
+{
+    return const_cast<IniFile::Section*>(
+        std::as_const(*this).findSection(sectionName));
+}
+
+const IniFile::Section *
 IniFile::findSection(const std::string &sectionName) const
 {
-    SectionTable::const_iterator i = table.find(sectionName);
+    auto i = table.find(sectionName);
 
-    return (i == table.end()) ? NULL : i->second;
+    return (i == table.end()) ? nullptr : &i->second;
 }
 
 
@@ -215,11 +207,11 @@ bool
 IniFile::find(const std::string &sectionName, const std::string &entryName,
               std::string &value) const
 {
-    Section *section = findSection(sectionName);
+    auto* section = findSection(sectionName);
     if (section == NULL)
         return false;
 
-    Entry *entry = section->findEntry(entryName);
+    auto* entry = section->findEntry(entryName);
     if (entry == NULL)
         return false;
 
@@ -232,7 +224,7 @@ bool
 IniFile::entryExists(const std::string &sectionName,
         const std::string &entryName) const
 {
-    Section *section = findSection(sectionName);
+    auto* section = findSection(sectionName);
 
     if (!section)
         return false;
@@ -248,13 +240,13 @@ IniFile::sectionExists(const std::string &sectionName) const
 
 
 bool
-IniFile::Section::printUnreferenced(const std::string &sectionName)
+IniFile::Section::printUnreferenced(const std::string &sectionName) const
 {
     bool unref = false;
     bool search_unref_entries = false;
     std::vector<std::string> unref_ok_entries;
 
-    Entry *entry = findEntry("unref_entries_ok");
+    auto* entry = findEntry("unref_entries_ok");
     if (entry != NULL) {
         tokenize(unref_ok_entries, entry->getValue(), ' ');
         if (unref_ok_entries.size()) {
@@ -262,10 +254,9 @@ IniFile::Section::printUnreferenced(const std::string &sectionName)
         }
     }
 
-    for (EntryTable::iterator ei = table.begin();
-         ei != table.end(); ++ei) {
-        const std::string &entryName = ei->first;
-        entry = ei->second;
+    for (auto& ei: table) {
+        const std::string &entryName = ei.first;
+        entry = &ei.second;
 
         if (entryName == "unref_section_ok" ||
             entryName == "unref_entries_ok")
@@ -294,32 +285,29 @@ IniFile::Section::printUnreferenced(const std::string &sectionName)
 void
 IniFile::getSectionNames(std::vector<std::string> &list) const
 {
-    for (SectionTable::const_iterator i = table.begin();
-         i != table.end(); ++i)
-    {
-        list.push_back((*i).first);
+    for (auto& entry: table) {
+        auto& sectionName = entry.first;
+        list.push_back(sectionName);
     }
 }
 
 bool
-IniFile::printUnreferenced()
+IniFile::printUnreferenced() const
 {
     bool unref = false;
 
-    for (SectionTable::iterator i = table.begin();
-         i != table.end(); ++i) {
-        const std::string &sectionName = i->first;
-        Section *section = i->second;
+    for (auto& entry: table) {
+        auto& [sectionName, section] = entry;
 
-        if (!section->isReferenced()) {
-            if (section->findEntry("unref_section_ok") == NULL) {
+        if (!section.isReferenced()) {
+            if (section.findEntry("unref_section_ok") == NULL) {
                 std::cerr << "Section " << sectionName << " not referenced."
                           << std::endl;
                 unref = true;
             }
         }
         else {
-            if (section->printUnreferenced(sectionName)) {
+            if (section.printUnreferenced(sectionName)) {
                 unref = true;
             }
         }
@@ -330,12 +318,11 @@ IniFile::printUnreferenced()
 
 
 void
-IniFile::Section::dump(const std::string &sectionName)
+IniFile::Section::dump(const std::string &sectionName) const
 {
-    for (EntryTable::iterator ei = table.begin();
-         ei != table.end(); ++ei) {
-        std::cout << sectionName << ": " << (*ei).first << " => "
-                  << (*ei).second->getValue() << "\n";
+    for (auto& ei: table) {
+        std::cout << sectionName << ": " << ei.first << " => "
+                  << ei.second.getValue() << "\n";
     }
 }
 
@@ -344,7 +331,7 @@ IniFile::dump()
 {
     for (SectionTable::iterator i = table.begin();
          i != table.end(); ++i) {
-        i->second->dump(i->first);
+        i->second.dump(i->first);
     }
 }
 
@@ -364,9 +351,9 @@ void
 IniFile::visitSection(const std::string &sectionName,
     IniFile::VisitSectionCallback cb)
 {
-    const auto& section = *table.at(sectionName);
+    const auto& section = table.at(sectionName);
     for (const auto& pair : section) {
-        cb(pair.first, pair.second->getValue());
+        cb(pair.first, pair.second.getValue());
     }
 }
 
diff --git a/src/base/inifile.hh b/src/base/inifile.hh
index 72f1df7b05..d17193d5bf 100644
--- a/src/base/inifile.hh
+++ b/src/base/inifile.hh
@@ -72,7 +72,7 @@ class IniFile
         }
 
         /// Has this entry been used?
-        bool isReferenced() { return referenced; }
+        bool isReferenced() const { return referenced; }
 
         /// Fetch the value.
         const std::string &getValue() const;
@@ -94,7 +94,7 @@ class IniFile
     class Section
     {
         /// EntryTable type.  Map of strings to Entry object pointers.
-        typedef std::unordered_map<std::string, Entry *> EntryTable;
+        typedef std::unordered_map<std::string, Entry> EntryTable;
 
         EntryTable      table;          ///< Table of entries.
         mutable bool    referenced;     ///< Has this section been used?
@@ -107,7 +107,7 @@ class IniFile
         }
 
         /// Has this section been used?
-        bool isReferenced() { return referenced; }
+        bool isReferenced() const { return referenced; }
 
         /// Add an entry to the table.  If an entry with the same name
         /// already exists, the 'append' parameter is checked If true,
@@ -125,24 +125,25 @@ class IniFile
 
         /// Find the entry with the given name.
         /// @retval Pointer to the entry object, or NULL if none.
-        Entry *findEntry(const std::string &entryName) const;
+        Entry *findEntry(const std::string &entryName);
+        const Entry *findEntry(const std::string &entryName) const;
 
         /// Print the unreferenced entries in this section to cerr.
         /// Messages can be suppressed using "unref_section_ok" and
         /// "unref_entries_ok".
         /// @param sectionName Name of this section, for use in output message.
         /// @retval True if any entries were printed.
-        bool printUnreferenced(const std::string &sectionName);
+        bool printUnreferenced(const std::string &sectionName) const;
 
         /// Print the contents of this section to cout (for debugging).
-        void dump(const std::string &sectionName);
+        void dump(const std::string &sectionName) const;
 
         EntryTable::const_iterator begin() const;
         EntryTable::const_iterator end() const;
     };
 
     /// SectionTable type.  Map of strings to Section object pointers.
-    typedef std::unordered_map<std::string, Section *> SectionTable;
+    typedef std::unordered_map<std::string, Section> SectionTable;
 
   protected:
     /// Hash of section names to Section object pointers.
@@ -155,15 +156,13 @@ class IniFile
 
     /// Look up section with the given name.
     /// @retval Pointer to section object, or NULL if not found.
-    Section *findSection(const std::string &sectionName) const;
+    Section *findSection(const std::string &sectionName);
+    const Section *findSection(const std::string &sectionName) const;
 
   public:
     /// Constructor.
     IniFile();
 
-    /// Destructor.
-    ~IniFile();
-
     /// Load parameter settings from given istream.  This is a helper
     /// function for load(string) and loadCPP(), which open a file
     /// and then pass it here.
@@ -206,7 +205,7 @@ class IniFile
 
     /// Print unreferenced entries in object.  Iteratively calls
     /// printUnreferend() on all the constituent sections.
-    bool printUnreferenced();
+    bool printUnreferenced() const;
 
     /// Dump contents to cout.  For debugging.
     void dump();
diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc
index 4b1467acf0..5eef4cb44c 100644
--- a/src/base/loader/elf_object.cc
+++ b/src/base/loader/elf_object.cc
@@ -196,6 +196,27 @@ ElfObject::ElfObject(ImageFileDataPtr ifd) : ObjectFile(ifd)
                     continue;
                 }
 
+                switch (GELF_ST_TYPE(sym.st_info)) {
+                  case STT_NOTYPE:
+                    symbol.type = loader::Symbol::SymbolType::NoType;
+                    break;
+                  case STT_OBJECT:
+                    symbol.type = loader::Symbol::SymbolType::Object;
+                    break;
+                  case STT_FUNC:
+                    symbol.type = loader::Symbol::SymbolType::Function;
+                    break;
+                  case STT_SECTION:
+                    symbol.type = loader::Symbol::SymbolType::Section;
+                    break;
+                  case STT_FILE:
+                    symbol.type = loader::Symbol::SymbolType::File;
+                    break;
+                  default:
+                    symbol.type = loader::Symbol::SymbolType::Other;
+                    break;
+                }
+
                 if (_symtab.insert(symbol)) {
                     DPRINTF(Loader, "Symbol: %-40s value %#x.\n",
                             symbol.name, symbol.address);
diff --git a/src/base/loader/symtab.cc b/src/base/loader/symtab.cc
index 941ea101c9..cd7bec3d56 100644
--- a/src/base/loader/symtab.cc
+++ b/src/base/loader/symtab.cc
@@ -79,8 +79,11 @@ SymbolTable::insert(const SymbolTable &other)
                           nameMap.begin(), nameMap.end(),
                           std::inserter(intersection, intersection.begin()),
                           nameMap.value_comp());
-    if (!intersection.empty())
+    if (!intersection.empty()) {
+        warn("Cannot insert a new symbol table due to name collisions. "
+             "Adding prefix to each symbol's name can resolve this issue.");
         return false;
+    }
 
     for (const Symbol &symbol: other)
         insert(symbol);
@@ -98,6 +101,7 @@ SymbolTable::serialize(const std::string &base, CheckpointOut &cp) const
         paramOut(cp, csprintf("%s.addr_%d", base, i), symbol.address);
         paramOut(cp, csprintf("%s.symbol_%d", base, i), symbol.name);
         paramOut(cp, csprintf("%s.binding_%d", base, i), (int)symbol.binding);
+        paramOut(cp, csprintf("%s.type_%d", base, i), (int)symbol.type);
         i++;
     }
 }
@@ -113,12 +117,15 @@ SymbolTable::unserialize(const std::string &base, CheckpointIn &cp,
         Addr address;
         std::string name;
         Symbol::Binding binding = default_binding;
+        Symbol::SymbolType type = Symbol::SymbolType::Other;
 
         paramIn(cp, csprintf("%s.addr_%d", base, i), address);
         paramIn(cp, csprintf("%s.symbol_%d", base, i), name);
         if (!optParamIn(cp, csprintf("%s.binding_%d", base, i), binding))
             binding = default_binding;
-        insert({binding, name, address});
+        if (!optParamIn(cp, csprintf("%s.type_%d", base, i), type))
+            type = Symbol::SymbolType::Other;
+        insert({binding, type, name, address});
     }
 }
 
diff --git a/src/base/loader/symtab.hh b/src/base/loader/symtab.hh
index 2e50523c32..654064c9db 100644
--- a/src/base/loader/symtab.hh
+++ b/src/base/loader/symtab.hh
@@ -56,7 +56,19 @@ struct Symbol
         Weak
     };
 
+    // The symbol type, as extracted by ELF64_ST_TYPE from gelf's st_info
+    enum class SymbolType
+    {
+        NoType,
+        Object,
+        Function,
+        Section,
+        File,
+        Other
+    };
+
     Binding binding;
+    SymbolType type;
     std::string name;
     Addr address;
 };
@@ -164,6 +176,22 @@ class SymbolTable
         return filter(filt);
     }
 
+    /**
+     * Generate a new table by applying a filter that only accepts the symbols
+     * whose type matches the given symbol type.
+     *
+     * @param symbol_type The type that must be matched.
+     * @return A new table, filtered by type.
+     */
+    SymbolTablePtr
+    filterBySymbolType(const Symbol::SymbolType& symbol_type) const
+    {
+        auto filt = [symbol_type](const Symbol &symbol) {
+            return symbol.type == symbol_type;
+        };
+        return filter(filt);
+    }
+
   public:
     typedef SymbolVector::iterator iterator;
     typedef SymbolVector::const_iterator const_iterator;
@@ -290,6 +318,17 @@ class SymbolTable
         return filterByBinding(Symbol::Binding::Weak);
     }
 
+    /**
+     * Generates a new symbol table containing only function symbols.
+     *
+     * @return The new table.
+     */
+    SymbolTablePtr
+    functionSymbols() const
+    {
+        return filterBySymbolType(Symbol::SymbolType::Function);
+    }
+
     /**
      * Serialize the table's contents.
      *
diff --git a/src/base/loader/symtab.test.cc b/src/base/loader/symtab.test.cc
index 313055392a..1705a4165c 100644
--- a/src/base/loader/symtab.test.cc
+++ b/src/base/loader/symtab.test.cc
@@ -58,6 +58,12 @@ getSymbolError(const loader::Symbol& symbol, const loader::Symbol& expected)
             (int)expected.binding << "`.\n";
     }
 
+    if (symbol.type != expected.type) {
+        ss << "    symbols' types do not match: seen `" <<
+            (int)symbol.type << "`, expected `" <<
+            (int)expected.type << "`.\n";
+    }
+
     if (symbol.name != expected.name) {
         ss << "    symbols' names do not match: seen `" << symbol.name <<
             "`, expected `" << expected.name << "`.\n";
@@ -136,7 +142,9 @@ TEST(LoaderSymtabTest, InsertSymbolNoName)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "", 0x10};
     ASSERT_FALSE(symtab.insert(symbol));
     ASSERT_TRUE(checkTable(symtab, {}));
 }
@@ -146,7 +154,9 @@ TEST(LoaderSymtabTest, InsertOneSymbol)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     ASSERT_TRUE(symtab.insert(symbol));
 
     ASSERT_FALSE(symtab.empty());
@@ -160,8 +170,10 @@ TEST(LoaderSymtabTest, InsertSymbolExistingName)
 
     const std::string name = "symbol";
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, name, 0x10},
-        {loader::Symbol::Binding::Local, name, 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            name, 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            name, 0x20},
     };
     ASSERT_TRUE(symtab.insert(symbols[0]));
     ASSERT_FALSE(symtab.insert(symbols[1]));
@@ -177,8 +189,10 @@ TEST(LoaderSymtabTest, InsertSymbolExistingAddress)
 
     const Addr addr = 0x10;
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", addr},
-        {loader::Symbol::Binding::Local, "symbol2", addr},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", addr},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", addr},
     };
     ASSERT_TRUE(symtab.insert(symbols[0]));
     ASSERT_TRUE(symtab.insert(symbols[1]));
@@ -193,9 +207,12 @@ TEST(LoaderSymtabTest, InsertMultipleSymbols)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -212,9 +229,12 @@ TEST(LoaderSymtabTest, ClearMultiple)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -234,9 +254,12 @@ TEST(LoaderSymtabTest, Offset)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -250,9 +273,12 @@ TEST(LoaderSymtabTest, Offset)
 
     // Check that the new table is offset
     loader::Symbol expected_symbols[] = {
-        {symbols[0].binding, symbols[0].name, symbols[0].address + offset},
-        {symbols[1].binding, symbols[1].name, symbols[1].address + offset},
-        {symbols[2].binding, symbols[2].name, symbols[2].address + offset},
+        {symbols[0].binding, symbols[0].type, symbols[0].name,
+            symbols[0].address + offset},
+        {symbols[1].binding, symbols[1].type, symbols[1].name,
+            symbols[1].address + offset},
+        {symbols[2].binding, symbols[2].type, symbols[2].name,
+            symbols[2].address + offset},
     };
     ASSERT_TRUE(checkTable(*symtab_new, {expected_symbols[0],
         expected_symbols[1], expected_symbols[2]}));
@@ -267,10 +293,14 @@ TEST(LoaderSymtabTest, Mask)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x1310},
-        {loader::Symbol::Binding::Local, "symbol2", 0x2810},
-        {loader::Symbol::Binding::Local, "symbol3", 0x2920},
-        {loader::Symbol::Binding::Local, "symbol4", 0x3C20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x1310},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x2810},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x2920},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x3C20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -286,10 +316,14 @@ TEST(LoaderSymtabTest, Mask)
 
     // Check that the new table is masked
     loader::Symbol expected_symbols[] = {
-        {symbols[0].binding, symbols[0].name, symbols[0].address & mask},
-        {symbols[1].binding, symbols[1].name, symbols[1].address & mask},
-        {symbols[2].binding, symbols[2].name, symbols[2].address & mask},
-        {symbols[3].binding, symbols[3].name, symbols[3].address & mask},
+        {symbols[0].binding, symbols[0].type, symbols[0].name,
+            symbols[0].address & mask},
+        {symbols[1].binding, symbols[1].type, symbols[1].name,
+            symbols[1].address & mask},
+        {symbols[2].binding, symbols[2].type, symbols[2].name,
+            symbols[2].address & mask},
+        {symbols[3].binding, symbols[3].type, symbols[3].name,
+            symbols[3].address & mask},
     };
     ASSERT_TRUE(checkTable(*symtab_new, {expected_symbols[0],
         expected_symbols[1], expected_symbols[2], expected_symbols[3]}));
@@ -304,10 +338,14 @@ TEST(LoaderSymtabTest, Rename)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Local, "symbol4", 0x40},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -323,10 +361,14 @@ TEST(LoaderSymtabTest, Rename)
 
     // Check that the new table's symbols have been renamed
     loader::Symbol expected_symbols[] = {
-        {symbols[0].binding, symbols[0].name + "_suffix", symbols[0].address},
-        {symbols[1].binding, symbols[1].name + "_suffix", symbols[1].address},
-        {symbols[2].binding, symbols[2].name + "_suffix", symbols[2].address},
-        {symbols[3].binding, symbols[3].name + "_suffix", symbols[3].address},
+        {symbols[0].binding, symbols[0].type, symbols[0].name + "_suffix",
+            symbols[0].address},
+        {symbols[1].binding, symbols[1].type, symbols[1].name + "_suffix",
+            symbols[1].address},
+        {symbols[2].binding, symbols[2].type, symbols[2].name + "_suffix",
+            symbols[2].address},
+        {symbols[3].binding, symbols[3].type, symbols[3].name + "_suffix",
+            symbols[3].address},
     };
     ASSERT_TRUE(checkTable(*symtab_new, {expected_symbols[0],
         expected_symbols[1], expected_symbols[2], expected_symbols[3]}));
@@ -341,10 +383,14 @@ TEST(LoaderSymtabTest, RenameNonUnique)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Local, "symbol4", 0x40},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -366,9 +412,12 @@ TEST(LoaderSymtabTest, RenameNonUnique)
     // Check that the new table's symbols have been renamed, yet it does not
     // contain the symbols with duplicated names
     loader::Symbol expected_symbols[] = {
-        {symbols[0].binding, "NonUniqueName", symbols[0].address},
-        {symbols[1].binding, symbols[1].name, symbols[1].address},
-        {symbols[3].binding, symbols[3].name, symbols[3].address},
+        {symbols[0].binding, symbols[0].type, "NonUniqueName",
+            symbols[0].address},
+        {symbols[1].binding, symbols[1].type, symbols[1].name,
+            symbols[1].address},
+        {symbols[3].binding, symbols[3].type, symbols[3].name,
+            symbols[3].address},
     };
     ASSERT_TRUE(checkTable(*symtab_new, {expected_symbols[0],
         expected_symbols[1], expected_symbols[2]}));
@@ -383,11 +432,16 @@ TEST(LoaderSymtabTest, Globals)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -414,11 +468,16 @@ TEST(LoaderSymtabTest, Locals)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -445,11 +504,16 @@ TEST(LoaderSymtabTest, Weaks)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -467,12 +531,50 @@ TEST(LoaderSymtabTest, Weaks)
     ASSERT_TRUE(checkTable(*symtab_new, {symbols[3], symbols[4]}));
 }
 
+/**
+ * Test the creation of a new filtered table containing only function symbols
+ * of the original table. Also verifies if the original table is kept the same.
+ */
+TEST(LoaderSymtabTest, FunctionSymbols)
+{
+    loader::SymbolTable symtab;
+
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::NoType,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::File,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Function,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Object,
+            "symbol4", 0x40},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Function,
+            "symbol5", 0x50}
+    };
+    EXPECT_TRUE(symtab.insert(symbols[0]));
+    EXPECT_TRUE(symtab.insert(symbols[1]));
+    EXPECT_TRUE(symtab.insert(symbols[2]));
+    EXPECT_TRUE(symtab.insert(symbols[3]));
+    EXPECT_TRUE(symtab.insert(symbols[4]));
+
+    const auto symtab_new = symtab.functionSymbols();
+
+    // Check that the original table is not modified
+    ASSERT_TRUE(checkTable(symtab, {symbols[0], symbols[1], symbols[2],
+        symbols[3], symbols[4]}));
+
+    // Check that the new table only contains function symbols
+    ASSERT_TRUE(checkTable(*symtab_new, {symbols[2], symbols[4]}));
+}
+
 /** Test searching for a non-existent address. */
 TEST(LoaderSymtabTest, FindNonExistentAddress)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     ASSERT_EQ(symtab.find(0x0), symtab.end());
@@ -484,9 +586,12 @@ TEST(LoaderSymtabTest, FindUniqueAddress)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -506,9 +611,12 @@ TEST(LoaderSymtabTest, FindNonUniqueAddress)
 
     const Addr addr = 0x20;
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", addr},
-        {loader::Symbol::Binding::Local, "symbol3", addr},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", addr},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", addr},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -524,7 +632,9 @@ TEST(LoaderSymtabTest, FindNonExistentName)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.find("symbol2");
@@ -537,9 +647,12 @@ TEST(LoaderSymtabTest, FindExistingName)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -556,8 +669,10 @@ TEST(LoaderSymtabTest, FindNearestExact)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -575,7 +690,9 @@ TEST(LoaderSymtabTest, FindNearestRound)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.findNearest(symbol.address + 0x1);
@@ -593,8 +710,10 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNext)
     loader::SymbolTable symtab;
 
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -615,7 +734,9 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNextNonExistent)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     Addr next_addr;
@@ -633,7 +754,9 @@ TEST(LoaderSymtabTest, FindNearestNonExistent)
 {
     loader::SymbolTable symtab;
 
-    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = \
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.findNearest(symbol.address - 0x1);
@@ -648,12 +771,17 @@ TEST(LoaderSymtabTest, InsertTableConflicting)
 {
     const std::string name = "symbol";
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, name, 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Local, "symbol4", 0x40},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            name, 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
         // Introduce name conflict
-        {loader::Symbol::Binding::Local, name, 0x50},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            name, 0x50},
     };
 
     // Populate table 1
@@ -682,11 +810,16 @@ TEST(LoaderSymtabTest, InsertTableConflicting)
 TEST(LoaderSymtabTest, InsertTable)
 {
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {loader::Symbol::Binding::Local, "symbol4", 0x40},
-        {loader::Symbol::Binding::Local, "symbol5", 0x50},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol4", 0x40},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol5", 0x50},
     };
 
     // Populate table 1
@@ -719,9 +852,12 @@ TEST_F(LoaderSymtabSerializationFixture, Serialization)
     // Populate the table
     loader::SymbolTable symtab;
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -735,22 +871,31 @@ TEST_F(LoaderSymtabSerializationFixture, Serialization)
     // Verify the output
     ASSERT_THAT(cp.str(), ::testing::StrEq("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
+        "test.type_0=5\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\ntest.binding_1=1\n"
-        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"));
+        "test.type_1=5\n"
+        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"
+        "test.type_2=5\n"));
 }
 
 /** Test unserialization. */
 TEST_F(LoaderSymtabSerializationFixture, Unserialization)
 {
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
+        "test.type_0=5\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\ntest.binding_1=1\n"
-        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
+        "test.type_1=5\n"
+        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"
+        "test.type_2=5\n");
 
     loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
@@ -771,14 +916,19 @@ TEST_F(LoaderSymtabSerializationFixture, Unserialization)
 TEST_F(LoaderSymtabSerializationFixture, UnserializationMissingBinding)
 {
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Global, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
-        "test.addr_1=32\ntest.symbol_1=symbol2\n"
-        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
+        "test.type_0=5\n"
+        "test.addr_1=32\ntest.symbol_1=symbol2\ntest.type_1=5\n"
+        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"
+        "test.type_2=5\n");
 
     loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
@@ -801,14 +951,20 @@ TEST_F(LoaderSymtabSerializationFixture,
     UnserializationMissingBindingChangeDefault)
 {
     loader::Symbol symbols[] = {
-        {loader::Symbol::Binding::Local, "symbol", 0x10},
-        {loader::Symbol::Binding::Weak, "symbol2", 0x20},
-        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol", 0x10},
+        {loader::Symbol::Binding::Weak, loader::Symbol::SymbolType::Other,
+            "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, loader::Symbol::SymbolType::Other,
+            "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
+        "test.type_0=5\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\n"
-        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
+        "test.type_1=5\n"
+        "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n"
+        "test.type_2=5\n");
 
     loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
diff --git a/src/base/memoizer.hh b/src/base/memoizer.hh
index c3390887b7..4d3816e9ea 100644
--- a/src/base/memoizer.hh
+++ b/src/base/memoizer.hh
@@ -85,7 +85,7 @@ class Memoizer
     using ret_type = Ret;
     using args_type = std::tuple<Args...>;
 
-    constexpr Memoizer(Ret _func(Args...))
+    constexpr Memoizer(Ret (*_func)(Args...))
      : func(_func)
     {
         validateMemoizer();
diff --git a/src/base/stl_helpers.hh b/src/base/stl_helpers.hh
index d12f266350..b70eea9992 100644
--- a/src/base/stl_helpers.hh
+++ b/src/base/stl_helpers.hh
@@ -26,57 +26,10 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __BASE_STL_HELPERS_HH__
-#define __BASE_STL_HELPERS_HH__
+#ifndef BASE_STL_HELPERS_HH
+#define BASE_STL_HELPERS_HH
 
-#include <algorithm>
-#include <iostream>
-#include <type_traits>
-#include <vector>
+#include "base/stl_helpers/hash_helpers.hh"
+#include "base/stl_helpers/ostream_helpers.hh"
 
-#include "base/compiler.hh"
-
-namespace gem5
-{
-
-namespace stl_helpers
-{
-
-template <typename T, typename Enabled=void>
-struct IsHelpedContainer : public std::false_type {};
-
-template <typename ...Types>
-struct IsHelpedContainer<std::vector<Types...>> : public std::true_type {};
-
-template <typename ...Types>
-constexpr bool IsHelpedContainerV = IsHelpedContainer<Types...>::value;
-
-/**
- * Write out all elements in an stl container as a space separated
- * list enclosed in square brackets
- *
- * @ingroup api_base_utils
- */
-
-template <typename T>
-std::enable_if_t<IsHelpedContainerV<T>, std::ostream &>
-operator<<(std::ostream& out, const T &t)
-{
-    out << "[ ";
-    bool first = true;
-    auto printer = [&first, &out](const auto &elem) {
-        if (first)
-            out << elem;
-        else
-            out << " " << elem;
-    };
-    std::for_each(t.begin(), t.end(), printer);
-    out << " ]";
-    out << std::flush;
-    return out;
-}
-
-} // namespace stl_helpers
-} // namespace gem5
-
-#endif // __BASE_STL_HELPERS_HH__
+#endif // BASE_STL_HELPERS_HH
diff --git a/src/base/stl_helpers/SConscript b/src/base/stl_helpers/SConscript
new file mode 100644
index 0000000000..7328cf066a
--- /dev/null
+++ b/src/base/stl_helpers/SConscript
@@ -0,0 +1,29 @@
+# Copyright (c) 2023 Arteris, Inc. and its applicable licensors and affiliates.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer; redistributions in binary
+# form must reproduce the above copyright notice, this list of conditions and
+# the following disclaimer in the documentation and/or other materials provided
+# with the distribution; neither the name of the copyright holders nor the
+# names of its contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+Import('*')
+
+GTest('hash_helpers.test', 'hash_helpers.test.cc')
+GTest('ostream_helpers.test', 'ostream_helpers.test.cc')
diff --git a/src/base/stl_helpers/hash_helpers.hh b/src/base/stl_helpers/hash_helpers.hh
new file mode 100644
index 0000000000..1432d522bd
--- /dev/null
+++ b/src/base/stl_helpers/hash_helpers.hh
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2023 Arteris, Inc. and its applicable licensors and
+ * affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BASE_STL_HELPERS_HASH_HELPERS_HH
+#define BASE_STL_HELPERS_HASH_HELPERS_HH
+
+#include <functional>
+#include <numeric>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "base/type_traits.hh"
+
+#include <functional>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace gem5::stl_helpers
+{
+
+namespace hash_impl
+{
+// The math in the hash_combine and hash_refine functions is inspired by Jon
+// Maiga's work hosted at https://github.com/jonmaiga/mx3 under the CC0
+// license. It makes use of two components: a stream mixer for combination and
+// a scalar mixer for refinement.
+// The stream mixer is a lighter weight function with lower entropy used to
+// combine hash values while the scalar mixer is a high entropy function that
+// increases the overall hashing quality.
+// The tradeoff of not using hash_refine has not been thoroughly tested and is
+// only done based on Maiga's reported experience.
+static constexpr uint64_t C = 0xbea225f9eb34556d;
+template<typename... T>
+constexpr size_t hash_combine(T... hashes) {
+    // gcc reports unused variable if T is the empty pack
+    [[maybe_unused]] auto combine = [](uint64_t a, uint64_t b) {
+        b *= C;
+        b ^= b >> 39;
+        a += b * C;
+        a *= C;
+        return a;
+    };
+    // The following couple of expressions is equivalent to a hypothetical
+    // functional "acc = hashes.fold_left(0, combine)". The comma operator
+    // effectively repeats the expression in the second level parenthesis for
+    // each argument in the parameter pack hashes, in order. Thus, final value
+    // of acc is the recursive combination of all hashes.
+    uint64_t acc{0};
+    ((acc = combine(acc, static_cast<uint64_t>(hashes))), ...);
+    return static_cast<size_t>(acc);
+}
+
+constexpr size_t hash_refine(size_t x) {
+    x ^= x >> 32;
+    x *= C;
+    x ^= x >> 29;
+    x *= C;
+    x ^= x >> 32;
+    x *= C;
+    x ^= x >> 29;
+    return static_cast<size_t>(x);
+}
+
+// SFINAE-enabled hash functor
+template<typename T, typename = void>
+struct hash;
+
+// Reuse std::hash whenever possible
+template<typename T>
+struct hash<T, std::enable_if_t<is_std_hash_enabled_v<T>>>: std::hash<T>
+{};
+
+// Enable type deduction for hash object construction
+template<typename T>
+constexpr auto make_hash_for(const T&) {
+    return hash<T>();
+}
+
+// Compute a hash without the hassle of constructing a hash functor
+template<typename T>
+constexpr auto hash_value(const T& v) {
+    return make_hash_for(v)(v);
+}
+
+// Hash for tuple
+template<typename... T>
+struct hash<std::tuple<T...>>
+{
+    constexpr size_t operator()(const std::tuple<T...>& t) const {
+        if constexpr (sizeof...(T) == 0) {
+            return 0;
+        } else {
+            return std::apply([](const auto&... e){
+               return hash_refine(hash_combine(hash_value(e)...));
+            }, t);
+        }
+    }
+};
+
+// Hash for pairs (based on hash for 2-tuple)
+template<typename T, typename U>
+struct hash<std::pair<T, U>>
+{
+    constexpr size_t operator()(const std::pair<T, U>& p) const {
+        return hash_value(std::tie(p.first, p.second));
+    }
+};
+
+// Hash for any iterable of stl_helpers::hash-enabled types.
+template<typename T>
+struct hash<T, std::enable_if_t<
+    !is_std_hash_enabled_v<T> && is_iterable_v<T>>>
+{
+    constexpr size_t operator()(const T& t) const {
+        auto b = begin(t);
+        auto e = end(t);
+        if (b == e) return 0;
+        // Equivalent to hypothetical functional style
+        // return t.map(hash_value).reduce(hash_combine)
+        auto h = std::accumulate(next(b), e, hash_value(*b),
+            [](const auto& acc, const auto& val) {
+                return hash_combine(acc, hash_value(val));
+            });
+        return hash_refine(h);
+    }
+};
+
+template<typename, typename = void>
+constexpr bool is_hash_enabled = false;
+
+template <typename T>
+constexpr bool is_hash_enabled<T,
+    std::void_t<decltype(hash<T>()(std::declval<T>()))>> = true;
+
+} // namespace hash_impl
+
+// Export useful hash_impl functions
+using hash_impl::hash;
+using hash_impl::make_hash_for;
+using hash_impl::hash_value;
+using hash_impl::is_hash_enabled;
+
+/*
+ * Provide unordered_map and unordered_set with stl_helpers::hash functions.
+ * These derived types enable clean use of stl_helpers::hash as the default
+ * Hash template parameter. Alias templates are not used because, with default
+ * template arguments, they do not behave well with template parameter
+ * deduction in certain situations. One must remember that std::unordered_X
+ * is not a polymorphic type and as such, gem5::stl_helpers::unordered_X shall
+ * never be owned as a std::unordered_X.
+ */
+template<
+    typename Key,
+    typename T,
+    typename Hash = hash<Key>,
+    typename KeyEqual = std::equal_to<Key>,
+    typename Allocator = std::allocator< std::pair<const Key, T> >>
+struct unordered_map: std::unordered_map<Key, T, Hash, KeyEqual, Allocator>
+{};
+
+template<
+    typename Key,
+    typename Hash = hash<Key>,
+    typename KeyEqual = std::equal_to<Key>,
+    typename Allocator = std::allocator<Key>>
+struct unordered_set: std::unordered_set<Key, Hash, KeyEqual, Allocator>
+{};
+
+} // namespace gem5::stl_helpers
+
+#endif // BASE_STL_HELPERS_HASH_HELPERS_HH
diff --git a/src/base/stl_helpers/hash_helpers.test.cc b/src/base/stl_helpers/hash_helpers.test.cc
new file mode 100644
index 0000000000..f6f07c5808
--- /dev/null
+++ b/src/base/stl_helpers/hash_helpers.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2023 Arteris, Inc. and its applicable licensors and
+ * affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "base/stl_helpers/hash_helpers.hh"
+
+using namespace gem5;
+
+TEST(HashHelpers, isHashEnabled)
+{
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<int>);
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<long>);
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<double>);
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<std::string>);
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<void*>);
+    using vector_t = std::vector<int>;
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<vector_t>);
+    using tuple_t = std::tuple<int, bool, int**, std::string(*)(float)>;
+    EXPECT_TRUE(stl_helpers::is_hash_enabled<tuple_t>);
+    EXPECT_TRUE((stl_helpers::is_hash_enabled<std::pair<vector_t, tuple_t>>));
+    EXPECT_TRUE((stl_helpers::is_hash_enabled<
+        std::unordered_map<tuple_t, vector_t>>));
+}
+
+// The following tests do not test the hash value as it is considered an
+// implementation detail and there is no contract on the way that value is
+// computed. Testing for hash quality is extremely computationally intensive
+// and is not suitable for unit tests. Consider these tests to be more of a
+// "does it compile?" check as well as a small set of examples for the user.
+TEST(HashHelpers, hashPair)
+{
+    auto p = std::make_pair(1, std::string("hello"));
+    auto hashVal = stl_helpers::hash_value(p);
+    auto hashFunc = stl_helpers::hash<std::pair<int, std::string>>{};
+    EXPECT_EQ(hashVal, hashFunc(p));
+}
+
+TEST(HashHelpers, hashTuple)
+{
+    auto t = std::make_tuple(1, "hello", 4.2, std::make_pair(true, 0.f));
+    auto hashVal = stl_helpers::hash_value(t);
+    auto hashFunc = stl_helpers::hash<decltype(t)>{};
+    EXPECT_EQ(hashVal, hashFunc(t));
+}
+
+TEST(HashHelpers, hashVector)
+{
+    auto v = std::vector<int>{1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto hashVal = stl_helpers::hash_value(v);
+    auto hashFunc = stl_helpers::hash<decltype(v)>{};
+    EXPECT_EQ(hashVal, hashFunc(v));
+}
diff --git a/src/base/stl_helpers/ostream_helpers.hh b/src/base/stl_helpers/ostream_helpers.hh
new file mode 100644
index 0000000000..680d55f23c
--- /dev/null
+++ b/src/base/stl_helpers/ostream_helpers.hh
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2023 Arteris, Inc. and its applicable licensors and
+ * affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef BASE_STL_HELPERS_OSTREAM_HELPERS_HH
+#define BASE_STL_HELPERS_OSTREAM_HELPERS_HH
+
+#include <iostream>
+#include <memory>
+#include <tuple>
+#include <utility>
+
+#include "base/type_traits.hh"
+#include "magic_enum/magic_enum.hh"
+
+namespace gem5::stl_helpers
+{
+
+/*
+ * Wrap any object in a Printer object to force using a opExtract_impl printing
+ * function. This is not required for types that do not already enable
+ * operator<< in another namespace. However, to enable the special printing
+ * function for, e.g., raw pointers, those must be wrapped in a Printer.
+ */
+template<typename T>
+struct Printer
+{
+    Printer(const T& value): value{value} {}
+    const T& value;
+};
+
+namespace opExtract_impl
+{
+
+/*
+ * In order to provide a specialization for operator<< with stl_helpers-enabled
+ * types
+ * without losing the ability to use it with other types, a dual-dispatch
+ * mechanism is used. The only entry point in the system is through a primary
+ * dispatch function that won't resolve for non-helped types. Then, recursive
+ * calls go through the secondary dispatch interface that sorts between helped
+ * and non-helped types. Helped types will enter the system back through the
+ * primary dispatch interface while other types will look for operator<<
+ * through regular lookup, especially ADL.
+ */
+
+template<typename T>
+std::ostream&
+opExtractSecDisp(std::ostream& os, const T& v);
+
+template <typename E>
+std::enable_if_t<std::is_enum_v<E>,
+std::ostream&>
+opExtractPrimDisp(std::ostream& os, const E& e)
+{
+    return os << magic_enum::enum_name(e);
+}
+
+template <typename... T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const std::tuple<T...>& p)
+{
+    std::apply([&](auto&&... e) {
+        std::size_t n{0};
+        os << '(';
+        ((opExtractSecDisp(os, e) << (++n != sizeof...(T) ? ", " : "")), ...);
+        os << ')';
+    }, p);
+    return os;
+}
+
+template <typename T, typename U>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const std::pair<T, U>& p)
+{
+    return opExtractPrimDisp(os, std::tie(p.first, p.second));
+}
+
+template <typename T>
+std::enable_if_t<is_iterable_v<T>, std::ostream&>
+opExtractPrimDisp(std::ostream& os, const T& v)
+{
+    os << "[ ";
+    for (auto& e: v) {
+        opExtractSecDisp(os, e) << ", ";
+    }
+    return os << ']';
+}
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const std::optional<T>& o)
+{
+    if (o) {
+        return opExtractSecDisp(os, *o);
+    } else {
+        return os << "(-)";
+    }
+}
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, T* p);
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const std::shared_ptr<T>& p)
+{
+    return opExtractPrimDisp(os, p.get());
+}
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const std::unique_ptr<T>& p)
+{
+    return opExtractPrimDisp(os, p.get());
+}
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const Printer<T>& p);
+
+template <typename, typename = void>
+constexpr bool isOpExtractNativelySupported = false;
+
+template <typename T>
+constexpr bool isOpExtractNativelySupported<T,
+    std::void_t<decltype(
+        std::declval<std::ostream&>() << std::declval<T>())>> = true;
+
+template <typename, typename = void>
+constexpr bool isOpExtractHelped = false;
+
+template <typename T>
+constexpr bool isOpExtractHelped<T,
+    std::void_t<decltype(
+        opExtractPrimDisp(std::declval<std::ostream&>(),
+                          std::declval<T>()))>>
+    = true;
+
+template <typename T>
+constexpr bool needsDispatch =
+    isOpExtractHelped<T> && !isOpExtractNativelySupported<T>;
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, T* p)
+{
+    if (!p) {
+        return os << "nullptr";
+    }
+    if constexpr (isOpExtractHelped<T> || isOpExtractNativelySupported<T>) {
+        os << '(' << p << ": ";
+        opExtractSecDisp(os, *p);
+        return os << ')';
+    } else {
+        return os << p;
+    }
+}
+
+template <typename T>
+std::ostream&
+opExtractPrimDisp(std::ostream& os, const Printer<T>& p)
+{
+    if constexpr (isOpExtractHelped<T>) {
+        return opExtractPrimDisp(os, p.value);
+    } else {
+        return os << p.value;
+    }
+}
+
+
+template<typename T>
+std::ostream&
+opExtractSecDisp(std::ostream& os, const T& v)
+{
+    if constexpr (needsDispatch<T>) {
+        return opExtractPrimDisp(os, v);
+    } else {
+        return os << v;
+    }
+}
+
+} // namespace opExtract_impl
+
+// use the Printer wrapper or add "using stl_helpers::operator<<" in the scope
+// where you want to use that operator<<.
+template<typename T>
+std::enable_if_t<opExtract_impl::needsDispatch<T>, std::ostream&>
+operator<<(std::ostream& os, const T& v)
+{
+    return opExtract_impl::opExtractPrimDisp(os, v);
+}
+
+} // namespace gem5::stl_helpers
+
+#endif // BASE_STL_HELPERS_OSTREAM_HELPERS_HH
diff --git a/src/base/stl_helpers/ostream_helpers.test.cc b/src/base/stl_helpers/ostream_helpers.test.cc
new file mode 100644
index 0000000000..84c936f4a8
--- /dev/null
+++ b/src/base/stl_helpers/ostream_helpers.test.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2023 Arteris, Inc. and its applicable licensors and
+ * affiliates.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer; redistributions in binary
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials
+ * provided with the distribution; neither the name of the copyright holders
+ * nor the names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+
+#include <map>
+#include <sstream>
+#include <string_view>
+#include <vector>
+
+#include "base/stl_helpers/ostream_helpers.hh"
+
+
+TEST(OstreamHelpers, pair) { // a pair prints as "(first, second)"
+    using gem5::stl_helpers::operator<<;
+    auto p = std::make_pair(1, 2);
+    std::ostringstream os;
+    os << p;
+    EXPECT_EQ(os.str(), "(1, 2)");
+}
+
+TEST(OstreamHelpers, tuple) { // covers a pair nested inside a tuple
+    using gem5::stl_helpers::operator<<;
+    auto t = std::make_tuple(true,
+        std::make_pair("Hello", std::string_view("World")), '!');
+    std::ostringstream os;
+    os << t;
+    EXPECT_EQ(os.str(), "(1, (Hello, World), !)"); // true prints as 1
+}
+
+TEST(OstreamHelpers, vector) { // every element is followed by ", "
+    using gem5::stl_helpers::operator<<;
+    auto v = std::vector<const char*>{"abc", "defg", "hijklm", "\n"};
+    std::ostringstream os;
+    os << v;
+    EXPECT_EQ(os.str(), "[ abc, defg, hijklm, \n, ]");
+}
+
+TEST(OstreamHelpers, map) { // entries print as "(key, value)" pairs
+    using gem5::stl_helpers::operator<<;
+    auto m = std::map<char, int>{{'a', 0}, {'b', 1}, {'c', 2}, {'d', 3}};
+    std::ostringstream os;
+    os << m;
+    EXPECT_EQ(os.str(), "[ (a, 0), (b, 1), (c, 2), (d, 3), ]");
+}
+
+TEST(OstreamHelpers, optional) {
+    using gem5::stl_helpers::operator<<;
+    auto m = std::make_optional<int>(42);
+    std::ostringstream os;
+    os << m;
+    EXPECT_EQ(os.str(), "42"); // engaged optional prints its value
+    os.str(""); // clear the buffer before the empty case
+    m.reset();
+    os << m;
+    EXPECT_EQ(os.str(), "(-)"); // empty optional prints a placeholder
+}
+
+TEST(OstreamHelpers, printer) {
+    std::string hello = "Hello";
+    std::ostringstream os;
+    os << hello; // no using-declaration: std::string's own operator<<
+    EXPECT_EQ(os.str(), hello);
+
+    std::ostringstream os2;
+    os2 << gem5::stl_helpers::Printer(hello); // Printer forces the helper
+    EXPECT_EQ(os2.str(), "[ H, e, l, l, o, ]");
+}
+
+
+TEST(OstreamHelpers, pointers) {
+    auto helped_representation = [](const auto& val) { // via Printer
+        std::ostringstream os;
+        os << gem5::stl_helpers::Printer(val);
+        return os.str();
+    };
+    auto expected_representation = [&](const auto& ptr) { // "(p: *p)"
+        using gem5::stl_helpers::operator<<;
+        std::ostringstream os;
+        auto* raw_ptr = &*ptr; // raw address, also for smart pointers
+        os << '(' << raw_ptr << ": " << *ptr << ')';
+        return os.str();
+    };
+
+    int x = 42;
+    auto* ptr = &x; // raw pointer
+    EXPECT_EQ(helped_representation(ptr), expected_representation(ptr));
+
+    auto uptr = std::make_unique<std::string>("Hello, World!");
+    EXPECT_EQ(helped_representation(uptr), expected_representation(uptr));
+
+    auto sptr = std::make_shared<std::optional<bool>>(); // empty optional
+    EXPECT_EQ(helped_representation(sptr), expected_representation(sptr));
+}
diff --git a/src/base/trie.hh b/src/base/trie.hh
index 477bfbde14..47d60b7632 100644
--- a/src/base/trie.hh
+++ b/src/base/trie.hh
@@ -70,7 +70,7 @@ class Trie
         Value *value;
 
         Node *parent;
-        Node *kids[2];
+        std::unique_ptr<Node> kids[2];
 
         Node(Key _key, Key _mask, Value *_val) :
             key(_key & _mask), mask(_mask), value(_val),
@@ -83,16 +83,8 @@ class Trie
         void
         clear()
         {
-            if (kids[1]) {
-                kids[1]->clear();
-                delete kids[1];
-                kids[1] = NULL;
-            }
-            if (kids[0]) {
-                kids[0]->clear();
-                delete kids[0];
-                kids[0] = NULL;
-            }
+            kids[1].reset();
+            kids[0].reset();
         }
 
         void
@@ -188,9 +180,9 @@ class Trie
                 return node;
 
             if (node->kids[0] && node->kids[0]->matches(key))
-                node = node->kids[0];
+                node = node->kids[0].get();
             else if (node->kids[1] && node->kids[1]->matches(key))
-                node = node->kids[1];
+                node = node->kids[1].get();
             else
                 node = NULL;
         }
@@ -225,8 +217,8 @@ class Trie
         // Walk past all the nodes this new node will be inserted after. They
         // can be ignored for the purposes of this function.
         Node *node = &head;
-        while (goesAfter(&node, node->kids[0], key, new_mask) ||
-               goesAfter(&node, node->kids[1], key, new_mask))
+        while (goesAfter(&node, node->kids[0].get(), key, new_mask) ||
+               goesAfter(&node, node->kids[1].get(), key, new_mask))
         {}
         assert(node);
 
@@ -239,14 +231,13 @@ class Trie
         }
 
         for (unsigned int i = 0; i < 2; i++) {
-            Node *&kid = node->kids[i];
-            Node *new_node;
+            auto& kid = node->kids[i];
             if (!kid) {
                 // No kid. Add a new one.
-                new_node = new Node(key, new_mask, val);
+                auto new_node = std::make_unique<Node>(key, new_mask, val);
                 new_node->parent = node;
-                kid = new_node;
-                return new_node;
+                kid = std::move(new_node);
+                return kid.get();
             }
 
             // Walk down the leg until something doesn't match or we run out
@@ -266,23 +257,23 @@ class Trie
                 continue;
 
             // At the point we walked to above, add a new node.
-            new_node = new Node(key, cur_mask, NULL);
+            auto new_node = std::make_unique<Node>(key, cur_mask, nullptr);
             new_node->parent = node;
-            kid->parent = new_node;
-            new_node->kids[0] = kid;
-            kid = new_node;
+            kid->parent = new_node.get();
+            new_node->kids[0] = std::move(kid);
+            kid = std::move(new_node);
 
             // If we ran out of bits, the value goes right here.
             if (cur_mask == new_mask) {
-                new_node->value = val;
-                return new_node;
+                kid->value = val;
+                return kid.get();
             }
 
             // Still more bits to deal with, so add a new node for that path.
-            new_node = new Node(key, new_mask, val);
-            new_node->parent = kid;
-            kid->kids[1] = new_node;
-            return new_node;
+            new_node = std::make_unique<Node>(key, new_mask, val);
+            new_node->parent = kid.get();
+            kid->kids[1] = std::move(new_node);
+            return kid->kids[1].get();
         }
 
         panic("Reached the end of the Trie insert function!\n");
@@ -332,23 +323,22 @@ class Trie
         if (node->kids[0])
             node->kids[0]->parent = parent;
         // Figure out which kid we are, and update our parent's pointers.
-        if (parent->kids[0] == node)
-            parent->kids[0] = node->kids[0];
-        else if (parent->kids[1] == node)
-            parent->kids[1] = node->kids[0];
+        if (parent->kids[0].get() == node)
+            parent->kids[0] = std::move(node->kids[0]);
+        else if (parent->kids[1].get() == node)
+            parent->kids[1] = std::move(node->kids[0]);
         else
             panic("Trie: Inconsistent parent/kid relationship.\n");
         // Make sure if the parent only has one kid, it's kid[0].
         if (parent->kids[1] && !parent->kids[0]) {
-            parent->kids[0] = parent->kids[1];
-            parent->kids[1] = NULL;
+            parent->kids[0] = std::move(parent->kids[1]);
+            parent->kids[1] = nullptr;
         }
 
         // If the parent has less than two kids and no cargo and isn't the
         // root, delete it too.
         if (!parent->kids[1] && !parent->value && parent->parent)
             remove(parent);
-        delete node;
         return val;
     }
 
diff --git a/src/base/type_traits.hh b/src/base/type_traits.hh
index 1fec93d9d1..fd13044765 100644
--- a/src/base/type_traits.hh
+++ b/src/base/type_traits.hh
@@ -27,8 +27,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef __BASE_TYPETRAITS_HH__
-#define __BASE_TYPETRAITS_HH__
+#ifndef BASE_TYPETRAITS_HH
+#define BASE_TYPETRAITS_HH
 
 #include <tuple>
 #include <type_traits>
@@ -92,6 +92,30 @@ template<auto F>
 using MemberFunctionArgsTuple_t =
     typename MemberFunctionSignature<decltype(F)>::argsTuple_t;
 
+
+// is_iterable<T>: true when unqualified begin(t) and end(t) are well-formed
+template <typename, typename = void>
+struct is_iterable: std::false_type {};
+
+template <typename T>
+struct is_iterable<T,
+    std::void_t<decltype(begin(std::declval<T>())),
+                decltype(end(std::declval<T>()))>>: std::true_type {};
+
+template <typename T>
+constexpr bool is_iterable_v = is_iterable<T>::value;
+
+// is_std_hash_enabled<T>: true when std::hash<T>() is a valid expression
+template <typename, typename = void>
+struct is_std_hash_enabled: std::false_type {};
+
+template <typename T>
+struct is_std_hash_enabled<T,
+    std::void_t<decltype(std::hash<T>())>>: std::true_type {};
+
+template <typename T>
+constexpr bool is_std_hash_enabled_v = is_std_hash_enabled<T>::value;
+
 } // namespace gem5
 
-#endif // __BASE_TYPETRAITS_HH__
+#endif // BASE_TYPETRAITS_HH
diff --git a/src/base/version.cc b/src/base/version.cc
index 38f415590a..bfff67f5b6 100644
--- a/src/base/version.cc
+++ b/src/base/version.cc
@@ -32,6 +32,6 @@ namespace gem5
 /**
  * @ingroup api_base_utils
  */
-const char *gem5Version = "23.0.0.0";
+const char *gem5Version = "23.0.1.0";
 
 } // namespace gem5
diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py
index 556af52612..900c0ae626 100644
--- a/src/cpu/BaseCPU.py
+++ b/src/cpu/BaseCPU.py
@@ -293,8 +293,7 @@ def generateDeviceTree(self, state):
         # Generate nodes from the BaseCPU children (hence under the root node,
         # and don't add them as subnode). Please note: this is mainly needed
         # for the ISA class, to generate the PMU entry in the DTB.
-        for child_node in self.recurseDeviceTree(state):
-            yield child_node
+        yield from self.recurseDeviceTree(state)
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
diff --git a/tests/configs/minor-timing.py b/src/cpu/Capstone.py
similarity index 86%
rename from tests/configs/minor-timing.py
rename to src/cpu/Capstone.py
index e6680d7702..4b6b5fd84a 100644
--- a/tests/configs/minor-timing.py
+++ b/src/cpu/Capstone.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2013 ARM Limited
+# Copyright (c) 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -10,9 +10,6 @@
 # unmodified and in its entirety in all distributions of the software,
 # modified or unmodified, in source code or in binary form.
 #
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -36,9 +33,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.objects import *
-from base_config import *
+from m5.SimObject import SimObject
+from m5.params import *
+from m5.objects.InstTracer import InstDisassembler
+
 
-root = BaseSESystemUniprocessor(
-    mem_mode="timing", mem_class=DDR3_1600_8x8, cpu_class=MinorCPU
-).create_root()
+class CapstoneDisassembler(InstDisassembler):
+    type = "CapstoneDisassembler"
+    cxx_class = "gem5::trace::CapstoneDisassembler"
+    cxx_header = "cpu/capstone.hh"
+    abstract = True  # the C++ class leaves currHandle() pure virtual
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index d6dcd2f6ea..03ba7b924d 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -1,6 +1,6 @@
 # -*- mode:python -*-
 
-# Copyright (c) 2020 ARM Limited
+# Copyright (c) 2020, 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -115,6 +115,9 @@ Source('simple_thread.cc')
 Source('thread_context.cc')
 Source('thread_state.cc')
 Source('timing_expr.cc')
+SourceLib('capstone', tags='capstone')
+Source('capstone.cc', tags='capstone')
+SimObject('Capstone.py', sim_objects=['CapstoneDisassembler'], tags='capstone')
 
 SimObject('DummyChecker.py', sim_objects=['DummyChecker'])
 Source('checker/cpu.cc')
diff --git a/tests/configs/pc-switcheroo-full.py b/src/cpu/SConsopts
similarity index 80%
rename from tests/configs/pc-switcheroo-full.py
rename to src/cpu/SConsopts
index a69f80c341..94e55ece32 100644
--- a/tests/configs/pc-switcheroo-full.py
+++ b/src/cpu/SConsopts
@@ -1,5 +1,4 @@
-# Copyright (c) 2012 ARM Limited
-# Copyright (c) 2013 Mark D. Hill and David A. Wood
+# Copyright (c) 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -34,15 +33,18 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.objects import *
-from x86_generic import *
-import switcheroo
+Import('*')
 
-root = LinuxX86FSSwitcheroo(
-    mem_class=DDR3_1600_8x8,
-    cpu_classes=(AtomicSimpleCPU, TimingSimpleCPU, DerivO3CPU),
-).create_root()
+from gem5_scons import warning
 
-# Setup a custom test method that uses the switcheroo tester that
-# switches between CPU models.
-run_test = switcheroo.run_test
+import gem5_scons
+
+with gem5_scons.Configure(main) as conf:
+    # Check for <capstone/capstone.h>
+    conf.env['CONF']['HAVE_CAPSTONE'] = conf.CheckHeader('capstone/capstone.h', '<>')
+
+    if conf.env['CONF']['HAVE_CAPSTONE']:
+        conf.env.TagImplies('capstone', 'gem5 lib')
+    else:
+        warning("Header file <capstone/capstone.h> not found.\n"
+                "This host has no capstone library installed.")
diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py
index d562dd5645..4ab6cc499c 100644
--- a/src/cpu/StaticInstFlags.py
+++ b/src/cpu/StaticInstFlags.py
@@ -1,4 +1,14 @@
-# Copyright (c) 2020 ARM Limited
+# Copyright (c) 2020, 2023 Arm Limited
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2003-2005 The Regents of The University of Michigan
 # Copyright (c) 2013 Advanced Micro Devices, Inc.
 # All rights reserved.
@@ -75,6 +85,7 @@ class StaticInstFlags(Enum):
         "IsNonSpeculative",  # Should not be executed speculatively
         "IsQuiesce",  # Is a quiesce instruction
         "IsUnverifiable",  # Can't be verified by a checker
+        "IsPseudo",  # Is a gem5 pseudo-op
         "IsSyscall",  # Causes a system call to be emulated in syscall
         # emulation mode.
         # Flags for microcode
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index a61c99796c..bea21a1928 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
     AddressMonitor &monitor = addressMonitor[tid];
 
     if (!monitor.gotWakeup) {
-        int block_size = cacheLineSize();
-        uint64_t mask = ~((uint64_t)(block_size - 1));
+        Addr block_size = cacheLineSize();
+        Addr mask = ~(block_size - 1);
 
         assert(pkt->req->hasPaddr());
         monitor.pAddr = pkt->getAddr() & mask;
@@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu)
     RequestPtr req = std::make_shared<Request>();
 
     Addr addr = monitor.vAddr;
-    int block_size = cacheLineSize();
-    uint64_t mask = ~((uint64_t)(block_size - 1));
+    Addr block_size = cacheLineSize();
+    Addr mask = ~(block_size - 1);
     int size = block_size;
 
     //The address of the next line if it crosses a cache line boundary.
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 3976b66fe4..a6c80dadbe 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
     bool _switchedOut;
 
     /** Cache the cache line size that we get from the system */
-    const unsigned int _cacheLineSize;
+    const Addr _cacheLineSize;
 
     /** Global CPU statistics that are merged into the Root object. */
     struct GlobalStats : public statistics::Group
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
     /**
      * Get the cache line size of the system.
      */
-    inline unsigned int cacheLineSize() const { return _cacheLineSize; }
+    inline Addr cacheLineSize() const { return _cacheLineSize; }
 
     /**
      * Serialize this object to the given output stream.
diff --git a/src/cpu/capstone.cc b/src/cpu/capstone.cc
new file mode 100644
index 0000000000..4c2896312d
--- /dev/null
+++ b/src/cpu/capstone.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/capstone.hh"
+
+#include "base/output.hh"
+
+namespace gem5
+{
+
+namespace trace
+{
+
+/** Disassemble inst via capstone, or natively for pseudo/micro ops. */
+std::string
+CapstoneDisassembler::disassemble(StaticInstPtr inst,
+        const PCStateBase &pc, const loader::SymbolTable *symtab) const
+{
+    // Capstone doesn't have any visibility over microops nor over
+    // gem5 pseudo ops. Use native disassembler instead
+    if (inst->isPseudo() || inst->isMicroop())
+        return InstDisassembler::disassemble(inst, pc, symtab);
+
+    // Stripping the extended fields from the ExtMachInst
+    auto mach_inst = inst->getEMI() & mask(inst->size() * 8);
+
+    const csh *curr_handle = currHandle(pc);
+    if (curr_handle == nullptr) {
+        // No valid handle; return an invalid string
+        return "  capstone failure";
+    }
+
+    cs_insn *insn = nullptr;
+    const size_t count = cs_disasm(*curr_handle, (uint8_t*)&mach_inst,
+        inst->size(), 0, 0, &insn);
+    // As we are passing only one instruction, we are expecting at most
+    // one instruction being disassembled
+    assert(count <= 1);
+
+    std::string inst_dist;
+    for (size_t idx = 0; idx < count; idx++) {
+        inst_dist += csprintf("  %s   %s",
+                              insn[idx].mnemonic, insn[idx].op_str);
+    }
+    if (count > 0)
+        cs_free(insn, count); // cs_disasm's buffer is owned by the caller
+    return inst_dist;
+}
+
+CapstoneDisassembler::CapstoneDisassembler(const Params &p)
+  : InstDisassembler(p)
+{
+}
+
+} // namespace trace
+} // namespace gem5
diff --git a/src/cpu/capstone.hh b/src/cpu/capstone.hh
new file mode 100644
index 0000000000..1a197e5086
--- /dev/null
+++ b/src/cpu/capstone.hh
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_CAPSTONE_HH__
+#define __CPU_CAPSTONE_HH__
+
+#include <capstone/capstone.h>
+
+#include "params/CapstoneDisassembler.hh"
+#include "sim/insttracer.hh"
+
+namespace gem5
+{
+
+class ThreadContext;
+
+namespace trace {
+
+/**
+ * Capstone Disassembler:
+ * The disassembler relies on the capstone library to convert
+ * the StaticInst encoding into the disassembled string.
+ *
+ * One thing to keep in mind is that the disassembled
+ * instruction might not coincide with the instruction being
+ * decoded + executed in gem5. This could be the case if
+ * there was a bug in either gem5 or in capstone itself.
+ * This scenario is not possible with the native gem5 disassembler
+ * as the instruction mnemonic is tightly coupled with the
+ * decoded(=generated) instruction (you print what you decode)
+ *
+ * The Capstone dispatches to the native disassembler in
+ * two cases:
+ *
+ * a) m5 pseudo ops
+ * b) micro-ops
+ */
+class CapstoneDisassembler : public InstDisassembler
+{
+  public:
+    PARAMS(CapstoneDisassembler);
+    CapstoneDisassembler(const Params &p);
+
+    std::string
+    disassemble(StaticInstPtr inst,
+                const PCStateBase &pc,
+                const loader::SymbolTable *symtab) const override;
+
+  protected:
+
+    /**
+     * Return a pointer to the current capstone handle (csh).
+     *
+     * Any ISA extension of the Capstone disassembler should
+     * initialize (with cs_open) one or more capstone handles
+     * at construction time (more than one if the ISA has several
+     * modes of operation, e.g. arm and arm64).
+     * The handle currently in use should be returned every time
+     * currHandle is called; disassemble() reports a failure when
+     * nullptr is returned.
+     */
+    virtual const csh* currHandle(const PCStateBase &pc) const = 0;
+};
+
+} // namespace trace
+} // namespace gem5
+
+#endif // __CPU_CAPSTONE_HH__
diff --git a/src/cpu/exetrace.cc b/src/cpu/exetrace.cc
index 22d0d4be69..6cd5269fa7 100644
--- a/src/cpu/exetrace.cc
+++ b/src/cpu/exetrace.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2019 ARM Limited
+ * Copyright (c) 2017, 2019, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -101,7 +101,7 @@ ExeTracerRecord::traceInst(const StaticInstPtr &inst, bool ran)
     //
 
     outs << std::setw(26) << std::left;
-    outs << inst->disassemble(cur_pc, &loader::debugSymbolTable);
+    outs << tracer.disassemble(inst, *pc, &loader::debugSymbolTable);
 
     if (ran) {
         outs << " : ";
diff --git a/src/cpu/exetrace.hh b/src/cpu/exetrace.hh
index 143cfa0eb3..3fbeb98bc3 100644
--- a/src/cpu/exetrace.hh
+++ b/src/cpu/exetrace.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2001-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -49,14 +61,19 @@ class ExeTracerRecord : public InstRecord
   public:
     ExeTracerRecord(Tick _when, ThreadContext *_thread,
                const StaticInstPtr _staticInst, const PCStateBase &_pc,
+               const ExeTracer &_tracer,
                const StaticInstPtr _macroStaticInst = NULL)
-        : InstRecord(_when, _thread, _staticInst, _pc, _macroStaticInst)
+        : InstRecord(_when, _thread, _staticInst, _pc, _macroStaticInst),
+          tracer(_tracer)
     {
     }
 
     void traceInst(const StaticInstPtr &inst, bool ran);
 
     void dump();
+
+  protected:
+    const ExeTracer &tracer;
 };
 
 class ExeTracer : public InstTracer
@@ -75,7 +92,7 @@ class ExeTracer : public InstTracer
             return NULL;
 
         return new ExeTracerRecord(when, tc,
-                staticInst, pc, macroStaticInst);
+                staticInst, pc, *this, macroStaticInst);
     }
 };
 
diff --git a/src/cpu/kvm/BaseKvmCPU.py b/src/cpu/kvm/BaseKvmCPU.py
index f958e8126c..610663fa41 100644
--- a/src/cpu/kvm/BaseKvmCPU.py
+++ b/src/cpu/kvm/BaseKvmCPU.py
@@ -64,6 +64,11 @@ def require_caches(cls):
     def support_take_over(cls):
         return True
 
+    usePerf = Param.Bool(
+        True,
+        "Use perf for gathering statistics from the guest and providing "
+        "statistic-related functionalities",
+    )
     useCoalescedMMIO = Param.Bool(False, "Use coalesced MMIO (EXPERIMENTAL)")
     usePerfOverflow = Param.Bool(
         False, "Use perf event overflow counters (EXPERIMENTAL)"
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index e22e1628d2..eaa771d8cf 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -71,12 +71,15 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params)
       alwaysSyncTC(params.alwaysSyncTC),
       threadContextDirty(true),
       kvmStateDirty(false),
+      usePerf(params.usePerf),
       vcpuID(-1), vcpuFD(-1), vcpuMMapSize(0),
       _kvmRun(NULL), mmioRing(NULL),
       pageSize(sysconf(_SC_PAGE_SIZE)),
       tickEvent([this]{ tick(); }, "BaseKvmCPU tick",
                 false, Event::CPU_Tick_Pri),
       activeInstPeriod(0),
+      hwCycles(nullptr),
+      hwInstructions(nullptr),
       perfControlledByTimer(params.usePerfOverflow),
       hostFactor(params.hostFactor), stats(this),
       ctrInsts(0)
@@ -96,6 +99,22 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params)
     thread->setStatus(ThreadContext::Halted);
     tc = thread->getTC();
     threadContexts.push_back(tc);
+
+    if ((!usePerf) && perfControlledByTimer) {
+        panic("KVM: invalid combination of parameters: cannot use "
+              "perfControlledByTimer without usePerf\n");
+    }
+
+    // If we use perf, we create new PerfKVMCounters
+    if (usePerf) {
+        hwCycles = std::unique_ptr<PerfKvmCounter>(new PerfKvmCounter());
+        hwInstructions = std::unique_ptr<PerfKvmCounter>(new PerfKvmCounter());
+    } else {
+        inform("Using KVM CPU without perf. The stats related to the number "
+               "of cycles and instructions executed by the KVM CPU will not "
+               "be updated. The stats should not be used for performance "
+               "evaluation.");
+    }
 }
 
 BaseKvmCPU::~BaseKvmCPU()
@@ -248,7 +267,7 @@ BaseKvmCPU::restartEqThread()
     setupCounters();
 
     if (p.usePerfOverflow) {
-        runTimer.reset(new PerfKvmTimer(hwCycles,
+        runTimer.reset(new PerfKvmTimer(*hwCycles,
                                         KVM_KICK_SIGNAL,
                                         p.hostFactor,
                                         p.hostFreq));
@@ -424,8 +443,10 @@ BaseKvmCPU::notifyFork()
         vcpuFD = -1;
         _kvmRun = NULL;
 
-        hwInstructions.detach();
-        hwCycles.detach();
+        if (usePerf) {
+            hwInstructions->detach();
+            hwCycles->detach();
+        }
     }
 }
 
@@ -690,7 +711,9 @@ BaseKvmCPU::kvmRunDrain()
 uint64_t
 BaseKvmCPU::getHostCycles() const
 {
-    return hwCycles.read();
+    if (usePerf)
+        return hwCycles->read();
+    return 0;
 }
 
 Tick
@@ -746,21 +769,26 @@ BaseKvmCPU::kvmRun(Tick ticks)
         // Get hardware statistics after synchronizing contexts. The KVM
         // state update might affect guest cycle counters.
         uint64_t baseCycles(getHostCycles());
-        uint64_t baseInstrs(hwInstructions.read());
+        uint64_t baseInstrs = 0;
+        if (usePerf) {
+            baseInstrs = hwInstructions->read();
+        }
 
         // Arm the run timer and start the cycle timer if it isn't
         // controlled by the overflow timer. Starting/stopping the cycle
         // timer automatically starts the other perf timers as they are in
         // the same counter group.
         runTimer->arm(ticks);
-        if (!perfControlledByTimer)
-            hwCycles.start();
+        if (usePerf && (!perfControlledByTimer)) {
+            hwCycles->start();
+        }
 
         ioctlRun();
 
         runTimer->disarm();
-        if (!perfControlledByTimer)
-            hwCycles.stop();
+        if (usePerf && (!perfControlledByTimer)) {
+            hwCycles->stop();
+        }
 
         // The control signal may have been delivered after we exited
         // from KVM. It will be pending in that case since it is
@@ -771,7 +799,10 @@ BaseKvmCPU::kvmRun(Tick ticks)
 
         const uint64_t hostCyclesExecuted(getHostCycles() - baseCycles);
         const uint64_t simCyclesExecuted(hostCyclesExecuted * hostFactor);
-        const uint64_t instsExecuted(hwInstructions.read() - baseInstrs);
+        uint64_t instsExecuted = 0;
+        if (usePerf) {
+            instsExecuted = hwInstructions->read() - baseInstrs;
+        }
         ticksExecuted = runTimer->ticksFromHostCycles(hostCyclesExecuted);
 
         /* Update statistics */
@@ -1288,13 +1319,14 @@ BaseKvmCPU::setupCounters()
 
     // We might be re-attaching counters due threads being
     // re-initialised after fork.
-    if (hwCycles.attached())
-        hwCycles.detach();
-
-    hwCycles.attach(cfgCycles,
-                    0); // TID (0 => currentThread)
+    if (usePerf) {
+        if (hwCycles->attached()) {
+            hwCycles->detach();
+        }
 
-    setupInstCounter();
+        hwCycles->attach(cfgCycles, 0); // TID (0 => currentThread)
+        setupInstCounter();
+    }
 }
 
 bool
@@ -1344,10 +1376,16 @@ BaseKvmCPU::setupInstStop()
 void
 BaseKvmCPU::setupInstCounter(uint64_t period)
 {
+    // This function is for setting up instruction counter using perf
+    if (!usePerf) {
+        return;
+    }
+
     // No need to do anything if we aren't attaching for the first
     // time or the period isn't changing.
-    if (period == activeInstPeriod && hwInstructions.attached())
+    if (period == activeInstPeriod && hwInstructions->attached()) {
         return;
+    }
 
     PerfKvmCounterConfig cfgInstructions(PERF_TYPE_HARDWARE,
                                          PERF_COUNT_HW_INSTRUCTIONS);
@@ -1366,15 +1404,15 @@ BaseKvmCPU::setupInstCounter(uint64_t period)
 
     // We need to detach and re-attach the counter to reliably change
     // sampling settings. See PerfKvmCounter::period() for details.
-    if (hwInstructions.attached())
-        hwInstructions.detach();
-    assert(hwCycles.attached());
-    hwInstructions.attach(cfgInstructions,
+    if (hwInstructions->attached())
+        hwInstructions->detach();
+    assert(hwCycles->attached());
+    hwInstructions->attach(cfgInstructions,
                           0, // TID (0 => currentThread)
-                          hwCycles);
+                          *hwCycles);
 
     if (period)
-        hwInstructions.enableSignals(KVM_KICK_SIGNAL);
+        hwInstructions->enableSignals(KVM_KICK_SIGNAL);
 
     activeInstPeriod = period;
 }
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 7bbf393f9b..3cf70a0bef 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -653,6 +653,9 @@ class BaseKvmCPU : public BaseCPU
      */
     bool kvmStateDirty;
 
+    /** True if using perf; false otherwise. */
+    bool usePerf;
+
     /** KVM internal ID of the vCPU */
     long vcpuID;
 
@@ -763,7 +766,7 @@ class BaseKvmCPU : public BaseCPU
      * PerfKvmTimer (see perfControlledByTimer) to trigger exits from
      * KVM.
      */
-    PerfKvmCounter hwCycles;
+    std::unique_ptr<PerfKvmCounter> hwCycles;
 
     /**
      * Guest instruction counter.
@@ -776,7 +779,7 @@ class BaseKvmCPU : public BaseCPU
      * @see setupInstBreak
      * @see scheduleInstStop
      */
-    PerfKvmCounter hwInstructions;
+    std::unique_ptr<PerfKvmCounter> hwInstructions;
 
     /**
      * Does the runTimer control the performance counters?
diff --git a/src/cpu/kvm/perfevent.cc b/src/cpu/kvm/perfevent.cc
index f9c317da41..c5e33abf82 100644
--- a/src/cpu/kvm/perfevent.cc
+++ b/src/cpu/kvm/perfevent.cc
@@ -173,12 +173,20 @@ PerfKvmCounter::attach(PerfKvmCounterConfig &config,
     {
         if (errno == EACCES)
         {
-            panic("PerfKvmCounter::attach recieved error EACCESS\n"
+            panic("PerfKvmCounter::attach received error EACCESS.\n"
             "  This error may be caused by a too restrictive setting\n"
-            "  in the file '/proc/sys/kernel/perf_event_paranoid'\n"
-            "  The default value was changed to 2 in kernel 4.6\n"
+            "  in the file '/proc/sys/kernel/perf_event_paranoid'.\n"
+            "  The default value was changed to 2 in kernel 4.6.\n"
             "  A value greater than 1 prevents gem5 from making\n"
-            "  the syscall to perf_event_open");
+            "  the syscall to perf_event_open.\n"
+            "    Alternatively, you can set the usePerf flag of the KVM\n"
+            "  CPU to False. Setting this flag to False will limit some\n"
+            "  functionalities of KVM CPU, such as counting the number of\n"
+            "  cycles and the number of instructions, as well as the\n"
+            "  ability of exiting to gem5 after a certain amount of cycles\n"
+            "  or instructions when using KVM CPU. An example can be found\n"
+            "  here, configs/example/gem5_library/"
+            "x86-ubuntu-run-with-kvm-no-perf.py.");
         }
         panic("PerfKvmCounter::attach failed (%i)\n", errno);
     }
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index 68415ecd09..6ff5ed6b5e 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -112,6 +112,11 @@ MinorDynInst::reportData(std::ostream &os) const
 std::ostream &
 operator <<(std::ostream &os, const MinorDynInst &inst)
 {
+    if (!inst.pc) {
+        os << inst.id << " pc: 0x???????? (bubble)";
+        return os;
+    }
+
     os << inst.id << " pc: 0x"
         << std::hex << inst.pc->instAddr() << std::dec << " (";
 
@@ -169,7 +174,7 @@ MinorDynInst::minorTraceInst(const Named &named_object) const
 {
     if (isFault()) {
         minorInst(named_object, "id=F;%s addr=0x%x fault=\"%s\"\n",
-            id, pc->instAddr(), fault->name());
+            id, pc ? pc->instAddr() : 0, fault->name());
     } else {
         unsigned int num_src_regs = staticInst->numSrcRegs();
         unsigned int num_dest_regs = staticInst->numDestRegs();
@@ -209,7 +214,7 @@ MinorDynInst::minorTraceInst(const Named &named_object) const
 
         minorInst(named_object, "id=%s addr=0x%x inst=\"%s\" class=%s"
             " flags=\"%s\"%s%s\n",
-            id, pc->instAddr(),
+            id, pc ? pc->instAddr() : 0,
             (staticInst->opClass() == No_OpClass ?
                 "(invalid)" : staticInst->disassemble(0,NULL)),
             enums::OpClassStrings[staticInst->opClass()],
diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh
index f6a796ce82..b65bb70d7b 100644
--- a/src/cpu/minor/fetch1.hh
+++ b/src/cpu/minor/fetch1.hh
@@ -213,13 +213,13 @@ class Fetch1 : public Named
     /** Line snap size in bytes.  All fetches clip to make their ends not
      *  extend beyond this limit.  Setting this to the machine L1 cache line
      *  length will result in fetches never crossing line boundaries. */
-    unsigned int lineSnap;
+    Addr lineSnap;
 
     /** Maximum fetch width in bytes.  Setting this (and lineSnap) to the
      *  machine L1 cache line length will result in fetches of whole cache
      *  lines.  Setting this to sizeof(MachInst) will result it fetches of
      *  single instructions (except near the end of lineSnap lines) */
-    unsigned int maxLineWidth;
+    Addr maxLineWidth;
 
     /** Maximum number of fetches allowed in flight (in queues or memory) */
     unsigned int fetchLimit;
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 4d7c351e7a..e30a615803 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -548,7 +548,7 @@ class LSQ : public Named
     const unsigned int inMemorySystemLimit;
 
     /** Memory system access width (and snap) in bytes */
-    const unsigned int lineWidth;
+    const Addr lineWidth;
 
   public:
     /** The LSQ consists of three queues: requests, transfers and the
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index 475d650d3a..25228c1ef5 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -62,10 +62,13 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
         ret = true;
         break;
       case VecRegClass:
-      case VecElemClass:
         scoreboard_index = vecRegOffset + reg.index();
         ret = true;
         break;
+      case VecElemClass:
+        scoreboard_index = vecRegElemOffset + reg.index();
+        ret = true;
+        break;
       case VecPredRegClass:
         scoreboard_index = vecPredRegOffset + reg.index();
         ret = true;
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index d3df324b99..dd42bc6f67 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -71,6 +71,7 @@ class Scoreboard : public Named
     const unsigned floatRegOffset;
     const unsigned ccRegOffset;
     const unsigned vecRegOffset;
+    const unsigned vecRegElemOffset;
     const unsigned vecPredRegOffset;
     const unsigned matRegOffset;
 
@@ -115,7 +116,8 @@ class Scoreboard : public Named
         floatRegOffset(intRegOffset + reg_classes.at(IntRegClass)->numRegs()),
         ccRegOffset(floatRegOffset + reg_classes.at(FloatRegClass)->numRegs()),
         vecRegOffset(ccRegOffset + reg_classes.at(CCRegClass)->numRegs()),
-        vecPredRegOffset(vecRegOffset +
+        vecRegElemOffset(vecRegOffset + reg_classes.at(VecRegClass)->numRegs()),
+        vecPredRegOffset(vecRegElemOffset +
                 reg_classes.at(VecElemClass)->numRegs()),
         matRegOffset(vecPredRegOffset +
                 reg_classes.at(VecPredRegClass)->numRegs()),
diff --git a/src/cpu/nativetrace.cc b/src/cpu/nativetrace.cc
index 3070205b9f..60efc791de 100644
--- a/src/cpu/nativetrace.cc
+++ b/src/cpu/nativetrace.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006-2009 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -49,6 +61,17 @@ NativeTrace::NativeTrace(const Params &p)
     fd = native_listener->accept();
 }
 
+NativeTraceRecord::NativeTraceRecord(
+        NativeTrace *_parent,
+        Tick _when, ThreadContext *_thread,
+        const StaticInstPtr _staticInst, const PCStateBase &_pc,
+        const StaticInstPtr _macroStaticInst)
+  : ExeTracerRecord(_when, _thread, _staticInst, _pc,
+                    *_parent, _macroStaticInst),
+    parent(_parent)
+{
+}
+
 void
 NativeTraceRecord::dump()
 {
diff --git a/src/cpu/nativetrace.hh b/src/cpu/nativetrace.hh
index a19acaca3f..a0866e130f 100644
--- a/src/cpu/nativetrace.hh
+++ b/src/cpu/nativetrace.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2023 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006-2009 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -50,20 +62,16 @@ class NativeTrace;
 
 class NativeTraceRecord : public ExeTracerRecord
 {
-  protected:
-    NativeTrace * parent;
-
   public:
-    NativeTraceRecord(NativeTrace * _parent,
+    NativeTraceRecord(NativeTrace *_parent,
                Tick _when, ThreadContext *_thread,
                const StaticInstPtr _staticInst, const PCStateBase &_pc,
-               const StaticInstPtr _macroStaticInst=nullptr)
-        : ExeTracerRecord(_when, _thread, _staticInst, _pc, _macroStaticInst),
-        parent(_parent)
-    {
-    }
+               const StaticInstPtr _macroStaticInst=nullptr);
 
     void dump();
+
+  private:
+    NativeTrace *parent;
 };
 
 class NativeTrace : public ExeTracer
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 538505bcf9..f5e601f679 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -254,7 +254,7 @@ Commit::setActiveThreads(std::list<ThreadID> *at_ptr)
 }
 
 void
-Commit::setRenameMap(UnifiedRenameMap rm_ptr[])
+Commit::setRenameMap(UnifiedRenameMap rm_ptr[MaxThreads])
 {
     for (ThreadID tid = 0; tid < numThreads; tid++)
         renameMap[tid] = &rm_ptr[tid];
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 85cc3dbf71..a93d5bc74d 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -989,10 +989,10 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case VecRegClass:
-        executeStats[tid]->numVecRegReads++;
+        executeStats[tid]->numVecRegWrites++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegReads++;
+        executeStats[tid]->numVecPredRegWrites++;
         break;
       default:
         break;
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 6add31444d..2c6da6708a 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -470,7 +470,7 @@ class Fetch
     ThreadID retryTid;
 
     /** Cache block size. */
-    unsigned int cacheBlkSize;
+    Addr cacheBlkSize;
 
     /** The size of the fetch buffer in bytes. The fetch buffer
      *  itself may be smaller than a cache line.
diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc
index f8c305eb1c..c20edc2e46 100644
--- a/src/cpu/o3/rename.cc
+++ b/src/cpu/o3/rename.cc
@@ -286,7 +286,7 @@ Rename::setActiveThreads(std::list<ThreadID> *at_ptr)
 
 
 void
-Rename::setRenameMap(UnifiedRenameMap rm_ptr[])
+Rename::setRenameMap(UnifiedRenameMap rm_ptr[MaxThreads])
 {
     for (ThreadID tid = 0; tid < numThreads; tid++)
         renameMap[tid] = &rm_ptr[tid];
@@ -940,8 +940,11 @@ Rename::doSquash(const InstSeqNum &squashed_seq_num, ThreadID tid)
             // previous physical register that it was renamed to.
             renameMap[tid]->setEntry(hb_it->archReg, hb_it->prevPhysReg);
 
-            // Put the renamed physical register back on the free list.
-            freeList->addReg(hb_it->newPhysReg);
+            // These physical registers may still be owned by squashed
+            // instructions that are still executing in IEW. To avoid an
+            // ownership hazard on SMT CPUs, delay the free-list update
+            // until they have actually been squashed in the commit stage.
+            freeingInProgress[tid].push_back(hb_it->newPhysReg);
         }
 
         // Notify potential listeners that the register mapping needs to be
@@ -1296,6 +1299,18 @@ Rename::checkSignalsAndUpdate(ThreadID tid)
         squash(fromCommit->commitInfo[tid].doneSeqNum, tid);
 
         return true;
+    } else if (!fromCommit->commitInfo[tid].robSquashing &&
+            !freeingInProgress[tid].empty()) {
+        DPRINTF(Rename, "[tid:%i] Freeing phys regs of misspeculated "
+                "instructions.\n", tid);
+
+        auto reg_it = freeingInProgress[tid].cbegin();
+        while ( reg_it != freeingInProgress[tid].cend()){
+            // Put the renamed physical register back on the free list.
+            freeList->addReg(*reg_it);
+            ++reg_it;
+        }
+        freeingInProgress[tid].clear();
     }
 
     if (checkStall(tid)) {
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 61ef476501..81e63e5019 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -359,6 +359,9 @@ class Rename
     /** Free list interface. */
     UnifiedFreeList *freeList;
 
+    /** Physical registers to be freed once the squash has finished. */
+    std::vector<PhysRegIdPtr> freeingInProgress[MaxThreads];
+
     /** Pointer to the list of active threads. */
     std::list<ThreadID> *activeThreads;
 
diff --git a/src/cpu/pred/2bit_local.cc b/src/cpu/pred/2bit_local.cc
index c9aa714ed1..00baf92882 100644
--- a/src/cpu/pred/2bit_local.cc
+++ b/src/cpu/pred/2bit_local.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -67,10 +79,10 @@ LocalBP::LocalBP(const LocalBPParams &params)
 }
 
 void
-LocalBP::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history)
+LocalBP::updateHistories(ThreadID tid, Addr pc, bool uncond,
+                         bool taken, Addr target, void * &bp_history)
 {
-// Place holder for a function that is called to update predictor history when
-// a BTB entry is invalid or not found.
+// Placeholder for a function that is called to update predictor history.
 }
 
 
@@ -94,8 +106,8 @@ LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
 }
 
 void
-LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *&bp_history,
+                bool squashed, const StaticInstPtr & inst, Addr target)
 {
     assert(bp_history == NULL);
     unsigned local_predictor_idx;
@@ -135,10 +147,6 @@ LocalBP::getLocalIndex(Addr &branch_addr)
     return (branch_addr >> instShiftAmt) & indexMask;
 }
 
-void
-LocalBP::uncondBranch(ThreadID tid, Addr pc, void *&bp_history)
-{
-}
 
 } // namespace branch_prediction
 } // namespace gem5
diff --git a/src/cpu/pred/2bit_local.hh b/src/cpu/pred/2bit_local.hh
index 55f45ca55c..9bdb1131fd 100644
--- a/src/cpu/pred/2bit_local.hh
+++ b/src/cpu/pred/2bit_local.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2011, 2014 ARM Limited
+ * Copyright (c) 2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -69,35 +70,17 @@ class LocalBP : public BPredUnit
      */
     LocalBP(const LocalBPParams &params);
 
-    virtual void uncondBranch(ThreadID tid, Addr pc, void * &bp_history);
+    // Overriding interface functions
+    bool lookup(ThreadID tid, Addr pc, void * &bp_history) override;
 
-    /**
-     * Looks up the given address in the branch predictor and returns
-     * a true/false value as to whether it is taken.
-     * @param branch_addr The address of the branch to look up.
-     * @param bp_history Pointer to any bp history state.
-     * @return Whether or not the branch is taken.
-     */
-    bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history);
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
 
-    /**
-     * Updates the branch predictor to Not Taken if a BTB entry is
-     * invalid or not found.
-     * @param branch_addr The address of the branch to look up.
-     * @param bp_history Pointer to any bp history state.
-     * @return Whether or not the branch is taken.
-     */
-    void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
-
-    /**
-     * Updates the branch predictor with the actual result of a branch.
-     * @param branch_addr The address of the branch to update.
-     * @param taken Whether or not the branch was taken.
-     */
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst, Addr corrTarget);
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
 
-    void squash(ThreadID tid, void *bp_history)
+    void squash(ThreadID tid, void * &bp_history) override
     { assert(bp_history == NULL); }
 
   private:
diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py
index d18ca3f821..8589fe5d51 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2022-2023 The University of Edinburgh
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2012 Mark D. Hill and David A. Wood
 # Copyright (c) 2015 The University of Wisconsin
 # All rights reserved.
@@ -25,10 +37,64 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.SimObject import SimObject
+from m5.SimObject import *
 from m5.params import *
 from m5.proxy import *
 
+from m5.objects.ClockedObject import ClockedObject
+
+
+class BranchType(Enum):
+    vals = [
+        "NoBranch",
+        "Return",
+        "CallDirect",
+        "CallIndirect",  # 'Call',
+        "DirectCond",
+        "DirectUncond",  # 'Direct',
+        "IndirectCond",
+        "IndirectUncond",  #'Indirect',
+    ]
+
+
+class TargetProvider(Enum):
+    vals = [
+        "NoTarget",
+        "BTB",
+        "RAS",
+        "Indirect",
+    ]
+
+
+class ReturnAddrStack(SimObject):
+    type = "ReturnAddrStack"
+    cxx_class = "gem5::branch_prediction::ReturnAddrStack"
+    cxx_header = "cpu/pred/ras.hh"
+
+    numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
+    numEntries = Param.Unsigned(16, "Number of RAS entries")
+
+
+class BranchTargetBuffer(ClockedObject):
+    type = "BranchTargetBuffer"
+    cxx_class = "gem5::branch_prediction::BranchTargetBuffer"
+    cxx_header = "cpu/pred/btb.hh"
+    abstract = True
+
+    numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
+
+
+class SimpleBTB(BranchTargetBuffer):
+    type = "SimpleBTB"
+    cxx_class = "gem5::branch_prediction::SimpleBTB"
+    cxx_header = "cpu/pred/simple_btb.hh"
+
+    numEntries = Param.Unsigned(4096, "Number of BTB entries")
+    tagBits = Param.Unsigned(16, "Size of the BTB tags, in bits")
+    instShiftAmt = Param.Unsigned(
+        Parent.instShiftAmt, "Number of bits to shift instructions by"
+    )
+
 
 class IndirectPredictor(SimObject):
     type = "IndirectPredictor"
@@ -52,6 +118,13 @@ class SimpleIndirectPredictor(IndirectPredictor):
     indirectPathLength = Param.Unsigned(
         3, "Previous indirect targets to use for path history"
     )
+    speculativePathLength = Param.Unsigned(
+        256,
+        "Additional buffer space to store speculative path history. "
+        "If there are more speculative branches in flight the history cannot "
+        "be recovered. Set this to an appropriate value respective the CPU "
+        "pipeline depth or a high value e.g. 256 to make it 'unlimited'.",
+    )
     indirectGHRBits = Param.Unsigned(13, "Indirect GHR number of bits")
     instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")
 
@@ -63,14 +136,16 @@ class BranchPredictor(SimObject):
     abstract = True
 
     numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
-    BTBEntries = Param.Unsigned(4096, "Number of BTB entries")
-    BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits")
-    RASSize = Param.Unsigned(16, "RAS size")
     instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")
 
+    btb = Param.BranchTargetBuffer(SimpleBTB(), "Branch target buffer (BTB)")
+    ras = Param.ReturnAddrStack(
+        ReturnAddrStack(), "Return address stack, set to NULL to disable RAS."
+    )
     indirectBranchPred = Param.IndirectPredictor(
         SimpleIndirectPredictor(),
-        "Indirect branch predictor, set to NULL to disable indirect predictions",
+        "Indirect branch predictor, set to NULL to disable "
+        "indirect predictions",
     )
 
 
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index f4b6870ec5..ec3102cada 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -1,5 +1,17 @@
 # -*- mode:python -*-
 
+# Copyright (c) 2022-2023 The University of Edinburgh
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2006 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -28,8 +40,13 @@
 
 Import('*')
 
-SimObject('BranchPredictor.py', sim_objects=[
-    'IndirectPredictor', 'SimpleIndirectPredictor', 'BranchPredictor',
+
+SimObject('BranchPredictor.py',
+    sim_objects=[
+    'BranchPredictor',
+    'IndirectPredictor', 'SimpleIndirectPredictor',
+    'BranchTargetBuffer', 'SimpleBTB',
+    'ReturnAddrStack',
     'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor',
     'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB',
     'LTAGE', 'TAGE_SC_L_LoopPredictor', 'StatisticalCorrector', 'TAGE_SC_L',
@@ -41,17 +58,16 @@ SimObject('BranchPredictor.py', sim_objects=[
     'MultiperspectivePerceptronTAGE', 'MPP_StatisticalCorrector_64KB',
     'MultiperspectivePerceptronTAGE64KB', 'MPP_TAGE_8KB',
     'MPP_LoopPredictor_8KB', 'MPP_StatisticalCorrector_8KB',
-    'MultiperspectivePerceptronTAGE8KB'])
+    'MultiperspectivePerceptronTAGE8KB'],
+    enums=['BranchType', 'TargetProvider'])
 
-DebugFlag('Indirect')
 Source('bpred_unit.cc')
 Source('2bit_local.cc')
-Source('btb.cc')
 Source('simple_indirect.cc')
 Source('indirect.cc')
 Source('ras.cc')
 Source('tournament.cc')
-Source ('bi_mode.cc')
+Source('bi_mode.cc')
 Source('tage_base.cc')
 Source('tage.cc')
 Source('loop_predictor.cc')
@@ -66,6 +82,11 @@ Source('statistical_corrector.cc')
 Source('tage_sc_l.cc')
 Source('tage_sc_l_8KB.cc')
 Source('tage_sc_l_64KB.cc')
+Source('btb.cc')
+Source('simple_btb.cc')
+DebugFlag('Indirect')
+DebugFlag('BTB')
+DebugFlag('RAS')
 DebugFlag('FreeList')
 DebugFlag('Branch')
 DebugFlag('Tage')
diff --git a/src/cpu/pred/bi_mode.cc b/src/cpu/pred/bi_mode.cc
index 40dcbad7db..e55237f6d8 100644
--- a/src/cpu/pred/bi_mode.cc
+++ b/src/cpu/pred/bi_mode.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -73,7 +85,7 @@ BiModeBP::BiModeBP(const BiModeBPParams &params)
  * chooses the taken array and the taken array predicts taken.
  */
 void
-BiModeBP::uncondBranch(ThreadID tid, Addr pc, void * &bpHistory)
+BiModeBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history)
 {
     BPHistory *history = new BPHistory;
     history->globalHistoryReg = globalHistoryReg[tid];
@@ -81,17 +93,29 @@ BiModeBP::uncondBranch(ThreadID tid, Addr pc, void * &bpHistory)
     history->takenPred = true;
     history->notTakenPred = true;
     history->finalPred = true;
-    bpHistory = static_cast<void*>(history);
-    updateGlobalHistReg(tid, true);
+    bp_history = static_cast<void*>(history);
 }
 
 void
-BiModeBP::squash(ThreadID tid, void *bpHistory)
+BiModeBP::updateHistories(ThreadID tid, Addr pc, bool uncond,
+                         bool taken, Addr target, void * &bp_history)
 {
-    BPHistory *history = static_cast<BPHistory*>(bpHistory);
+    assert(uncond || bp_history);
+    if (uncond) {
+        uncondBranch(tid, pc, bp_history);
+    }
+    updateGlobalHistReg(tid, taken);
+}
+
+
+void
+BiModeBP::squash(ThreadID tid, void * &bp_history)
+{
+    BPHistory *history = static_cast<BPHistory*>(bp_history);
     globalHistoryReg[tid] = history->globalHistoryReg;
 
     delete history;
+    bp_history = nullptr;
 }
 
 /*
@@ -104,7 +128,7 @@ BiModeBP::squash(ThreadID tid, void *bpHistory)
  * direction predictors for the final branch prediction.
  */
 bool
-BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bpHistory)
+BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bp_history)
 {
     unsigned choiceHistoryIdx = ((branchAddr >> instShiftAmt)
                                 & choiceHistoryMask);
@@ -136,17 +160,11 @@ BiModeBP::lookup(ThreadID tid, Addr branchAddr, void * &bpHistory)
     }
 
     history->finalPred = finalPrediction;
-    bpHistory = static_cast<void*>(history);
-    updateGlobalHistReg(tid, finalPrediction);
+    bp_history = static_cast<void*>(history);
 
     return finalPrediction;
 }
 
-void
-BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory)
-{
-    globalHistoryReg[tid] &= (historyRegisterMask & ~1ULL);
-}
 
 /* Only the selected direction predictor will be updated with the final
  * outcome; the status of the unselected one will not be altered. The choice
@@ -155,12 +173,12 @@ BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory)
  * the direction predictors makes a correct final prediction.
  */
 void
-BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory,
-                 bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken,void * &bp_history,
+                 bool squashed, const StaticInstPtr & inst, Addr target)
 {
-    assert(bpHistory);
+    assert(bp_history);
 
-    BPHistory *history = static_cast<BPHistory*>(bpHistory);
+    BPHistory *history = static_cast<BPHistory*>(bp_history);
 
     // We do not update the counters speculatively on a squash.
     // We just restore the global history register.
@@ -222,6 +240,7 @@ BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory,
     }
 
     delete history;
+    bp_history = nullptr;
 }
 
 void
diff --git a/src/cpu/pred/bi_mode.hh b/src/cpu/pred/bi_mode.hh
index 721d21b79a..46c3cc2b69 100644
--- a/src/cpu/pred/bi_mode.hh
+++ b/src/cpu/pred/bi_mode.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -61,15 +73,17 @@ class BiModeBP : public BPredUnit
 {
   public:
     BiModeBP(const BiModeBPParams &params);
-    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history);
-    void squash(ThreadID tid, void *bp_history);
-    bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history);
-    void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst, Addr corrTarget);
+    bool lookup(ThreadID tid, Addr pc, void * &bp_history) override;
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
+    void squash(ThreadID tid, void * &bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
 
   private:
     void updateGlobalHistReg(ThreadID tid, bool taken);
+    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history);
 
     struct BPHistory
     {
diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc
index ec751f7dc6..f705b93b71 100644
--- a/src/cpu/pred/bpred_unit.cc
+++ b/src/cpu/pred/bpred_unit.cc
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2011-2012, 2014 ARM Limited
- * Copyright (c) 2010 The University of Edinburgh
+ * Copyright (c) 2010,2022-2023 The University of Edinburgh
  * Copyright (c) 2012 Mark D. Hill and David A. Wood
  * All rights reserved
  *
@@ -58,50 +58,15 @@ namespace branch_prediction
 BPredUnit::BPredUnit(const Params &params)
     : SimObject(params),
       numThreads(params.numThreads),
+      instShiftAmt(params.instShiftAmt),
       predHist(numThreads),
-      BTB(params.BTBEntries,
-          params.BTBTagSize,
-          params.instShiftAmt,
-          params.numThreads),
-      RAS(numThreads),
+      btb(params.btb),
+      ras(params.ras),
       iPred(params.indirectBranchPred),
-      stats(this),
-      instShiftAmt(params.instShiftAmt)
+      stats(this)
 {
-    for (auto& r : RAS)
-        r.init(params.RASSize);
 }
 
-BPredUnit::BPredUnitStats::BPredUnitStats(statistics::Group *parent)
-    : statistics::Group(parent),
-      ADD_STAT(lookups, statistics::units::Count::get(),
-              "Number of BP lookups"),
-      ADD_STAT(condPredicted, statistics::units::Count::get(),
-               "Number of conditional branches predicted"),
-      ADD_STAT(condIncorrect, statistics::units::Count::get(),
-               "Number of conditional branches incorrect"),
-      ADD_STAT(BTBLookups, statistics::units::Count::get(),
-               "Number of BTB lookups"),
-      ADD_STAT(BTBUpdates, statistics::units::Count::get(),
-               "Number of BTB updates"),
-      ADD_STAT(BTBHits, statistics::units::Count::get(), "Number of BTB hits"),
-      ADD_STAT(BTBHitRatio, statistics::units::Ratio::get(), "BTB Hit Ratio",
-               BTBHits / BTBLookups),
-      ADD_STAT(RASUsed, statistics::units::Count::get(),
-               "Number of times the RAS was used to get a target."),
-      ADD_STAT(RASIncorrect, statistics::units::Count::get(),
-               "Number of incorrect RAS predictions."),
-      ADD_STAT(indirectLookups, statistics::units::Count::get(),
-               "Number of indirect predictor lookups."),
-      ADD_STAT(indirectHits, statistics::units::Count::get(),
-               "Number of indirect target hits."),
-      ADD_STAT(indirectMisses, statistics::units::Count::get(),
-               "Number of indirect misses."),
-      ADD_STAT(indirectMispredicted, statistics::units::Count::get(),
-               "Number of mispredicted indirect branches.")
-{
-    BTBHitRatio.precision(6);
-}
 
 probing::PMUUPtr
 BPredUnit::pmuProbePoint(const char *name)
@@ -128,266 +93,350 @@ BPredUnit::drainSanityCheck() const
         assert(ph.empty());
 }
 
+
 bool
 BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
                    PCStateBase &pc, ThreadID tid)
 {
+    /** Perform the prediction. */
+    PredictorHistory* bpu_history = nullptr;
+    bool taken  = predict(inst, seqNum, pc, tid, bpu_history);
+
+    assert(bpu_history!=nullptr);
+
+    /** Push the record into the history buffer */
+    predHist[tid].push_front(bpu_history);
+
+    DPRINTF(Branch, "[tid:%i] [sn:%llu] History entry added. "
+            "predHist.size(): %i\n", tid, seqNum, predHist[tid].size());
+
+    return taken;
+}
+
+
+
+
+bool
+BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
+                   PCStateBase &pc, ThreadID tid, PredictorHistory* &hist)
+{
+    assert(hist == nullptr);
+
+
     // See if branch predictor predicts taken.
     // If so, get its target addr either from the BTB or the RAS.
-    // Save off record of branch stuff so the RAS can be fixed
-    // up once it's done.
+    // Save off branch stuff into `hist` so we can correct the predictor
+    // if prediction was wrong.
 
-    bool pred_taken = false;
-    std::unique_ptr<PCStateBase> target(pc.clone());
+    BranchType brType = getBranchType(inst);
+    hist = new PredictorHistory(tid, seqNum, pc.instAddr(), inst);
 
-    ++stats.lookups;
+    stats.lookups[tid][brType]++;
     ppBranches->notify(1);
 
-    void *bp_history = NULL;
-    void *indirect_history = NULL;
+
+    /* -----------------------------------------------
+     * Get branch direction
+     * -----------------------------------------------
+     * Lookup the direction predictor for every
+     * conditional branch. For unconditional branches
+     * the direction is always taken
+     */
 
     if (inst->isUncondCtrl()) {
-        DPRINTF(Branch, "[tid:%i] [sn:%llu] Unconditional control\n",
-            tid,seqNum);
-        pred_taken = true;
-        // Tell the BP there was an unconditional branch.
-        uncondBranch(tid, pc.instAddr(), bp_history);
+        // Unconditional branches -----
+        hist->condPred = true;
     } else {
+        // Conditional branches -------
         ++stats.condPredicted;
-        pred_taken = lookup(tid, pc.instAddr(), bp_history);
+        hist->condPred = lookup(tid, pc.instAddr(), hist->bpHistory);
 
-        DPRINTF(Branch, "[tid:%i] [sn:%llu] "
-                "Branch predictor predicted %i for PC %s\n",
-                tid, seqNum,  pred_taken, pc);
-    }
-
-    const bool orig_pred_taken = pred_taken;
-    if (iPred) {
-        iPred->genIndirectInfo(tid, indirect_history);
+        if (hist->condPred) {
+            ++stats.condPredictedTaken;
+        }
     }
+    hist->predTaken = hist->condPred;
 
     DPRINTF(Branch,
-            "[tid:%i] [sn:%llu] Creating prediction history for PC %s\n",
-            tid, seqNum, pc);
-
-    PredictorHistory predict_record(seqNum, pc.instAddr(), pred_taken,
-                                    bp_history, indirect_history, tid, inst);
-
-    // Now lookup in the BTB or RAS.
-    if (pred_taken) {
-        // Note: The RAS may be both popped and pushed to
-        //       support coroutines.
-        if (inst->isReturn()) {
-            ++stats.RASUsed;
-            predict_record.wasReturn = true;
-            // If it's a function return call, then look up the address
-            // in the RAS.
-            const PCStateBase *ras_top = RAS[tid].top();
-            if (ras_top)
-                set(target, inst->buildRetPC(pc, *ras_top));
-
-            // Record the top entry of the RAS, and its index.
-            predict_record.usedRAS = true;
-            predict_record.RASIndex = RAS[tid].topIdx();
-            set(predict_record.RASTarget, ras_top);
-
-            RAS[tid].pop();
-
-            DPRINTF(Branch, "[tid:%i] [sn:%llu] Instruction %s is a return, "
-                    "RAS predicted target: %s, RAS index: %i\n",
-                    tid, seqNum, pc, *target, predict_record.RASIndex);
+            "[tid:%i, sn:%llu] Branch predictor predicted %i for PC:%#x %s\n",
+            tid, seqNum, hist->condPred, hist->pc, toString(brType));
+
+
+    // The direction is done now get the target address
+    // from BTB, RAS or indirect predictor.
+    hist->targetProvider = TargetProvider::NoTarget;
+
+    /* -----------------------------------------------
+     * Branch Target Buffer (BTB)
+     * -----------------------------------------------
+     * The BTB will be checked for all branches.
+     */
+    stats.BTBLookups++;
+    const PCStateBase * btb_target = btb->lookup(tid, pc.instAddr(), brType);
+    if (btb_target) {
+        stats.BTBHits++;
+        hist->btbHit = true;
+
+        if (hist->predTaken) {
+            hist->targetProvider = TargetProvider::BTB;
+            set(hist->target, btb_target);
         }
+    }
+
+    DPRINTF(Branch, "[tid:%i, sn:%llu] PC:%#x BTB:%s\n",
+            tid, seqNum, hist->pc,  (hist->btbHit) ? "hit" : "miss");
 
+
+    /* -----------------------------------------------
+     * Return Address Stack (RAS)
+     * -----------------------------------------------
+     * Perform RAS operations for calls and returns.
+     * Calls: push their RETURN address onto
+     *    the RAS.
+     * Return: pop the return address from the
+     *    top of the RAS.
+     */
+    if (ras) {
         if (inst->isCall()) {
-            RAS[tid].push(pc);
-            predict_record.pushedRAS = true;
+            // In case of a call build the return address and
+            // push it to the RAS.
+            auto return_addr = inst->buildRetPC(pc, pc);
+            ras->push(tid, *return_addr, hist->rasHistory);
 
-            // Record that it was a call so that the top RAS entry can
-            // be popped off if the speculation is incorrect.
-            predict_record.wasCall = true;
+            DPRINTF(Branch, "[tid:%i] [sn:%llu] Instr. %s was "
+                    "a call, push return address %s onto the RAS\n",
+                    tid, seqNum, pc, *return_addr);
 
-            DPRINTF(Branch,
-                    "[tid:%i] [sn:%llu] Instruction %s was a call, adding "
-                    "%s to the RAS index: %i\n",
-                    tid, seqNum, pc, pc, RAS[tid].topIdx());
         }
+        else if (inst->isReturn()) {
 
-        // The target address is not predicted by RAS.
-        // Thus, BTB/IndirectBranch Predictor is employed.
-        if (!inst->isReturn()) {
-            if (inst->isDirectCtrl() || !iPred) {
-                ++stats.BTBLookups;
-                // Check BTB on direct branches
-                if (BTB.valid(pc.instAddr(), tid)) {
-                    ++stats.BTBHits;
-                    // If it's not a return, use the BTB to get target addr.
-                    set(target, BTB.lookup(pc.instAddr(), tid));
-                    DPRINTF(Branch,
-                            "[tid:%i] [sn:%llu] Instruction %s predicted "
-                            "target is %s\n",
-                            tid, seqNum, pc, *target);
-                } else {
-                    DPRINTF(Branch, "[tid:%i] [sn:%llu] BTB doesn't have a "
-                            "valid entry\n", tid, seqNum);
-                    pred_taken = false;
-                    predict_record.predTaken = pred_taken;
-                    // The Direction of the branch predictor is altered
-                    // because the BTB did not have an entry
-                    // The predictor needs to be updated accordingly
-                    if (!inst->isCall() && !inst->isReturn()) {
-                        btbUpdate(tid, pc.instAddr(), bp_history);
-                        DPRINTF(Branch,
-                                "[tid:%i] [sn:%llu] btbUpdate "
-                                "called for %s\n",
-                                tid, seqNum, pc);
-                    } else if (inst->isCall() && !inst->isUncondCtrl()) {
-                        RAS[tid].pop();
-                        predict_record.pushedRAS = false;
-                    }
-                    inst->advancePC(*target);
-                }
-            } else {
-                predict_record.wasIndirect = true;
-                ++stats.indirectLookups;
-                //Consult indirect predictor on indirect control
-                if (iPred->lookup(pc.instAddr(), *target, tid)) {
-                    // Indirect predictor hit
-                    ++stats.indirectHits;
-                    DPRINTF(Branch,
-                            "[tid:%i] [sn:%llu] Instruction %s predicted "
-                            "indirect target is %s\n",
-                            tid, seqNum, pc, *target);
-                } else {
-                    ++stats.indirectMisses;
-                    pred_taken = false;
-                    predict_record.predTaken = pred_taken;
-                    DPRINTF(Branch,
-                            "[tid:%i] [sn:%llu] Instruction %s no indirect "
-                            "target\n",
-                            tid, seqNum, pc);
-                    if (!inst->isCall() && !inst->isReturn()) {
-
-                    } else if (inst->isCall() && !inst->isUncondCtrl()) {
-                        RAS[tid].pop();
-                        predict_record.pushedRAS = false;
-                    }
-                    inst->advancePC(*target);
-                }
-                iPred->recordIndirect(pc.instAddr(), target->instAddr(),
-                        seqNum, tid);
+            // If it's a return from a function call, then look up the
+            // RETURN address in the RAS.
+            const PCStateBase *return_addr = ras->pop(tid, hist->rasHistory);
+            if (return_addr) {
+
+                // Set the target to the return address
+                set(hist->target, *return_addr);
+                hist->targetProvider = TargetProvider::RAS;
+
+                DPRINTF(Branch, "[tid:%i] [sn:%llu] Instr. %s is a "
+                        "return, RAS poped return addr: %s\n",
+                        tid, seqNum, pc, *hist->target);
             }
         }
-    } else {
-        if (inst->isReturn()) {
-           predict_record.wasReturn = true;
+    }
+
+
+    /* -----------------------------------------------
+     *  Indirect Predictor
+     * -----------------------------------------------
+     * For indirect branches/calls check the indirect
+     * predictor if one is available. Not for returns.
+     * Note that depending on the implementation an
+     * indirect predictor might only return a target
+     * for an indirect branch with a changing target.
+     * As most indirect branches have a static target,
+     * using the target from the BTB is the optimal way
+     * to save space in the indirect predictor itself.
+     */
+    if (iPred && hist->predTaken &&
+        inst->isIndirectCtrl() && !inst->isReturn()) {
+
+        ++stats.indirectLookups;
+
+        const PCStateBase *itarget = iPred->lookup(tid, seqNum,
+                                            pc.instAddr(),
+                                    hist->indirectHistory);
+
+        if (itarget) {
+            // Indirect predictor hit
+            ++stats.indirectHits;
+            hist->targetProvider = TargetProvider::Indirect;
+            set(hist->target, *itarget);
+
+            DPRINTF(Branch,
+                    "[tid:%i, sn:%llu] Instruction %s predicted "
+                    "indirect target is %s\n",
+                    tid, seqNum, pc, *hist->target);
+        } else {
+            ++stats.indirectMisses;
+            DPRINTF(Branch,
+                    "[tid:%i, sn:%llu] PC:%#x no indirect target\n",
+                    tid, seqNum, pc.instAddr());
         }
-        inst->advancePC(*target);
     }
-    predict_record.target = target->instAddr();
 
-    set(pc, *target);
 
-    if (iPred) {
-        // Update the indirect predictor with the direction prediction
-        // Note that this happens after indirect lookup, so it does not use
-        // the new information
-        // Note also that we use orig_pred_taken instead of pred_taken in
-        // as this is the actual outcome of the direction prediction
-        iPred->updateDirectionInfo(tid, orig_pred_taken);
+    /** ----------------------------------------------
+     * Fallthrough
+     * -----------------------------------------------
+     * All the target predictors did their job.
+     * If there is no target it's either not taken or
+     * a BTB miss. In that case we just fall through.
+     * */
+    if (hist->targetProvider == TargetProvider::NoTarget) {
+        set(hist->target, pc);
+        inst->advancePC(*hist->target);
+        hist->predTaken = false;
     }
+    stats.targetProvider[tid][hist->targetProvider]++;
+
+    // The actual prediction is done.
+    // For now the BPU assumes it is correct. The update
+    // functions will correct the branch if needed.
+    // If prediction and actual direction are the same
+    // at commit the prediction was correct.
+    hist->actuallyTaken = hist->predTaken;
+    set(pc, *hist->target);
+
+    DPRINTF(Branch, "%s(tid:%i, sn:%i, PC:%#x, %s) -> taken:%i, target:%s "
+            "provider:%s\n", __func__, tid, seqNum, hist->pc,
+            toString(brType), hist->predTaken, *hist->target,
+            enums::TargetProviderStrings[hist->targetProvider]);
+
+
+    /** ----------------------------------------------
+     * Speculative history update
+     * -----------------------------------------------
+     * Now that the prediction is done the predictor
+     * may update its histories speculatively (local
+     * and global path). A later squash will revert
+     * the history update if needed.
+     * The actual prediction tables will be updated once
+     * we know the correct direction.
+     **/
+    updateHistories(tid, hist->pc, hist->uncond, hist->predTaken,
+                    hist->target->instAddr(), hist->bpHistory);
 
-    predHist[tid].push_front(predict_record);
 
-    DPRINTF(Branch,
-            "[tid:%i] [sn:%llu] History entry added. "
-            "predHist.size(): %i\n",
-            tid, seqNum, predHist[tid].size());
+    if (iPred) {
+        // Update the indirect predictor with the direction prediction
+        iPred->update(tid, seqNum, hist->pc, false, hist->predTaken,
+                      *hist->target, brType, hist->indirectHistory);
+    }
 
-    return pred_taken;
+    return hist->predTaken;
 }
 
+
 void
 BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid)
 {
     DPRINTF(Branch, "[tid:%i] Committing branches until "
-            "sn:%llu]\n", tid, done_sn);
+            "[sn:%llu]\n", tid, done_sn);
 
     while (!predHist[tid].empty() &&
-           predHist[tid].back().seqNum <= done_sn) {
-        // Update the branch predictor with the correct results.
-        update(tid, predHist[tid].back().pc,
-                    predHist[tid].back().predTaken,
-                    predHist[tid].back().bpHistory, false,
-                    predHist[tid].back().inst,
-                    predHist[tid].back().target);
+            predHist[tid].back()->seqNum <= done_sn) {
 
-        if (iPred) {
-            iPred->commit(done_sn, tid, predHist[tid].back().indirectHistory);
-        }
+        // Iterate from the back to front. Least recent
+        // sequence number until the most recent done number
+        commitBranch(tid, *predHist[tid].rbegin());
 
+        delete predHist[tid].back();
         predHist[tid].pop_back();
+        DPRINTF(Branch, "[tid:%i] [commit sn:%llu] pred_hist.size(): %i\n",
+                tid, done_sn, predHist[tid].size());
     }
 }
 
 void
-BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
+BPredUnit::commitBranch(ThreadID tid, PredictorHistory* &hist)
 {
-    History &pred_hist = predHist[tid];
 
+    stats.committed[tid][hist->type]++;
+    if (hist->mispredict) {
+        stats.mispredicted[tid][hist->type]++;
+    }
+
+
+    DPRINTF(Branch, "Commit branch: sn:%llu, PC:%#x %s, "
+                    "pred:%i, taken:%i, target:%#x\n",
+                hist->seqNum, hist->pc, toString(hist->type),
+                hist->predTaken, hist->actuallyTaken,
+                hist->target->instAddr());
+
+    // Update the branch predictor with the correct results.
+    update(tid, hist->pc,
+                hist->actuallyTaken,
+                hist->bpHistory, false,
+                hist->inst,
+                hist->target->instAddr());
+
+    // Also commit the indirect predictor and RAS
     if (iPred) {
-        iPred->squash(squashed_sn, tid);
+        iPred->commit(tid, hist->seqNum,
+                           hist->indirectHistory);
     }
 
-    while (!pred_hist.empty() &&
-           pred_hist.front().seqNum > squashed_sn) {
-        if (pred_hist.front().wasCall && pred_hist.front().pushedRAS) {
-             // Was a call but predicated false. Pop RAS here
-             DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Squashing"
-                     "  Call [sn:%llu] PC: %s Popping RAS\n", tid, squashed_sn,
-                     pred_hist.front().seqNum, pred_hist.front().pc);
-             RAS[tid].pop();
-        }
-        if (pred_hist.front().usedRAS) {
-            if (pred_hist.front().RASTarget != nullptr) {
-                DPRINTF(Branch, "[tid:%i] [squash sn:%llu]"
-                        " Restoring top of RAS to: %i,"
-                        " target: %s\n", tid, squashed_sn,
-                        pred_hist.front().RASIndex,
-                        *pred_hist.front().RASTarget);
-            }
-            else {
-                DPRINTF(Branch, "[tid:%i] [squash sn:%llu]"
-                        " Restoring top of RAS to: %i,"
-                        " target: INVALID_TARGET\n", tid, squashed_sn,
-                        pred_hist.front().RASIndex);
-            }
+    if (ras) {
+        ras->commit(tid, hist->mispredict,
+                         hist->type,
+                         hist->rasHistory);
+    }
+}
 
-            RAS[tid].restore(pred_hist.front().RASIndex,
-                             pred_hist.front().RASTarget.get());
-        }
 
-        // This call should delete the bpHistory.
-        squash(tid, pred_hist.front().bpHistory);
-        if (iPred) {
-            iPred->deleteIndirectInfo(tid, pred_hist.front().indirectHistory);
-        }
 
-        DPRINTF(Branch, "[tid:%i] [squash sn:%llu] "
-                "Removing history for [sn:%llu] "
-                "PC %#x\n", tid, squashed_sn, pred_hist.front().seqNum,
-                pred_hist.front().pc);
+void
+BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
+{
+
+    while (!predHist[tid].empty() &&
+            predHist[tid].front()->seqNum > squashed_sn) {
 
-        pred_hist.pop_front();
+        auto hist = predHist[tid].front();
 
-        DPRINTF(Branch, "[tid:%i] [squash sn:%llu] predHist.size(): %i\n",
+        squashHistory(tid, hist);
+
+        DPRINTF(Branch, "[tid:%i, squash sn:%llu] Removing history for "
+                "sn:%llu, PC:%#x\n", tid, squashed_sn, hist->seqNum,
+                hist->pc);
+
+
+        delete predHist[tid].front();
+        predHist[tid].pop_front();
+
+        DPRINTF(Branch, "[tid:%i] [squash sn:%llu] pred_hist.size(): %i\n",
                 tid, squashed_sn, predHist[tid].size());
     }
 }
 
+
+
+void
+BPredUnit::squashHistory(ThreadID tid, PredictorHistory* &history)
+{
+
+    stats.squashes[tid][history->type]++;
+    DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Incorrect: %s\n",
+                tid, history->seqNum,
+                toString(history->type));
+
+
+    if (history->rasHistory) {
+        assert(ras);
+
+        DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Incorrect call/return "
+                "PC %#x. Fix RAS.\n", tid, history->seqNum,
+                history->pc);
+
+        ras->squash(tid, history->rasHistory);
+    }
+
+    if (iPred) {
+        iPred->squash(tid, history->seqNum,
+                        history->indirectHistory);
+    }
+
+    // This call should delete the bpHistory.
+    squash(tid, history->bpHistory);
+}
+
+
 void
 BPredUnit::squash(const InstSeqNum &squashed_sn,
                   const PCStateBase &corr_target,
-                  bool actually_taken, ThreadID tid)
+                  bool actually_taken, ThreadID tid, bool from_commit)
 {
     // Now that we know that a branch was mispredicted, we need to undo
     // all the branches that have been seen up until this branch and
@@ -405,10 +454,15 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
     ++stats.condIncorrect;
     ppMisses->notify(1);
 
-    DPRINTF(Branch, "[tid:%i] Squashing from sequence number %i, "
-            "setting target to %s\n", tid, squashed_sn, corr_target);
+
+    DPRINTF(Branch, "[tid:%i] Squash from %s start from sequence number %i, "
+            "setting target to %s\n", tid, from_commit ? "commit" : "decode",
+            squashed_sn, corr_target);
+
+    // dump();
 
     // Squash All Branches AFTER this mispredicted branch
+    // First the Prefetch history then the main history.
     squash(squashed_sn, tid);
 
     // If there's a squash due to a syscall, there may not be an entry
@@ -416,26 +470,32 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
     // fix up the entry.
     if (!pred_hist.empty()) {
 
-        auto hist_it = pred_hist.begin();
-        //HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
-        //                       squashed_sn);
+        PredictorHistory* const hist = pred_hist.front();
 
-        //assert(hist_it != pred_hist.end());
-        if (pred_hist.front().seqNum != squashed_sn) {
-            DPRINTF(Branch, "Front sn %i != Squash sn %i\n",
-                    pred_hist.front().seqNum, squashed_sn);
+        DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Mispredicted: %s, PC:%#x\n",
+                    tid, squashed_sn, toString(hist->type), hist->pc);
 
-            assert(pred_hist.front().seqNum == squashed_sn);
+        // Update stats
+        stats.corrected[tid][hist->type]++;
+        if (hist->target &&
+            (hist->target->instAddr() != corr_target.instAddr())) {
+                stats.targetWrong[tid][hist->targetProvider]++;
         }
 
+        // If the squash is coming from decode it can be
+        // redirected earlier. Note that this branch might never get
+        // committed as a preceding branch was mispredicted
+        if (!from_commit) {
+            stats.earlyResteers[tid][hist->type]++;
+        }
 
-        if ((*hist_it).usedRAS) {
-            ++stats.RASIncorrect;
-            DPRINTF(Branch,
-                    "[tid:%i] [squash sn:%llu] Incorrect RAS [sn:%llu]\n",
-                    tid, squashed_sn, hist_it->seqNum);
+        if (actually_taken) {
+            ++stats.NotTakenMispredicted;
+        } else {
+           ++stats.TakenMispredicted;
         }
 
+
         // There are separate functions for in-order and out-of-order
         // branch prediction, but not for update. Therefore, this
         // call should take into account that the mispredicted branch may
@@ -445,95 +505,109 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
         // local/global histories. The counter tables will be updated when
         // the branch actually commits.
 
-        // Remember the correct direction for the update at commit.
-        pred_hist.front().predTaken = actually_taken;
-        pred_hist.front().target = corr_target.instAddr();
+        // Remember the correct direction and target for the update at commit.
+        hist->mispredict = true;
+        hist->actuallyTaken = actually_taken;
+        set(hist->target,  corr_target);
 
-        update(tid, (*hist_it).pc, actually_taken,
-               pred_hist.front().bpHistory, true, pred_hist.front().inst,
-               corr_target.instAddr());
+        // Correct Direction predictor ------------------
+        update(tid, hist->pc, actually_taken, hist->bpHistory,
+               true, hist->inst, corr_target.instAddr());
 
+
+        // Correct Indirect predictor -------------------
         if (iPred) {
-            iPred->changeDirectionPrediction(tid,
-                pred_hist.front().indirectHistory, actually_taken);
+            iPred->update(tid, squashed_sn, hist->pc,
+                            true, actually_taken, corr_target,
+                            hist->type, hist->indirectHistory);
         }
 
-        if (actually_taken) {
-            if (hist_it->wasReturn && !hist_it->usedRAS) {
-                 DPRINTF(Branch, "[tid:%i] [squash sn:%llu] "
-                        "Incorrectly predicted "
-                        "return [sn:%llu] PC: %#x\n", tid, squashed_sn,
-                        hist_it->seqNum,
-                        hist_it->pc);
-                 RAS[tid].pop();
-                 hist_it->usedRAS = true;
+        // Correct RAS ---------------------------------
+        if (ras) {
+            // The branch was taken and the RAS was not updated.
+            // In case of call or return that needs to be fixed.
+            if (actually_taken && (hist->rasHistory == nullptr)) {
+
+                // A return has not popped the RAS.
+                if (hist->type == BranchType::Return) {
+                    DPRINTF(Branch, "[tid:%i] [squash sn:%llu] "
+                        "Incorrectly predicted return [sn:%llu] PC: %#x\n",
+                        tid, squashed_sn, hist->seqNum, hist->pc);
+
+                    ras->pop(tid, hist->rasHistory);
+                }
+
+                // A call has not pushed a return address to the RAS.
+                if (hist->call) {
+                    // In case of a call build the return address and
+                    // push it to the RAS.
+                    auto return_addr = hist->inst->buildRetPC(
+                                                    corr_target, corr_target);
+
+                    DPRINTF(Branch, "[tid:%i] [squash sn:%llu] "
+                            "Incorrectly predicted call: [sn:%llu,PC:%#x] "
+                            " Push return address %s onto RAS\n", tid,
+                            squashed_sn, hist->seqNum, hist->pc,
+                            *return_addr);
+                    ras->push(tid, *return_addr, hist->rasHistory);
+                }
+
+            // The branch was not taken but the RAS was modified.
+            } else if (!actually_taken && (hist->rasHistory != nullptr)) {
+                // The branch was not taken but the RAS was modified.
+                // Needs to be fixed.
+                ras->squash(tid, hist->rasHistory);
             }
-            if (hist_it->wasIndirect) {
+        }
+
+        // Correct BTB ---------------------------------------------------
+        // Check if the misprediction happened because of a BTB miss
+        // or an incorrect indirect predictor.
+        if (actually_taken) {
+            if (hist->inst->isIndirectCtrl() && !hist->inst->isReturn()) {
                 ++stats.indirectMispredicted;
-                if (iPred) {
-                    iPred->recordTarget(
-                        hist_it->seqNum, pred_hist.front().indirectHistory,
-                        corr_target, tid);
-                }
             } else {
-                DPRINTF(Branch,"[tid:%i] [squash sn:%llu] "
-                        "BTB Update called for [sn:%llu] "
-                        "PC %#x\n", tid, squashed_sn,
-                        hist_it->seqNum, hist_it->pc);
 
                 ++stats.BTBUpdates;
-                BTB.update(hist_it->pc, corr_target, tid);
+                btb->update(tid, hist->pc, corr_target,
+                            getBranchType(hist->inst));
+
+                ++stats.BTBMispredicted;
+                if (hist->condPred)
+                    ++stats.predTakenBTBMiss;
+
+                btb->incorrectTarget(hist->pc, hist->type);
+
+                DPRINTF(Branch,"[tid:%i] [squash sn:%llu] "
+                    "BTB miss PC %#x %s \n", tid, squashed_sn,
+                    hist->pc, toString(hist->type));
             }
-        } else {
-           //Actually not Taken
-           if (hist_it->wasCall && hist_it->pushedRAS) {
-                 //Was a Call but predicated false. Pop RAS here
-                 DPRINTF(Branch,
-                        "[tid:%i] [squash sn:%llu] "
-                        "Incorrectly predicted "
-                        "Call [sn:%llu] PC: %s Popping RAS\n",
-                        tid, squashed_sn,
-                        hist_it->seqNum, hist_it->pc);
-                 RAS[tid].pop();
-                 hist_it->pushedRAS = false;
-           }
-           if (hist_it->usedRAS) {
-                DPRINTF(Branch,
-                        "[tid:%i] [squash sn:%llu] Incorrectly predicted "
-                        "return [sn:%llu] PC: %#x Restoring RAS\n", tid,
-                        squashed_sn,
-                        hist_it->seqNum, hist_it->pc);
-                DPRINTF(Branch,
-                        "[tid:%i] [squash sn:%llu] Restoring top of RAS "
-                        "to: %i, target: %s\n", tid, squashed_sn,
-                        hist_it->RASIndex, *hist_it->RASTarget);
-                RAS[tid].restore(hist_it->RASIndex, hist_it->RASTarget.get());
-                hist_it->usedRAS = false;
-           }
         }
+
     } else {
         DPRINTF(Branch, "[tid:%i] [sn:%llu] pred_hist empty, can't "
                 "update\n", tid, squashed_sn);
     }
 }
 
+
 void
 BPredUnit::dump()
 {
     int i = 0;
     for (const auto& ph : predHist) {
         if (!ph.empty()) {
-            auto pred_hist_it = ph.begin();
+            auto hist = ph.begin();
 
             cprintf("predHist[%i].size(): %i\n", i++, ph.size());
 
-            while (pred_hist_it != ph.end()) {
+            while (hist != ph.end()) {
                 cprintf("sn:%llu], PC:%#x, tid:%i, predTaken:%i, "
-                        "bpHistory:%#x\n",
-                        pred_hist_it->seqNum, pred_hist_it->pc,
-                        pred_hist_it->tid, pred_hist_it->predTaken,
-                        pred_hist_it->bpHistory);
-                pred_hist_it++;
+                        "bpHistory:%#x, rasHistory:%#x\n",
+                        (*hist)->seqNum, (*hist)->pc,
+                        (*hist)->tid, (*hist)->predTaken,
+                        (*hist)->bpHistory, (*hist)->rasHistory);
+                hist++;
             }
 
             cprintf("\n");
@@ -541,5 +615,108 @@ BPredUnit::dump()
     }
 }
 
+
+BPredUnit::BPredUnitStats::BPredUnitStats(BPredUnit *bp)
+    : statistics::Group(bp),
+      ADD_STAT(lookups, statistics::units::Count::get(),
+              "Number of BP lookups"),
+      ADD_STAT(squashes, statistics::units::Count::get(),
+              "Number of branches that got squashed (completely removed) as "
+              "an earlier branch was mispredicted."),
+      ADD_STAT(corrected, statistics::units::Count::get(),
+              "Number of branches that got corrected but not yet commited. "
+              "Branches get corrected by decode or after execute. Also a "
+              "branch misprediction can be detected out-of-order. Therefore, "
+              "a corrected branch might not end up beeing committed in case "
+              "an even earlier branch was mispredicted"),
+      ADD_STAT(earlyResteers, statistics::units::Count::get(),
+              "Number of branches that got redirected after decode."),
+      ADD_STAT(committed, statistics::units::Count::get(),
+              "Number of branches finally committed "),
+      ADD_STAT(mispredicted, statistics::units::Count::get(),
+              "Number of committed branches that were mispredicted."),
+      ADD_STAT(targetProvider, statistics::units::Count::get(),
+              "The component providing the target for taken branches"),
+      ADD_STAT(targetWrong, statistics::units::Count::get(),
+              "Number of branches where the target was incorrect or not "
+              "available at prediction time."),
+      ADD_STAT(condPredicted, statistics::units::Count::get(),
+               "Number of conditional branches predicted"),
+      ADD_STAT(condPredictedTaken, statistics::units::Count::get(),
+               "Number of conditional branches predicted as taken"),
+      ADD_STAT(condIncorrect, statistics::units::Count::get(),
+               "Number of conditional branches incorrect"),
+      ADD_STAT(predTakenBTBMiss, statistics::units::Count::get(),
+               "Number of branches predicted taken but missed in BTB"),
+      ADD_STAT(NotTakenMispredicted, statistics::units::Count::get(),
+               "Number branches predicted 'not taken' but turned out "
+               "to be taken"),
+      ADD_STAT(TakenMispredicted, statistics::units::Count::get(),
+               "Number branches predicted taken but are actually not taken"),
+      ADD_STAT(BTBLookups, statistics::units::Count::get(),
+               "Number of BTB lookups"),
+      ADD_STAT(BTBUpdates, statistics::units::Count::get(),
+               "Number of BTB updates"),
+      ADD_STAT(BTBHits, statistics::units::Count::get(),
+               "Number of BTB hits"),
+      ADD_STAT(BTBHitRatio, statistics::units::Ratio::get(), "BTB Hit Ratio",
+               BTBHits / BTBLookups),
+      ADD_STAT(BTBMispredicted, statistics::units::Count::get(),
+               "Number BTB mispredictions. No target found or target wrong"),
+      ADD_STAT(indirectLookups, statistics::units::Count::get(),
+               "Number of indirect predictor lookups."),
+      ADD_STAT(indirectHits, statistics::units::Count::get(),
+               "Number of indirect target hits."),
+      ADD_STAT(indirectMisses, statistics::units::Count::get(),
+               "Number of indirect misses."),
+      ADD_STAT(indirectMispredicted, statistics::units::Count::get(),
+               "Number of mispredicted indirect branches.")
+
+{
+    using namespace statistics;
+    BTBHitRatio.precision(6);
+
+    lookups
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    lookups.ysubnames(enums::BranchTypeStrings);
+
+    squashes
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    squashes.ysubnames(enums::BranchTypeStrings);
+
+    corrected
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    corrected.ysubnames(enums::BranchTypeStrings);
+
+    earlyResteers
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    earlyResteers.ysubnames(enums::BranchTypeStrings);
+
+    committed
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    committed.ysubnames(enums::BranchTypeStrings);
+
+    mispredicted
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    mispredicted.ysubnames(enums::BranchTypeStrings);
+
+    targetProvider
+        .init(bp->numThreads, enums::Num_TargetProvider)
+        .flags(total | pdf);
+    targetProvider.ysubnames(enums::TargetProviderStrings);
+
+    targetWrong
+        .init(bp->numThreads, enums::Num_BranchType)
+        .flags(total | pdf);
+    targetWrong.ysubnames(enums::BranchTypeStrings);
+
+}
+
 } // namespace branch_prediction
 } // namespace gem5
diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh
index 4af1d876a8..6aaec616c6 100644
--- a/src/cpu/pred/bpred_unit.hh
+++ b/src/cpu/pred/bpred_unit.hh
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2011-2012, 2014 ARM Limited
- * Copyright (c) 2010 The University of Edinburgh
+ * Copyright (c) 2010,2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -46,11 +46,13 @@
 
 #include "base/statistics.hh"
 #include "base/types.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/pred/branch_type.hh"
 #include "cpu/pred/btb.hh"
 #include "cpu/pred/indirect.hh"
 #include "cpu/pred/ras.hh"
-#include "cpu/inst_seq.hh"
 #include "cpu/static_inst.hh"
+#include "enums/TargetProvider.hh"
 #include "params/BranchPredictor.hh"
 #include "sim/probe/pmu.hh"
 #include "sim/sim_object.hh"
@@ -67,8 +69,14 @@ namespace branch_prediction
  */
 class BPredUnit : public SimObject
 {
+    typedef BranchPredictorParams Params;
+    typedef enums::TargetProvider TargetProvider;
+
+    /** Branch Predictor Unit (BPU) interface functions */
   public:
-      typedef BranchPredictorParams Params;
+
+
+
     /**
      * @param params The params object, that has the size of the BP and BTB.
      */
@@ -90,9 +98,6 @@ class BPredUnit : public SimObject
     bool predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
                  PCStateBase &pc, ThreadID tid);
 
-    // @todo: Rename this function.
-    virtual void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) = 0;
-
     /**
      * Tells the branch predictor to commit any updates until the given
      * sequence number.
@@ -117,82 +122,123 @@ class BPredUnit : public SimObject
      * @param corr_target The correct branch target.
      * @param actually_taken The correct branch direction.
      * @param tid The thread id.
+     * @param from_commit Indicate whether the squash is coming from commit
+     *              or from decode. It is optional and used for statistics.
      */
-    void squash(const InstSeqNum &squashed_sn,
-                const PCStateBase &corr_target,
-                bool actually_taken, ThreadID tid);
+    void squash(const InstSeqNum &squashed_sn, const PCStateBase &corr_target,
+                bool actually_taken, ThreadID tid, bool from_commit=true);
 
-    /**
-     * @param bp_history Pointer to the history object.  The predictor
-     * will need to update any state and delete the object.
-     */
-    virtual void squash(ThreadID tid, void *bp_history) = 0;
+  protected:
+
+    /** *******************************************************
+     * Interface functions to the conditional branch predictor
+     *
+    */
 
     /**
-     * Looks up a given PC in the BP to see if it is taken or not taken.
-     * @param inst_PC The PC to look up.
+     * Looks up a given conditional branch PC in the BP to see if it
+     * is taken or not taken.
+     * @param pc The PC to look up.
      * @param bp_history Pointer that will be set to an object that
      * has the branch predictor state associated with the lookup.
      * @return Whether the branch is taken or not taken.
      */
-    virtual bool lookup(ThreadID tid, Addr instPC, void * &bp_history) = 0;
+    virtual bool lookup(ThreadID tid, Addr pc, void * &bp_history) = 0;
 
-     /**
-     * If a branch is not taken, because the BTB address is invalid or missing,
-     * this function sets the appropriate counter in the global and local
-     * predictors to not taken.
-     * @param inst_PC The PC to look up the local predictor.
+    /**
+     * Once done with the prediction, this function updates the
+     * path and global history. All branches call this function,
+     * including unconditional ones.
+     * @param tid The thread id.
+     * @param pc The branch's PC that will be updated.
+     * @param uncond Whether or not this branch is an unconditional branch.
+     * @param taken Whether or not the branch was taken
+     * @param target The final target of branch. Some modern
+     * predictors use the target in their history.
      * @param bp_history Pointer that will be set to an object that
      * has the branch predictor state associated with the lookup.
      */
-    virtual void btbUpdate(ThreadID tid, Addr instPC, void * &bp_history) = 0;
+    virtual void updateHistories(ThreadID tid, Addr pc, bool uncond,
+                            bool taken, Addr target, void * &bp_history) = 0;
+
+    /**
+     * @param tid The thread id.
+     * @param bp_history Pointer to the history object.  The predictor
+     * will need to update any state and delete the object.
+     */
+    virtual void squash(ThreadID tid, void * &bp_history) = 0;
+
+
+    /**
+     * Updates the BP with taken/not taken information.
+     * @param tid The thread id.
+     * @param PC The branch's PC that will be updated.
+     * @param taken Whether the branch was taken or not taken.
+     * @param bp_history Pointer to the branch predictor state that is
+     * associated with the branch lookup that is being updated.
+     * @param squashed Set to true when this function is called during a
+     * squash operation.
+     * @param inst Static instruction information
+     * @param target The resolved target of the branch (only needed
+     * for squashed branches)
+     * @todo Make this update flexible enough to handle a global predictor.
+     */
+    virtual void update(ThreadID tid, Addr pc, bool taken,
+                   void * &bp_history, bool squashed,
+                   const StaticInstPtr &inst, Addr target) = 0;
+
 
     /**
      * Looks up a given PC in the BTB to see if a matching entry exists.
+     * @param tid The thread id.
      * @param inst_PC The PC to look up.
      * @return Whether the BTB contains the given PC.
      */
-    bool BTBValid(Addr instPC) { return BTB.valid(instPC, 0); }
+    bool BTBValid(ThreadID tid, Addr instPC)
+    {
+        return btb->valid(tid, instPC);
+    }
 
     /**
      * Looks up a given PC in the BTB to get the predicted target. The PC may
      * be changed or deleted in the future, so it needs to be used immediately,
      * and/or copied for use later.
+     * @param tid The thread id.
      * @param inst_PC The PC to look up.
      * @return The address of the target of the branch.
      */
     const PCStateBase *
-    BTBLookup(Addr inst_pc)
+    BTBLookup(ThreadID tid, PCStateBase &instPC)
     {
-        return BTB.lookup(inst_pc, 0);
+        return btb->lookup(tid, instPC.instAddr());
     }
 
     /**
-     * Updates the BP with taken/not taken information.
-     * @param inst_PC The branch's PC that will be updated.
-     * @param taken Whether the branch was taken or not taken.
-     * @param bp_history Pointer to the branch predictor state that is
-     * associated with the branch lookup that is being updated.
-     * @param squashed Set to true when this function is called during a
-     * squash operation.
-     * @param inst Static instruction information
-     * @param corrTarget The resolved target of the branch (only needed
-     * for squashed branches)
-     * @todo Make this update flexible enough to handle a global predictor.
+     * Looks up a given PC in the BTB to get current static instruction
+     * information. This is necessary in a decoupled frontend as
+     * the information does not usually exist at this point.
+     * Only for instructions (branches) that hit in the BTB this information
+     * is available as the BTB stores them together with the target.
+     *
+     * @param instPC The PC to look up.
+     * @return The static instruction info of the given PC if it exists.
      */
-    virtual void update(ThreadID tid, Addr instPC, bool taken,
-                   void *bp_history, bool squashed,
-                   const StaticInstPtr &inst, Addr corrTarget) = 0;
+    const StaticInstPtr
+    BTBGetInst(ThreadID tid, Addr instPC)
+    {
+        return btb->getInst(tid, instPC);
+    }
+
     /**
      * Updates the BTB with the target of a branch.
      * @param inst_PC The branch's PC that will be updated.
      * @param target_PC The branch's target that will be added to the BTB.
      */
     void
-    BTBUpdate(Addr instPC, const PCStateBase &target)
+    BTBUpdate(ThreadID tid, Addr instPC, const PCStateBase &target)
     {
         ++stats.BTBUpdates;
-        BTB.update(instPC, target, 0);
+        return btb->update(tid, instPC, target);
     }
 
 
@@ -205,26 +251,28 @@ class BPredUnit : public SimObject
          * Makes a predictor history struct that contains any
          * information needed to update the predictor, BTB, and RAS.
          */
-        PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
-                         bool pred_taken, void *bp_history,
-                         void *indirect_history, ThreadID _tid,
+        PredictorHistory(ThreadID _tid, InstSeqNum sn, Addr _pc,
                          const StaticInstPtr & inst)
-            : seqNum(seq_num), pc(instPC), bpHistory(bp_history),
-              indirectHistory(indirect_history), tid(_tid),
-              predTaken(pred_taken), inst(inst)
-        {}
-
-        PredictorHistory(const PredictorHistory &other) :
-            seqNum(other.seqNum), pc(other.pc), bpHistory(other.bpHistory),
-            indirectHistory(other.indirectHistory), RASIndex(other.RASIndex),
-            tid(other.tid), predTaken(other.predTaken), usedRAS(other.usedRAS),
-            pushedRAS(other.pushedRAS), wasCall(other.wasCall),
-            wasReturn(other.wasReturn), wasIndirect(other.wasIndirect),
-            target(other.target), inst(other.inst)
+            : seqNum(sn), tid(_tid), pc(_pc),
+              inst(inst), type(getBranchType(inst)),
+              call(inst->isCall()), uncond(inst->isUncondCtrl()),
+              predTaken(false), actuallyTaken(false), condPred(false),
+              btbHit(false), targetProvider(TargetProvider::NoTarget),
+              resteered(false), mispredict(false), target(nullptr),
+              bpHistory(nullptr),
+              indirectHistory(nullptr), rasHistory(nullptr)
+        { }
+
+        ~PredictorHistory()
         {
-            set(RASTarget, other.RASTarget);
+            assert(bpHistory == nullptr);
+            assert(indirectHistory == nullptr);
+            assert(rasHistory == nullptr);
         }
 
+        PredictorHistory (const PredictorHistory&) = delete;
+        PredictorHistory& operator= (const PredictorHistory&) = delete;
+
         bool
         operator==(const PredictorHistory &entry) const
         {
@@ -232,60 +280,96 @@ class BPredUnit : public SimObject
         }
 
         /** The sequence number for the predictor history entry. */
-        InstSeqNum seqNum;
+        const InstSeqNum seqNum;
 
-        /** The PC associated with the sequence number. */
-        Addr pc;
+        /** The thread id. */
+        const ThreadID tid;
 
-        /** Pointer to the history object passed back from the branch
-         * predictor.  It is used to update or restore state of the
-         * branch predictor.
-         */
-        void *bpHistory = nullptr;
+        /** The PC associated with the sequence number. */
+        const Addr pc;
 
-        void *indirectHistory = nullptr;
+        /** The branch instruction */
+        const StaticInstPtr inst;
 
-        /** The RAS target (only valid if a return). */
-        std::unique_ptr<PCStateBase> RASTarget;
+        /** The type of the branch */
+        const BranchType type;
 
-        /** The RAS index of the instruction (only valid if a call). */
-        unsigned RASIndex = 0;
+        /** Whether or not the instruction was a call. */
+        const bool call;
 
-        /** The thread id. */
-        ThreadID tid;
+        /** Was unconditional control */
+        const bool uncond;
 
         /** Whether or not it was predicted taken. */
         bool predTaken;
 
-        /** Whether or not the RAS was used. */
-        bool usedRAS = false;
+        /** To record the actual outcome of the branch */
+        bool actuallyTaken;
 
-        /* Whether or not the RAS was pushed */
-        bool pushedRAS = false;
+        /** The prediction of the conditional predictor */
+        bool condPred;
 
-        /** Whether or not the instruction was a call. */
-        bool wasCall = false;
+        /** Was BTB hit at prediction time */
+        bool btbHit;
+
+        /** Which component provided the target */
+        TargetProvider targetProvider;
+
+        /** Resteered */
+        bool resteered;
 
-        /** Whether or not the instruction was a return. */
-        bool wasReturn = false;
+        /** The branch was corrected hence was mispredicted. */
+        bool mispredict;
 
-        /** Wether this instruction was an indirect branch */
-        bool wasIndirect = false;
+        /** The predicted target */
+        std::unique_ptr<PCStateBase> target;
 
-        /** Target of the branch. First it is predicted, and fixed later
-         *  if necessary
+        /**
+         * Pointer to the history objects passed back from the branch
+         * predictor subcomponents.
+         * It is used to update or restore state.
+         * Respectively for conditional, indirect and RAS.
          */
-        Addr target = MaxAddr;
+        void *bpHistory = nullptr;
+
+        void *indirectHistory = nullptr;
+
+        void *rasHistory = nullptr;
 
-        /** The branch instrction */
-        const StaticInstPtr inst;
     };
 
-    typedef std::deque<PredictorHistory> History;
+    typedef std::deque<PredictorHistory*> History;
+
 
+    /**
+     * Internal prediction function.
+    */
+    bool predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
+               PCStateBase &pc, ThreadID tid, PredictorHistory* &bpu_history);
+
+    /**
+     * Squashes a particular branch instance
+     * @param tid The thread id.
+     * @param bpu_history The history to be squashed.
+     */
+    void squashHistory(ThreadID tid, PredictorHistory* &bpu_history);
+
+
+    /**
+     * Commit a particular branch
+     * @param tid The thread id.
+     * @param bpu_history The history of the branch to be committed.
+     */
+    void commitBranch(ThreadID tid, PredictorHistory* &bpu_history);
+
+
+
+  protected:
     /** Number of the threads for which the branch history is maintained. */
     const unsigned numThreads;
 
+    /** Number of bits to shift instructions by for predictor addresses. */
+    const unsigned instShiftAmt;
 
     /**
      * The per-thread predictor history. This is used to update the predictor
@@ -295,50 +379,55 @@ class BPredUnit : public SimObject
     std::vector<History> predHist;
 
     /** The BTB. */
-    DefaultBTB BTB;
+    BranchTargetBuffer * btb;
 
-    /** The per-thread return address stack. */
-    std::vector<ReturnAddrStack> RAS;
+    /** The return address stack. */
+    ReturnAddrStack * ras;
 
     /** The indirect target predictor. */
     IndirectPredictor * iPred;
 
+    /** Statistics */
     struct BPredUnitStats : public statistics::Group
     {
-        BPredUnitStats(statistics::Group *parent);
+        BPredUnitStats(BPredUnit *bp);
 
-        /** Stat for number of BP lookups. */
-        statistics::Scalar lookups;
-        /** Stat for number of conditional branches predicted. */
+        /** Stats per branch type */
+        statistics::Vector2d lookups;
+        statistics::Vector2d squashes;
+        statistics::Vector2d corrected;
+        statistics::Vector2d earlyResteers;
+        statistics::Vector2d committed;
+        statistics::Vector2d mispredicted;
+
+        /** Target prediction per branch type */
+        statistics::Vector2d targetProvider;
+        statistics::Vector2d targetWrong;
+
+        /** Additional scalar stats for conditional branches */
         statistics::Scalar condPredicted;
-        /** Stat for number of conditional branches predicted incorrectly. */
+        statistics::Scalar condPredictedTaken;
         statistics::Scalar condIncorrect;
-        /** Stat for number of BTB lookups. */
+        statistics::Scalar predTakenBTBMiss;
+        statistics::Scalar NotTakenMispredicted;
+        statistics::Scalar TakenMispredicted;
+
+        /** BTB stats. */
         statistics::Scalar BTBLookups;
-        /** Stat for number of BTB updates. */
         statistics::Scalar BTBUpdates;
-        /** Stat for number of BTB hits. */
         statistics::Scalar BTBHits;
-        /** Stat for the ratio between BTB hits and BTB lookups. */
         statistics::Formula BTBHitRatio;
-        /** Stat for number of times the RAS is used to get a target. */
-        statistics::Scalar RASUsed;
-        /** Stat for number of times the RAS is incorrect. */
-        statistics::Scalar RASIncorrect;
+        statistics::Scalar BTBMispredicted;
 
-        /** Stat for the number of indirect target lookups.*/
+        /** Indirect stats */
         statistics::Scalar indirectLookups;
-        /** Stat for the number of indirect target hits.*/
         statistics::Scalar indirectHits;
-        /** Stat for the number of indirect target misses.*/
         statistics::Scalar indirectMisses;
-        /** Stat for the number of indirect target mispredictions.*/
         statistics::Scalar indirectMispredicted;
+
     } stats;
 
   protected:
-    /** Number of bits to shift instructions by for predictor addresses. */
-    const unsigned instShiftAmt;
 
     /**
      * @{
diff --git a/src/cpu/pred/branch_type.hh b/src/cpu/pred/branch_type.hh
new file mode 100644
index 0000000000..dcc6149a9b
--- /dev/null
+++ b/src/cpu/pred/branch_type.hh
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* @file
+ * A helper for branch type information
+ */
+
+#ifndef __CPU_PRED_BRANCH_TYPE_HH__
+#define __CPU_PRED_BRANCH_TYPE_HH__
+
+#include "cpu/static_inst.hh"
+#include "enums/BranchType.hh"
+
+namespace gem5
+{
+
+namespace branch_prediction
+{
+
+typedef enums::BranchType BranchType;
+
+inline BranchType getBranchType(StaticInstPtr inst)
+{
+    if (inst->isReturn()) {
+        return BranchType::Return;
+    }
+
+    if (inst->isCall()) {
+        return inst->isDirectCtrl()
+                    ? BranchType::CallDirect
+                    : BranchType::CallIndirect;
+    }
+
+    if (inst->isDirectCtrl()) {
+        return inst->isCondCtrl()
+                    ? BranchType::DirectCond
+                    : BranchType::DirectUncond;
+    }
+
+    if (inst->isIndirectCtrl()) {
+        return inst->isCondCtrl()
+                    ? BranchType::IndirectCond
+                    : BranchType::IndirectUncond;
+    }
+    return BranchType::NoBranch;
+}
+
+inline std::string toString(BranchType type)
+{
+    return std::string(enums::BranchTypeStrings[type]);
+}
+
+
+} // namespace branch_prediction
+} // namespace gem5
+
+#endif // __CPU_PRED_BRANCH_TYPE_HH__
diff --git a/src/cpu/pred/btb.cc b/src/cpu/pred/btb.cc
index 71afd45b9f..85d3e2c9bb 100644
--- a/src/cpu/pred/btb.cc
+++ b/src/cpu/pred/btb.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -28,120 +40,60 @@
 
 #include "cpu/pred/btb.hh"
 
-#include "base/intmath.hh"
-#include "base/trace.hh"
-#include "debug/Fetch.hh"
-
 namespace gem5
 {
 
 namespace branch_prediction
 {
 
-DefaultBTB::DefaultBTB(unsigned _numEntries,
-                       unsigned _tagBits,
-                       unsigned _instShiftAmt,
-                       unsigned _num_threads)
-    : numEntries(_numEntries),
-      tagBits(_tagBits),
-      instShiftAmt(_instShiftAmt),
-      log2NumThreads(floorLog2(_num_threads))
-{
-    DPRINTF(Fetch, "BTB: Creating BTB object.\n");
-
-    if (!isPowerOf2(numEntries)) {
-        fatal("BTB entries is not a power of 2!");
-    }
-
-    btb.resize(numEntries);
-
-    for (unsigned i = 0; i < numEntries; ++i) {
-        btb[i].valid = false;
-    }
-
-    idxMask = numEntries - 1;
-
-    tagMask = (1 << tagBits) - 1;
-
-    tagShiftAmt = instShiftAmt + floorLog2(numEntries);
-}
-
-void
-DefaultBTB::reset()
-{
-    for (unsigned i = 0; i < numEntries; ++i) {
-        btb[i].valid = false;
-    }
-}
-
-inline
-unsigned
-DefaultBTB::getIndex(Addr instPC, ThreadID tid)
-{
-    // Need to shift PC over by the word offset.
-    return ((instPC >> instShiftAmt)
-            ^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads)))
-            & idxMask;
-}
-
-inline
-Addr
-DefaultBTB::getTag(Addr instPC)
+BranchTargetBuffer::BranchTargetBuffer(const Params &params)
+    : ClockedObject(params),
+      numThreads(params.numThreads),
+      stats(this)
 {
-    return (instPC >> tagShiftAmt) & tagMask;
-}
-
-bool
-DefaultBTB::valid(Addr instPC, ThreadID tid)
-{
-    unsigned btb_idx = getIndex(instPC, tid);
-
-    Addr inst_tag = getTag(instPC);
-
-    assert(btb_idx < numEntries);
-
-    if (btb[btb_idx].valid
-        && inst_tag == btb[btb_idx].tag
-        && btb[btb_idx].tid == tid) {
-        return true;
-    } else {
-        return false;
-    }
 }
 
-// @todo Create some sort of return struct that has both whether or not the
-// address is valid, and also the address.  For now will just use addr = 0 to
-// represent invalid entry.
-const PCStateBase *
-DefaultBTB::lookup(Addr inst_pc, ThreadID tid)
+BranchTargetBuffer::BranchTargetBufferStats::BranchTargetBufferStats(
+                                                statistics::Group *parent)
+    : statistics::Group(parent),
+      ADD_STAT(lookups, statistics::units::Count::get(),
+               "Number of BTB lookups"),
+      ADD_STAT(misses, statistics::units::Count::get(),
+               "Number of BTB misses"),
+      ADD_STAT(updates, statistics::units::Count::get(),
+               "Number of BTB updates"),
+      ADD_STAT(mispredict, statistics::units::Count::get(),
+               "Number of BTB mispredictions. "
+               "No target found or target wrong."),
+      ADD_STAT(evictions, statistics::units::Count::get(),
+               "Number of BTB evictions")
 {
-    unsigned btb_idx = getIndex(inst_pc, tid);
-
-    Addr inst_tag = getTag(inst_pc);
-
-    assert(btb_idx < numEntries);
-
-    if (btb[btb_idx].valid
-        && inst_tag == btb[btb_idx].tag
-        && btb[btb_idx].tid == tid) {
-        return btb[btb_idx].target.get();
-    } else {
-        return nullptr;
+    using namespace statistics;
+    lookups
+        .init(enums::Num_BranchType)
+        .flags(total | pdf);
+
+    misses
+        .init(enums::Num_BranchType)
+        .flags(total | pdf);
+
+    updates
+        .init(enums::Num_BranchType)
+        .flags(total | pdf);
+
+    mispredict
+        .init(enums::Num_BranchType)
+        .flags(total | pdf);
+
+    evictions.flags(nozero);
+
+    for (int i = 0; i < enums::Num_BranchType; i++) {
+        lookups.subname(i, enums::BranchTypeStrings[i]);
+        misses.subname(i, enums::BranchTypeStrings[i]);
+        updates.subname(i, enums::BranchTypeStrings[i]);
+        mispredict.subname(i, enums::BranchTypeStrings[i]);
     }
 }
 
-void
-DefaultBTB::update(Addr inst_pc, const PCStateBase &target, ThreadID tid)
-{
-    unsigned btb_idx = getIndex(inst_pc, tid);
-
-    assert(btb_idx < numEntries);
-
-    btb[btb_idx].tid = tid;
-    btb[btb_idx].valid = true;
-    set(btb[btb_idx].target, target);
-    btb[btb_idx].tag = getTag(inst_pc);
-}
-
 } // namespace branch_prediction
 } // namespace gem5
diff --git a/src/cpu/pred/btb.hh b/src/cpu/pred/btb.hh
index 9213053d77..dd3e56a20f 100644
--- a/src/cpu/pred/btb.hh
+++ b/src/cpu/pred/btb.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -29,9 +41,13 @@
 #ifndef __CPU_PRED_BTB_HH__
 #define __CPU_PRED_BTB_HH__
 
+
 #include "arch/generic/pcstate.hh"
-#include "base/logging.hh"
-#include "base/types.hh"
+#include "base/statistics.hh"
+#include "cpu/pred/branch_type.hh"
+#include "cpu/static_inst.hh"
+#include "params/BranchTargetBuffer.hh"
+#include "sim/clocked_object.hh"
 
 namespace gem5
 {
@@ -39,93 +55,73 @@ namespace gem5
 namespace branch_prediction
 {
 
-class DefaultBTB
+class BranchTargetBuffer : public ClockedObject
 {
-  private:
-    struct BTBEntry
-    {
-        /** The entry's tag. */
-        Addr tag = 0;
-
-        /** The entry's target. */
-        std::unique_ptr<PCStateBase> target;
+  public:
+    typedef BranchTargetBufferParams Params;
+    typedef enums::BranchType BranchType;
 
-        /** The entry's thread id. */
-        ThreadID tid;
+    BranchTargetBuffer(const Params &params);
 
-        /** Whether or not the entry is valid. */
-        bool valid = false;
-    };
+    virtual void memInvalidate() override = 0;
 
-  public:
-    /** Creates a BTB with the given number of entries, number of bits per
-     *  tag, and instruction offset amount.
-     *  @param numEntries Number of entries for the BTB.
-     *  @param tagBits Number of bits for each tag in the BTB.
-     *  @param instShiftAmt Offset amount for instructions to ignore alignment.
+    /** Checks if a branch address is in the BTB. Intended as a quick check
+     *  before calling lookup. Does not update statistics.
+     *  @param inst_PC The address of the branch to look up.
+     *  @return Whether or not the branch exists in the BTB.
      */
-    DefaultBTB(unsigned numEntries, unsigned tagBits,
-               unsigned instShiftAmt, unsigned numThreads);
+    virtual bool valid(ThreadID tid, Addr instPC) = 0;
 
-    void reset();
-
-    /** Looks up an address in the BTB. Must call valid() first on the address.
+    /** Looks up an address in the BTB to get the target of the branch.
      *  @param inst_PC The address of the branch to look up.
-     *  @param tid The thread id.
-     *  @return Returns the target of the branch.
+     *  @param type Optional type of the branch to look up.
+     *  @return The target of the branch or nullptr if the branch is not
+     *          in the BTB.
      */
-    const PCStateBase *lookup(Addr instPC, ThreadID tid);
+    virtual const PCStateBase *lookup(ThreadID tid, Addr instPC,
+                            BranchType type = BranchType::NoBranch) = 0;
 
-    /** Checks if a branch is in the BTB.
+    /** Looks up an address in the BTB and returns the instruction
+     * information if existent. Does not update statistics.
      *  @param inst_PC The address of the branch to look up.
-     *  @param tid The thread id.
-     *  @return Whether or not the branch exists in the BTB.
+     *  @return Returns the instruction information of the branch.
      */
-    bool valid(Addr instPC, ThreadID tid);
+    virtual const StaticInstPtr getInst(ThreadID tid, Addr instPC) = 0;
+
 
     /** Updates the BTB with the target of a branch.
      *  @param inst_pc The address of the branch being updated.
      *  @param target_pc The target address of the branch.
-     *  @param tid The thread id.
-     */
-    void update(Addr inst_pc, const PCStateBase &target_pc, ThreadID tid);
-
-  private:
-    /** Returns the index into the BTB, based on the branch's PC.
-     *  @param inst_PC The branch to look up.
-     *  @return Returns the index into the BTB.
      */
-    inline unsigned getIndex(Addr instPC, ThreadID tid);
+    virtual void update(ThreadID tid, Addr inst_pc,
+                          const PCStateBase &target_pc,
+                          BranchType type = BranchType::NoBranch,
+                          StaticInstPtr inst = nullptr) = 0;
 
-    /** Returns the tag bits of a given address.
-     *  @param inst_PC The branch's address.
-     *  @return Returns the tag bits.
+    /** Update BTB statistics
      */
-    inline Addr getTag(Addr instPC);
-
-    /** The actual BTB. */
-    std::vector<BTBEntry> btb;
-
-    /** The number of entries in the BTB. */
-    unsigned numEntries;
-
-    /** The index mask. */
-    unsigned idxMask;
+    virtual void incorrectTarget(Addr inst_pc,
+                                  BranchType type = BranchType::NoBranch)
+    {
+      stats.mispredict[type]++;
+    }
 
-    /** The number of tag bits per entry. */
-    unsigned tagBits;
+  protected:
+    /** Number of the threads for which the branch history is maintained. */
+    const unsigned numThreads;
 
-    /** The tag mask. */
-    unsigned tagMask;
+    struct BranchTargetBufferStats : public statistics::Group
+    {
+        BranchTargetBufferStats(statistics::Group *parent);
 
-    /** Number of bits to shift PC when calculating index. */
-    unsigned instShiftAmt;
+        statistics::Vector lookups;
+        statistics::Vector misses;
+        statistics::Vector updates;
+        statistics::Vector mispredict;
+        statistics::Scalar evictions;
 
-    /** Number of bits to shift PC when calculating tag. */
-    unsigned tagShiftAmt;
+    } stats;
 
-    /** Log2 NumThreads used for hashing threadid */
-    unsigned log2NumThreads;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/indirect.hh b/src/cpu/pred/indirect.hh
index 5f855b14fd..54c55d0dcb 100644
--- a/src/cpu/pred/indirect.hh
+++ b/src/cpu/pred/indirect.hh
@@ -1,6 +1,16 @@
 /*
  * Copyright (c) 2014 ARM Limited
- * All rights reserved.
+ * Copyright (c) 2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -26,11 +36,16 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+/* @file
+ * Indirect target predictor interface
+ */
+
 #ifndef __CPU_PRED_INDIRECT_BASE_HH__
 #define __CPU_PRED_INDIRECT_BASE_HH__
 
 #include "arch/generic/pcstate.hh"
 #include "cpu/inst_seq.hh"
+#include "cpu/pred/branch_type.hh"
 #include "params/IndirectPredictor.hh"
 #include "sim/sim_object.hh"
 
@@ -51,21 +66,57 @@ class IndirectPredictor : public SimObject
     {
     }
 
-    virtual bool lookup(Addr br_addr, PCStateBase& br_target,
-                        ThreadID tid) = 0;
-    virtual void recordIndirect(Addr br_addr, Addr tgt_addr,
-                                InstSeqNum seq_num, ThreadID tid) = 0;
-    virtual void commit(InstSeqNum seq_num, ThreadID tid,
-                        void * indirect_history) = 0;
-    virtual void squash(InstSeqNum seq_num, ThreadID tid) = 0;
-    virtual void recordTarget(InstSeqNum seq_num, void * indirect_history,
-                              const PCStateBase& target, ThreadID tid) = 0;
-    virtual void genIndirectInfo(ThreadID tid, void* & indirect_history) = 0;
-    virtual void updateDirectionInfo(ThreadID tid, bool actually_taken) = 0;
-    virtual void deleteIndirectInfo(ThreadID tid, void * indirect_history) = 0;
-    virtual void changeDirectionPrediction(ThreadID tid,
-                                           void * indirect_history,
-                                           bool actually_taken) = 0;
+    virtual void reset() {};
+
+    /**
+     * Predicts the indirect target of an indirect branch.
+     * @param tid Thread ID of the branch.
+     * @param sn The sequence number of the branch.
+     * @param pc The branch PC address.
+     * @param i_history The pointer to the history object.
+     * @return For a hit the predictor returns a pointer to the target PCState
+     *         otherwise a nullptr is returned.
+     */
+    virtual const PCStateBase* lookup(ThreadID tid, InstSeqNum sn,
+                                      Addr pc, void * &i_history) = 0;
+
+    /**
+     * Updates the indirect predictor with history information of a branch.
+     * Is called right after the prediction which updates the state
+     * speculatively. In case the branch was mispredicted the function
+     * is called again with the corrected information.
+     * The function is called for ALL branches as some predictors incorporate
+     * all branches in their history.
+     * @param tid Thread ID
+     * @param sn The sequence number of the branch.
+     * @param pc The branch PC address.
+     * @param squash Whether the update is called at a misprediction
+     * @param taken Whether a conditional branch was taken
+     * @param target The target address of this branch.
+     * @param br_type The branch instruction type.
+     * @param i_history The pointer to the history object.
+     */
+    virtual void update(ThreadID tid, InstSeqNum sn, Addr pc, bool squash,
+                        bool taken, const PCStateBase& target,
+                        BranchType br_type, void * &i_history) = 0;
+
+    /**
+     * Squashes a branch. If the branch modified the history
+     * reverts the modification.
+     * @param tid Thread ID
+     * @param sn The sequence number of the branch.
+     * @param i_history The pointer to the history object.
+     */
+    virtual void squash(ThreadID tid, InstSeqNum sn, void * &i_history) = 0;
+
+    /**
+     * A branch gets finally committed. Updates the internal state of
+     * the indirect predictor (counter and target information).
+     * @param tid Thread ID
+     * @param sn The sequence number of the branch.
+     * @param i_history The pointer to the history object.
+     */
+    virtual void commit(ThreadID tid, InstSeqNum sn, void * &i_history) = 0;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/loop_predictor.cc b/src/cpu/pred/loop_predictor.cc
index 6574d61bb1..9e34e5141a 100644
--- a/src/cpu/pred/loop_predictor.cc
+++ b/src/cpu/pred/loop_predictor.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The University of Wisconsin
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
@@ -320,10 +332,13 @@ LoopPredictor::squashLoop(BranchInfo* bi)
 void
 LoopPredictor::updateStats(bool taken, BranchInfo* bi)
 {
-    if (taken == bi->loopPred) {
-        stats.correct++;
-    } else {
-        stats.wrong++;
+    if (bi->loopPredUsed) {
+        stats.used++;
+        if (taken == bi->loopPred) {
+            stats.correct++;
+        } else {
+            stats.wrong++;
+        }
     }
 }
 
@@ -354,6 +369,8 @@ LoopPredictor::condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken,
 LoopPredictor::LoopPredictorStats::LoopPredictorStats(
     statistics::Group *parent)
     : statistics::Group(parent),
+      ADD_STAT(used, statistics::units::Count::get(),
+               "Number of times the loop predictor is the provider."),
       ADD_STAT(correct, statistics::units::Count::get(),
                "Number of times the loop predictor is the provider and the "
                "prediction is correct"),
diff --git a/src/cpu/pred/loop_predictor.hh b/src/cpu/pred/loop_predictor.hh
index 44d75aba35..333cb3b34e 100644
--- a/src/cpu/pred/loop_predictor.hh
+++ b/src/cpu/pred/loop_predictor.hh
@@ -1,5 +1,15 @@
 /*
- * Copyright (c) 2014 The University of Wisconsin
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
  * Informatique et en Automatique  / French National Research Institute
@@ -92,6 +102,7 @@ class LoopPredictor : public SimObject
     struct LoopPredictorStats : public statistics::Group
     {
         LoopPredictorStats(statistics::Group *parent);
+        statistics::Scalar used;
         statistics::Scalar correct;
         statistics::Scalar wrong;
     } stats;
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index 930d6bf44a..3da443d20d 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The University of Wisconsin
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
@@ -94,8 +106,8 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
 
 // PREDICTOR UPDATE
 void
-LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
-              bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+LTAGE::update(ThreadID tid, Addr pc, bool taken, void * &bp_history,
+              bool squashed, const StaticInstPtr & inst, Addr target)
 {
     assert(bp_history);
 
@@ -105,7 +117,7 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
         if (tage->isSpeculativeUpdateEnabled()) {
             // This restores the global history, then update it
             // and recomputes the folded histories.
-            tage->squash(tid, taken, bi->tageBranchInfo, corrTarget);
+            tage->squash(tid, taken, bi->tageBranchInfo, target);
 
             if (bi->tageBranchInfo->condBranch) {
                 loopPredictor->squashLoop(bi->lpBranchInfo);
@@ -117,26 +129,27 @@ LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
     int nrand = random_mt.random<int>() & 3;
     if (bi->tageBranchInfo->condBranch) {
         DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
-                branch_pc, taken);
+                pc, taken);
         tage->updateStats(taken, bi->tageBranchInfo);
 
         loopPredictor->updateStats(taken, bi->lpBranchInfo);
 
-        loopPredictor->condBranchUpdate(tid, branch_pc, taken,
+        loopPredictor->condBranchUpdate(tid, pc, taken,
             bi->tageBranchInfo->tagePred, bi->lpBranchInfo, instShiftAmt);
 
-        tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo,
-            nrand, corrTarget, bi->lpBranchInfo->predTaken);
+        tage->condBranchUpdate(tid, pc, taken, bi->tageBranchInfo,
+            nrand, target, bi->lpBranchInfo->predTaken);
     }
 
-    tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false,
-                          inst, corrTarget);
+    tage->updateHistories(tid, pc, taken, bi->tageBranchInfo, false,
+                          inst, target);
 
     delete bi;
+    bp_history = nullptr;
 }
 
 void
-LTAGE::squash(ThreadID tid, void *bp_history)
+LTAGE::squash(ThreadID tid, void * &bp_history)
 {
     LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
 
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 7deaa2bc04..92d1fd25d7 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The University of Wisconsin
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
@@ -69,10 +81,10 @@ class LTAGE : public TAGE
     LTAGE(const LTAGEParams &params);
 
     // Base class methods.
-    void squash(ThreadID tid, void *bp_history) override;
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst,
-                Addr corrTarget) override;
+    void squash(ThreadID tid, void * &bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
 
     void init() override;
 
@@ -98,6 +110,7 @@ class LTAGE : public TAGE
         virtual ~LTageBranchInfo()
         {
             delete lpBranchInfo;
+            lpBranchInfo = nullptr;
         }
     };
 
diff --git a/src/cpu/pred/multiperspective_perceptron.cc b/src/cpu/pred/multiperspective_perceptron.cc
index 25b4d7d39a..fd54ec8163 100644
--- a/src/cpu/pred/multiperspective_perceptron.cc
+++ b/src/cpu/pred/multiperspective_perceptron.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright 2019 Texas A&M University
  *
  * Redistribution and use in source and binary forms, with or without
@@ -547,10 +559,19 @@ MultiperspectivePerceptron::train(ThreadID tid, MPPBranchInfo &bi, bool taken)
     }
 }
 
+
 void
-MultiperspectivePerceptron::uncondBranch(ThreadID tid, Addr pc,
-                                         void * &bp_history)
+MultiperspectivePerceptron::updateHistories(ThreadID tid, Addr pc,
+                    bool uncond, bool taken, Addr target, void * &bp_history)
 {
+    assert(uncond || bp_history);
+
+    // For perceptron there is no speculative history correction.
+    // Conditional branches are done.
+    if (!uncond)
+        return;
+
+    // For unconditional branches create branch info.
     MPPBranchInfo *bi = new MPPBranchInfo(pc, pcshift, false);
     std::vector<unsigned int> &ghist_words = threadData[tid]->ghist_words;
 
@@ -613,10 +634,10 @@ MultiperspectivePerceptron::lookup(ThreadID tid, Addr instPC,
 }
 
 void
-MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
-                                   void *bp_history, bool squashed,
+MultiperspectivePerceptron::update(ThreadID tid, Addr pc,  bool taken,
+                                   void * &bp_history, bool squashed,
                                    const StaticInstPtr & inst,
-                                   Addr corrTarget)
+                                   Addr target)
 {
     assert(bp_history);
     MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
@@ -627,6 +648,7 @@ MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
 
     if (bi->isUnconditional()) {
         delete bi;
+        bp_history = nullptr;
         return;
     }
 
@@ -693,7 +715,6 @@ MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
 
     // four different styles of IMLI
     if (!bi->filtered || (record_mask & Imli)) {
-        unsigned int target = corrTarget;
         if (target < bi->getPC()) {
             if (taken) {
                 threadData[tid]->imli_counter[0] += 1;
@@ -813,20 +834,16 @@ MultiperspectivePerceptron::update(ThreadID tid, Addr instPC, bool taken,
     threadData[tid]->last_ghist_bit = taken;
 
     delete bi;
+    bp_history = nullptr;
 }
 
 void
-MultiperspectivePerceptron::btbUpdate(ThreadID tid, Addr branch_pc,
-                                      void* &bp_history)
-{
-}
-
-void
-MultiperspectivePerceptron::squash(ThreadID tid, void *bp_history)
+MultiperspectivePerceptron::squash(ThreadID tid, void * &bp_history)
 {
     assert(bp_history);
     MPPBranchInfo *bi = static_cast<MPPBranchInfo*>(bp_history);
     delete bi;
+    bp_history = nullptr;
 }
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/multiperspective_perceptron.hh b/src/cpu/pred/multiperspective_perceptron.hh
index 68ab5f1a23..f761607a29 100644
--- a/src/cpu/pred/multiperspective_perceptron.hh
+++ b/src/cpu/pred/multiperspective_perceptron.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright 2019 Texas A&M University
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1048,14 +1060,14 @@ class MultiperspectivePerceptron : public BPredUnit
 
     void init() override;
 
-    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) override;
-    void squash(ThreadID tid, void *bp_history) override;
-    bool lookup(ThreadID tid, Addr instPC, void * &bp_history) override;
-    void update(ThreadID tid, Addr instPC, bool taken,
-            void *bp_history, bool squashed,
-            const StaticInstPtr & inst,
-            Addr corrTarget) override;
-    void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override;
+    // Base class methods.
+    bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override;
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
+    void squash(ThreadID tid, void * &bp_history) override;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/multiperspective_perceptron_tage.cc b/src/cpu/pred/multiperspective_perceptron_tage.cc
index 6176d9ccb2..1075f9d04f 100644
--- a/src/cpu/pred/multiperspective_perceptron_tage.cc
+++ b/src/cpu/pred/multiperspective_perceptron_tage.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright 2019 Texas A&M University
  *
  * Redistribution and use in source and binary forms, with or without
@@ -548,7 +560,7 @@ MultiperspectivePerceptronTAGE::lookup(ThreadID tid, Addr instPC,
 
 void
 MPP_StatisticalCorrector::condBranchUpdate(ThreadID tid, Addr branch_pc,
-        bool taken, StatisticalCorrector::BranchInfo *bi, Addr corrTarget,
+        bool taken, StatisticalCorrector::BranchInfo *bi, Addr target,
         bool bias_bit, int hitBank, int altBank, int64_t phist)
 {
     bool scPred = (bi->lsum >= 0);
@@ -588,10 +600,10 @@ MPP_StatisticalCorrector::condBranchUpdate(ThreadID tid, Addr branch_pc,
 }
 
 void
-MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
-                                   void *bp_history, bool squashed,
+MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr pc, bool taken,
+                                   void * &bp_history, bool squashed,
                                    const StaticInstPtr & inst,
-                                   Addr corrTarget)
+                                   Addr target)
 {
     assert(bp_history);
     MPPTAGEBranchInfo *bi = static_cast<MPPTAGEBranchInfo*>(bp_history);
@@ -600,7 +612,7 @@ MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
         if (tage->isSpeculativeUpdateEnabled()) {
             // This restores the global history, then update it
             // and recomputes the folded histories.
-            tage->squash(tid, taken, bi->tageBranchInfo, corrTarget);
+            tage->squash(tid, taken, bi->tageBranchInfo, target);
             if (bi->tageBranchInfo->condBranch) {
                 loopPredictor->squashLoop(bi->lpBranchInfo);
             }
@@ -609,16 +621,16 @@ MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
     }
 
     if (bi->isUnconditional()) {
-        statisticalCorrector->scHistoryUpdate(instPC, inst, taken,
-                bi->scBranchInfo, corrTarget);
-        tage->updateHistories(tid, instPC, taken, bi->tageBranchInfo, false,
-                inst, corrTarget);
+        statisticalCorrector->scHistoryUpdate(pc, inst, taken,
+                bi->scBranchInfo, target);
+        tage->updateHistories(tid, pc, taken, bi->tageBranchInfo, false,
+                inst, target);
     } else {
         tage->updateStats(taken, bi->tageBranchInfo);
         loopPredictor->updateStats(taken, bi->lpBranchInfo);
         statisticalCorrector->updateStats(taken, bi->scBranchInfo);
 
-        loopPredictor->condBranchUpdate(tid, instPC, taken,
+        loopPredictor->condBranchUpdate(tid, pc, taken,
                 bi->tageBranchInfo->tagePred, bi->lpBranchInfo, instShiftAmt);
 
         bool scPred = (bi->scBranchInfo->lsum >= 0);
@@ -626,13 +638,13 @@ MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
             ((abs(bi->scBranchInfo->lsum) < bi->scBranchInfo->thres))) {
             updatePartial(tid, *bi, taken);
         }
-        statisticalCorrector->condBranchUpdate(tid, instPC, taken,
-                bi->scBranchInfo, corrTarget, false /* bias_bit: unused */,
+        statisticalCorrector->condBranchUpdate(tid, pc, taken,
+                bi->scBranchInfo, target, false /* bias_bit: unused */,
                 0 /* hitBank: unused */, 0 /* altBank: unused*/,
                 tage->getPathHist(tid));
 
-        tage->condBranchUpdate(tid, instPC, taken, bi->tageBranchInfo,
-                               random_mt.random<int>(), corrTarget,
+        tage->condBranchUpdate(tid, pc, taken, bi->tageBranchInfo,
+                               random_mt.random<int>(), target,
                                bi->predictedTaken, true);
 
         updateHistories(tid, *bi, taken);
@@ -640,8 +652,8 @@ MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
         if (!tage->isSpeculativeUpdateEnabled()) {
             if (inst->isCondCtrl() && inst->isDirectCtrl()
                 && !inst->isCall() && !inst->isReturn()) {
-                uint32_t truncated_target = corrTarget;
-                uint32_t truncated_pc = instPC;
+                uint32_t truncated_target = target;
+                uint32_t truncated_pc = pc;
                 if (truncated_target < truncated_pc) {
                     if (!taken) {
                         threadData[tid]->imli_counter[0] = 0;
@@ -657,20 +669,28 @@ MultiperspectivePerceptronTAGE::update(ThreadID tid, Addr instPC, bool taken,
                 }
             }
 
-            statisticalCorrector->scHistoryUpdate(instPC, inst, taken,
-                    bi->scBranchInfo, corrTarget);
+            statisticalCorrector->scHistoryUpdate(pc, inst, taken,
+                    bi->scBranchInfo, target);
 
-            tage->updateHistories(tid, instPC, taken, bi->tageBranchInfo,
-                                  false, inst, corrTarget);
+            tage->updateHistories(tid, pc, taken, bi->tageBranchInfo,
+                                  false, inst, target);
         }
     }
     delete bi;
+    bp_history = nullptr;
 }
 
 void
-MultiperspectivePerceptronTAGE::uncondBranch(ThreadID tid, Addr pc,
-                                             void * &bp_history)
+MultiperspectivePerceptronTAGE::updateHistories(ThreadID tid, Addr pc,
+                                            bool uncond, bool taken,
+                                            Addr target, void * &bp_history)
 {
+    assert(uncond || bp_history);
+
+    // For perceptron there is no speculative history correction.
+    // Conditional branches are done.
+    if (!uncond) return;
+
     MPPTAGEBranchInfo *bi =
         new MPPTAGEBranchInfo(pc, pcshift, false, *tage, *loopPredictor,
                               *statisticalCorrector);
@@ -678,11 +698,12 @@ MultiperspectivePerceptronTAGE::uncondBranch(ThreadID tid, Addr pc,
 }
 
 void
-MultiperspectivePerceptronTAGE::squash(ThreadID tid, void *bp_history)
+MultiperspectivePerceptronTAGE::squash(ThreadID tid, void * &bp_history)
 {
     assert(bp_history);
     MPPTAGEBranchInfo *bi = static_cast<MPPTAGEBranchInfo*>(bp_history);
     delete bi;
+    bp_history = nullptr;
 }
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/multiperspective_perceptron_tage.hh b/src/cpu/pred/multiperspective_perceptron_tage.hh
index 3a92e3cf07..9c7ee3556f 100644
--- a/src/cpu/pred/multiperspective_perceptron_tage.hh
+++ b/src/cpu/pred/multiperspective_perceptron_tage.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright 2019 Texas A&M University
  *
  * Redistribution and use in source and binary forms, with or without
@@ -178,7 +190,7 @@ class MPP_StatisticalCorrector : public StatisticalCorrector
 
     void condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken,
                           StatisticalCorrector::BranchInfo *bi,
-                          Addr corrTarget, bool b, int hitBank, int altBank,
+                          Addr target, bool b, int hitBank, int altBank,
                           int64_t phist) override;
 
     virtual void getBiasLSUM(Addr branch_pc,
@@ -236,12 +248,12 @@ class MultiperspectivePerceptronTAGE : public MultiperspectivePerceptron
 
     bool lookup(ThreadID tid, Addr instPC, void * &bp_history) override;
 
-    void update(ThreadID tid, Addr instPC, bool taken,
-            void *bp_history, bool squashed,
-            const StaticInstPtr & inst,
-            Addr corrTarget) override;
-    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history) override;
-    void squash(ThreadID tid, void *bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
+    void squash(ThreadID tid, void * &bp_history) override;
 
 };
 
diff --git a/src/cpu/pred/ras.cc b/src/cpu/pred/ras.cc
index 8d415b7fbd..f29b265657 100644
--- a/src/cpu/pred/ras.cc
+++ b/src/cpu/pred/ras.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -28,30 +40,46 @@
 
 #include "cpu/pred/ras.hh"
 
+#include <iomanip>
+
+#include "debug/RAS.hh"
+
 namespace gem5
 {
 
 namespace branch_prediction
 {
 
+
 void
-ReturnAddrStack::init(unsigned _numEntries)
+ReturnAddrStack::AddrStack::init(unsigned _numEntries)
 {
     numEntries = _numEntries;
     addrStack.resize(numEntries);
+    for (unsigned i = 0; i < numEntries; ++i) {
+        addrStack[i] = nullptr;
+    }
     reset();
 }
 
 void
-ReturnAddrStack::reset()
+ReturnAddrStack::AddrStack::reset()
 {
     usedEntries = 0;
     tos = 0;
 }
 
+const PCStateBase *
+ReturnAddrStack::AddrStack::top()
+{
+    return addrStack[tos].get();
+}
+
+
 void
-ReturnAddrStack::push(const PCStateBase &return_addr)
+ReturnAddrStack::AddrStack::push(const PCStateBase &return_addr)
 {
+
     incrTos();
 
     set(addrStack[tos], return_addr);
@@ -62,7 +90,7 @@ ReturnAddrStack::push(const PCStateBase &return_addr)
 }
 
 void
-ReturnAddrStack::pop()
+ReturnAddrStack::AddrStack::pop()
 {
     if (usedEntries > 0) {
         --usedEntries;
@@ -72,9 +100,10 @@ ReturnAddrStack::pop()
 }
 
 void
-ReturnAddrStack::restore(unsigned top_entry_idx, const PCStateBase *restored)
+ReturnAddrStack::AddrStack::restore(unsigned _tos,
+                                    const PCStateBase *restored)
 {
-    tos = top_entry_idx;
+    tos = _tos;
 
     set(addrStack[tos], restored);
 
@@ -83,5 +112,218 @@ ReturnAddrStack::restore(unsigned top_entry_idx, const PCStateBase *restored)
     }
 }
 
+std::string
+ReturnAddrStack::AddrStack::toString(int n)
+{
+    std::stringstream ss;
+    unsigned idx = tos;
+    // Dump up to n entries from tos downwards, wrapping like decrTos().
+    for (int i = 0; i < n && unsigned(i) < numEntries; i++) {
+        if (addrStack[idx] == nullptr) break;
+        ss << std::dec << idx << ":0x" << std::setfill('0') << std::setw(16)
+           << std::hex << addrStack[idx]->instAddr() << ";";
+        idx = (idx == 0 ? numEntries - 1 : idx - 1);
+    }
+    return ss.str();
+}
+
+
+// Return address stack class.
+//
+
+ReturnAddrStack::ReturnAddrStack(const Params &p)
+    : SimObject(p),
+      numEntries(p.numEntries),
+      numThreads(p.numThreads),
+      stats(this)
+{
+    DPRINTF(RAS, "Create RAS stacks.\n");
+
+    for (unsigned i = 0; i < numThreads; ++i) {
+        addrStacks.emplace_back(*this);
+        addrStacks[i].init(numEntries);
+    }
+}
+
+void
+ReturnAddrStack::reset()
+{
+    DPRINTF(RAS, "RAS Reset.\n");
+    for (auto& r : addrStacks)
+        r.reset();
+}
+
+void
+ReturnAddrStack::makeRASHistory(void* &ras_history)
+{
+    RASHistory* history = new RASHistory;
+    history->pushed = false;
+    history->poped = false;
+    ras_history = static_cast<void*>(history);
+}
+
+void
+ReturnAddrStack::push(ThreadID tid, const PCStateBase &pc,
+                        void * &ras_history)
+{
+    // Note: The RAS may be both popped and pushed to
+    //       support coroutines.
+    if (ras_history == nullptr) {
+        makeRASHistory(ras_history);
+    }
+    RASHistory *history = static_cast<RASHistory*>(ras_history);
+    stats.pushes++;
+    history->pushed = true;
+
+    addrStacks[tid].push(pc);
+
+    DPRINTF(RAS, "%s: RAS[%i] <= %#x. Entries used: %i, tid:%i\n", __func__,
+                    addrStacks[tid].tos, pc.instAddr(),
+                    addrStacks[tid].usedEntries,tid);
+    // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10));
+}
+
+
+const PCStateBase*
+ReturnAddrStack::pop(ThreadID tid, void * &ras_history)
+{
+    // Note: The RAS may be both popped and pushed to
+    //       support coroutines.
+    if (ras_history == nullptr) {
+        makeRASHistory(ras_history);
+    }
+    RASHistory *history = static_cast<RASHistory*>(ras_history);
+    stats.pops++;
+
+    history->poped = true;
+    history->tos = addrStacks[tid].tos;
+
+
+    set(history->ras_entry, addrStacks[tid].top());
+    // Pop the top of stack
+    addrStacks[tid].pop();
+
+    DPRINTF(RAS, "%s: RAS[%i] => %#x. Entries used: %i, tid:%i\n", __func__,
+            addrStacks[tid].tos, (history->ras_entry.get() != nullptr)
+            ? history->ras_entry->instAddr() : 0,
+            addrStacks[tid].usedEntries, tid);
+    // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10));
+
+    return history->ras_entry.get();
+}
+
+void
+ReturnAddrStack::squash(ThreadID tid, void * &ras_history)
+{
+    if (ras_history == nullptr) {
+        // If ras_history is null no stack operation was performed for
+        // this branch. Nothing to be done.
+        return;
+    }
+    stats.squashes++;
+
+    RASHistory *history = static_cast<RASHistory*>(ras_history);
+
+    if (history->pushed) {
+        stats.pops++;
+        addrStacks[tid].pop();
+
+        DPRINTF(RAS, "RAS::%s Incorrect push. Pop RAS[%i]. "
+                "Entries used: %i, tid:%i\n", __func__,
+                addrStacks[tid].tos, addrStacks[tid].usedEntries, tid);
+    }
+
+    if (history->poped) {
+        stats.pushes++;
+        addrStacks[tid].restore(history->tos, history->ras_entry.get());
+        DPRINTF(RAS, "RAS::%s Incorrect pop. Restore to: RAS[%i]:%#x. "
+            "Entries used: %i, tid:%i\n", __func__,
+            history->tos,  (history->ras_entry.get() != nullptr)
+            ? history->ras_entry->instAddr() : 0,
+            addrStacks[tid].usedEntries, tid);
+    }
+    // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10));
+    delete history;
+    ras_history = nullptr;
+}
+
+void
+ReturnAddrStack::commit(ThreadID tid, bool misp,
+                        const BranchType brType, void * &ras_history)
+{
+    // Skip branches that are neither calls nor returns.
+    if (!(brType == BranchType::Return ||
+          brType == BranchType::CallDirect ||
+          brType == BranchType::CallIndirect)) {
+        // If it's not a call or return there should be no RAS history.
+        assert(ras_history == nullptr);
+        return;
+    }
+
+    DPRINTF(RAS, "RAS::%s Commit Branch inst: %s, tid:%i\n",
+                __func__, toString(brType),tid);
+
+    // Below, an existing history object is deleted and the pointer cleared.
+    if (ras_history == nullptr) {
+        /**
+         * The only case where we could have no history at this point is
+         * for a conditional call that is not taken.
+         *
+         * Conditional calls
+         *
+         * Conditional calls have different scenarios:
+         * 1. the call was predicted as non taken but was actually taken
+         * 2. the call was predicted taken but was actually not taken.
+         * 3. the call was taken but the target was incorrect.
+         * 4. the call was correct.
+         *
+         * In case of mispredictions they will be handled during squashing
+         * of the BPU. It will push and pop the RAS accordingly.
+         **/
+        return;
+    }
+
+    /* Handle all other committed returns and calls */
+    RASHistory *history = static_cast<RASHistory*>(ras_history);
+
+    if (history->poped) {
+        stats.used++;
+        if (misp) {
+            stats.incorrect++;
+        } else {
+            stats.correct++;
+        }
+        // DPRINTF arguments follow the format string: PC, then correct.
+        DPRINTF(RAS, "RAS::%s Commit Return PC %#x, correct:%i, tid:%i\n",
+                __func__, (history->ras_entry.get() != nullptr)
+                ? history->ras_entry->instAddr() : 0, !misp, tid);
+    }
+    delete history;
+    ras_history = nullptr;
+}
+
+
+
+ReturnAddrStack::ReturnAddrStackStats::ReturnAddrStackStats(
+    statistics::Group *parent)
+    : statistics::Group(parent),
+      ADD_STAT(pushes, statistics::units::Count::get(),
+               "Number of times a PC was pushed onto the RAS"),
+      ADD_STAT(pops, statistics::units::Count::get(),
+               "Number of times a PC was poped from the RAS"),
+      ADD_STAT(squashes, statistics::units::Count::get(),
+               "Number of times the stack operation was squashed due to "
+               "wrong speculation."),
+      ADD_STAT(used, statistics::units::Count::get(),
+               "Number of times the RAS is the provider"),
+      ADD_STAT(correct, statistics::units::Count::get(),
+               "Number of times the RAS is the provider and the "
+               "prediction is correct"),
+      ADD_STAT(incorrect, statistics::units::Count::get(),
+               "Number of times the RAS is the provider and the "
+               "prediction is wrong")
+{
+}
+
 } // namespace branch_prediction
 } // namespace gem5
diff --git a/src/cpu/pred/ras.hh b/src/cpu/pred/ras.hh
index 0b4b471c03..294055965e 100644
--- a/src/cpu/pred/ras.hh
+++ b/src/cpu/pred/ras.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -32,7 +44,12 @@
 #include <vector>
 
 #include "arch/generic/pcstate.hh"
+#include "base/statistics.hh"
 #include "base/types.hh"
+#include "cpu/pred/branch_type.hh"
+#include "cpu/static_inst.hh"
+#include "params/ReturnAddrStack.hh"
+#include "sim/sim_object.hh"
 
 namespace gem5
 {
@@ -41,70 +58,164 @@ namespace branch_prediction
 {
 
 /** Return address stack class, implements a simple RAS. */
-class ReturnAddrStack
+class ReturnAddrStack : public SimObject
 {
   public:
-    /** Creates a return address stack, but init() must be called prior to
-     *  use.
-     */
-    ReturnAddrStack() {}
 
-    /** Initializes RAS with a specified number of entries.
-     *  @param numEntries Number of entries in the RAS.
+    /** Nested class that implements the actual address stack.
      */
-    void init(unsigned numEntries);
+    class AddrStack
+    {
+      public:
+        AddrStack(ReturnAddrStack &_parent)
+          : parent(_parent)
+        {}
 
-    void reset();
 
-    /** Returns the top address on the RAS. */
-    const PCStateBase *top() { return addrStack[tos].get(); }
+      /** Initializes RAS with a specified number of entries.
+       *  @param numEntries Number of entries in the RAS.
+       */
+      void init(unsigned numEntries);
+
+      void reset();
+
+      /** Returns the top address on the RAS. */
+      const PCStateBase *top();
+
+      /** Returns the index of the top of the RAS. */
+      unsigned topIdx() { return tos; }
+
+      /** Pushes an address onto the RAS. */
+      void push(const PCStateBase &return_addr);
+
+      /** Pops the top address from the RAS. */
+      void pop();
+
+      /** Changes index to the top of the RAS, and replaces the top address
+       *  with a new target.
+       *  @param top_of_stack the index saved at the time of the prediction.
+       *  @param restored The new target address of the new top of the RAS.
+       */
+      void restore(unsigned top_of_stack, const PCStateBase *restored);
+
+      bool empty() { return usedEntries == 0; }
+
+      bool full() { return usedEntries >= numEntries; }
+
+      /** Returns the top n entries of the stack as string. For debugging. */
+      std::string toString(int n);
+
+      /** Increments the top of stack index. */
+      inline void
+      incrTos()
+      {
+          if (++tos == numEntries)
+              tos = 0;
+      }
 
-    /** Returns the index of the top of the RAS. */
-    unsigned topIdx() { return tos; }
+      /** Decrements the top of stack index. */
+      inline void
+      decrTos()
+      {
+          tos = (tos == 0 ? numEntries - 1 : tos - 1);
+      }
 
-    /** Pushes an address onto the RAS. */
-    void push(const PCStateBase &return_addr);
+      /** The Stack itself. */
+      std::vector<std::unique_ptr<PCStateBase>> addrStack;
 
-    /** Pops the top address from the RAS. */
-    void pop();
+      /** The number of entries in the RAS. */
+      unsigned numEntries;
 
-    /** Changes index to the top of the RAS, and replaces the top address with
-     *  a new target.
-     *  @param top_entry_idx The index of the RAS that will now be the top.
-     *  @param restored The new target address of the new top of the RAS.
+      /** The number of used entries in the RAS. */
+      unsigned usedEntries;
+
+      /** The top of stack index. */
+      unsigned tos;
+
+      protected:
+        ReturnAddrStack &parent;
+    };
+
+
+
+  public:
+    // typedef RASParams Params;
+    typedef ReturnAddrStackParams Params;
+
+    // ReturnAddrStack(BPredUnit &_parent, const RASParams);
+    ReturnAddrStack(const Params &p);
+
+    void reset();
+
+    /**
+     * Pushes an address onto the RAS.
+     * @param PC The current PC (should be a call).
+     * @param ras_history Pointer that will be set to an object that
+     * has the return address state associated when the address was pushed.
      */
-    void restore(unsigned top_entry_idx, const PCStateBase *restored);
+    void push(ThreadID tid, const PCStateBase &pc, void * &ras_history);
+
+    /**
+     * Pops the top address from the RAS.
+     * @param ras_history Pointer that will be set to an object that
+     * has the return address state associated when an address was popped.
+     * @return The address that got popped from the stack.
+     *  */
+    const PCStateBase* pop(ThreadID tid, void * &ras_history);
+
+    /**
+     * The branch (call/return) got squashed.
+     * Restores the state of the RAS and deletes the history.
+     *  @param ras_history The pointer to the history object.
+     */
+    void squash(ThreadID tid, void * &ras_history);
 
-    bool empty() { return usedEntries == 0; }
+    /**
+     * A branch finally got committed.
+     * @param misp Whether the branch was mispredicted.
+     * @param brType The type of the branch.
+     * @param ras_history The pointer to the history object.
+     */
+    void commit(ThreadID tid, bool misp,
+                const BranchType brType, void * &ras_history);
 
-    bool full() { return usedEntries == numEntries; }
   private:
-    /** Increments the top of stack index. */
-    inline void
-    incrTos()
-    {
-        if (++tos == numEntries)
-            tos = 0;
-    }
 
-    /** Decrements the top of stack index. */
-    inline void
-    decrTos()
+    class RASHistory
     {
-        tos = (tos == 0 ? numEntries - 1 : tos - 1);
-    }
+      public:
+        /* Was the RAS pushed or popped for this branch. */
+        bool pushed = false;
+        bool poped = false;
+        /* Was the branch a return and/or a call. */
+        bool wasReturn = false;
+        bool wasCall = false;
+        /** The entry popped from the RAS (only valid if a return). */
+        std::unique_ptr<PCStateBase> ras_entry;
+        /** The RAS index (top of stack pointer) of the instruction */
+        unsigned tos = 0;
+    };
+
+    void makeRASHistory(void* &ras_history);
 
     /** The RAS itself. */
-    std::vector<std::unique_ptr<PCStateBase>> addrStack;
+    std::vector<AddrStack> addrStacks;
 
     /** The number of entries in the RAS. */
     unsigned numEntries;
+    /** The number of threads */
+    unsigned numThreads;
 
-    /** The number of used entries in the RAS. */
-    unsigned usedEntries;
-
-    /** The top of stack index. */
-    unsigned tos;
+    struct ReturnAddrStackStats : public statistics::Group
+    {
+        ReturnAddrStackStats(statistics::Group *parent);
+        statistics::Scalar pushes;
+        statistics::Scalar pops;
+        statistics::Scalar squashes;
+        statistics::Scalar used;
+        statistics::Scalar correct;
+        statistics::Scalar incorrect;
+    } stats;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/simple_btb.cc b/src/cpu/pred/simple_btb.cc
new file mode 100644
index 0000000000..c78caac7a8
--- /dev/null
+++ b/src/cpu/pred/simple_btb.cc
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/pred/simple_btb.hh"
+
+#include "base/intmath.hh"
+#include "base/trace.hh"
+#include "debug/BTB.hh"
+
+namespace gem5
+{
+
+namespace branch_prediction
+{
+
+SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
+    : BranchTargetBuffer(p),
+        numEntries(p.numEntries),
+        tagBits(p.tagBits),
+        instShiftAmt(p.instShiftAmt),
+        log2NumThreads(floorLog2(p.numThreads))
+{
+    DPRINTF(BTB, "BTB: Creating BTB object.\n");
+
+    if (!isPowerOf2(numEntries)) {
+        fatal("BTB entries is not a power of 2!");
+    }
+
+    btb.resize(numEntries);
+
+    for (unsigned i = 0; i < numEntries; ++i) {
+        btb[i].valid = false;
+    }
+
+    idxMask = numEntries - 1;
+
+    tagMask = (1 << tagBits) - 1;
+
+    tagShiftAmt = instShiftAmt + floorLog2(numEntries);
+}
+
+void
+SimpleBTB::memInvalidate()
+{
+    for (unsigned i = 0; i < numEntries; ++i) {
+        btb[i].valid = false;
+    }
+}
+
+inline
+unsigned
+SimpleBTB::getIndex(Addr instPC, ThreadID tid)
+{
+    // Need to shift PC over by the word offset.
+    return ((instPC >> instShiftAmt)
+            ^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads)))
+            & idxMask;
+}
+
+inline
+Addr
+SimpleBTB::getTag(Addr instPC)
+{
+    return (instPC >> tagShiftAmt) & tagMask;
+}
+
+SimpleBTB::BTBEntry *
+SimpleBTB::findEntry(Addr instPC, ThreadID tid)
+{
+    unsigned btb_idx = getIndex(instPC, tid);
+    Addr inst_tag = getTag(instPC);
+
+    assert(btb_idx < numEntries);
+
+    if (btb[btb_idx].valid
+        && inst_tag == btb[btb_idx].tag
+        && btb[btb_idx].tid == tid) {
+        return &btb[btb_idx];
+    }
+
+    return nullptr;
+}
+
+bool
+SimpleBTB::valid(ThreadID tid, Addr instPC)
+{
+    BTBEntry *entry = findEntry(instPC, tid);
+
+    return entry != nullptr;
+}
+
+// @todo Create some sort of return struct that has both whether or not the
+// address is valid, and also the address.  For now a nullptr return is used
+// to represent an invalid (missing) entry.
+const PCStateBase *
+SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
+{
+    stats.lookups[type]++;
+
+    BTBEntry *entry = findEntry(instPC, tid);
+
+    if (entry) {
+        return entry->target.get();
+    }
+    stats.misses[type]++;
+    return nullptr;
+}
+
+const StaticInstPtr
+SimpleBTB::getInst(ThreadID tid, Addr instPC)
+{
+    BTBEntry *entry = findEntry(instPC, tid);
+
+    if (entry) {
+        return entry->inst;
+    }
+    return nullptr;
+}
+
+void
+SimpleBTB::update(ThreadID tid, Addr instPC,
+                    const PCStateBase &target,
+                    BranchType type, StaticInstPtr inst)
+{
+    unsigned btb_idx = getIndex(instPC, tid);
+
+    assert(btb_idx < numEntries);
+
+    stats.updates[type]++;
+
+    btb[btb_idx].tid = tid;
+    btb[btb_idx].valid = true;
+    set(btb[btb_idx].target, target);
+    btb[btb_idx].tag = getTag(instPC);
+    btb[btb_idx].inst = inst;
+}
+
+} // namespace branch_prediction
+} // namespace gem5
diff --git a/src/cpu/pred/simple_btb.hh b/src/cpu/pred/simple_btb.hh
new file mode 100644
index 0000000000..3c76890348
--- /dev/null
+++ b/src/cpu/pred/simple_btb.hh
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_PRED_SIMPLE_BTB_HH__
+#define __CPU_PRED_SIMPLE_BTB_HH__
+
+#include "base/logging.hh"
+#include "base/types.hh"
+#include "cpu/pred/btb.hh"
+#include "params/SimpleBTB.hh"
+
+namespace gem5
+{
+
+namespace branch_prediction
+{
+
+class SimpleBTB : public BranchTargetBuffer
+{
+  public:
+    SimpleBTB(const SimpleBTBParams &params);
+
+    void memInvalidate() override;
+    bool valid(ThreadID tid, Addr instPC) override;
+    const PCStateBase *lookup(ThreadID tid, Addr instPC,
+                           BranchType type = BranchType::NoBranch) override;
+    void update(ThreadID tid, Addr instPC, const PCStateBase &target_pc,
+                           BranchType type = BranchType::NoBranch,
+                           StaticInstPtr inst = nullptr) override;
+    const StaticInstPtr getInst(ThreadID tid, Addr instPC) override;
+
+
+  private:
+    struct BTBEntry
+    {
+        /** The entry's tag. */
+        Addr tag = 0;
+
+        /** The entry's target. */
+        std::unique_ptr<PCStateBase> target;
+
+        /** The entry's thread id. */
+        ThreadID tid;
+
+        /** Whether or not the entry is valid. */
+        bool valid = false;
+
+        /** Pointer to the static branch instruction at this address */
+        StaticInstPtr inst = nullptr;
+    };
+
+
+    /** Returns the index into the BTB, based on the branch's PC and thread.
+     *  @param instPC The branch to look up; tid is hashed into the index.
+     *  @return Returns the index into the BTB.
+     */
+    inline unsigned getIndex(Addr instPC, ThreadID tid);
+
+    /** Returns the tag bits of a given address.
+     *  @param instPC The branch's address.
+     *  @return Returns the tag bits.
+     */
+    inline Addr getTag(Addr instPC);
+
+    /** Internal call to find the entry for (instPC, tid) in the BTB
+     * @param instPC The branch's address; tid is the requesting thread.
+     * @return Returns a pointer to the BTB entry if found, nullptr otherwise.
+     */
+    BTBEntry *findEntry(Addr instPC, ThreadID tid);
+
+    /** The actual BTB. */
+    std::vector<BTBEntry> btb;
+
+    /** The number of entries in the BTB. */
+    unsigned numEntries;
+
+    /** The index mask. */
+    unsigned idxMask;
+
+    /** The number of tag bits per entry. */
+    unsigned tagBits;
+
+    /** The tag mask. */
+    unsigned tagMask;
+
+    /** Number of bits to shift PC when calculating index. */
+    unsigned instShiftAmt;
+
+    /** Number of bits to shift PC when calculating tag. */
+    unsigned tagShiftAmt;
+
+    /** Log2 NumThreads used for hashing threadid */
+    unsigned log2NumThreads;
+};
+
+} // namespace branch_prediction
+} // namespace gem5
+
+#endif // __CPU_PRED_SIMPLE_BTB_HH__
diff --git a/src/cpu/pred/simple_indirect.cc b/src/cpu/pred/simple_indirect.cc
index f09cdeef55..815e8bf31b 100644
--- a/src/cpu/pred/simple_indirect.cc
+++ b/src/cpu/pred/simple_indirect.cc
@@ -1,6 +1,16 @@
 /*
  * Copyright (c) 2014 ARM Limited
- * All rights reserved.
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -46,9 +56,11 @@ SimpleIndirectPredictor::SimpleIndirectPredictor(
       numWays(params.indirectWays),
       tagBits(params.indirectTagSize),
       pathLength(params.indirectPathLength),
+      speculativePathLength(params.speculativePathLength),
       instShift(params.instShiftAmt),
       ghrNumBits(params.indirectGHRBits),
-      ghrMask((1 << params.indirectGHRBits)-1)
+      ghrMask((1 << params.indirectGHRBits)-1),
+      stats(this)
 {
     if (!isPowerOf2(numSets)) {
         panic("Indirect predictor requires power of 2 number of sets");
@@ -64,172 +76,292 @@ SimpleIndirectPredictor::SimpleIndirectPredictor(
     fatal_if(ghrNumBits > (sizeof(ThreadInfo::ghr)*8), "ghr_size is too big");
 }
 
+
 void
-SimpleIndirectPredictor::genIndirectInfo(ThreadID tid,
-                                         void* & indirect_history)
+SimpleIndirectPredictor::reset()
 {
-    // record the GHR as it was before this prediction
+    DPRINTF(Indirect, "Reset Indirect predictor\n");
+
+    for (auto& ti : threadInfo) {
+        ti.ghr = 0;
+        ti.pathHist.clear();
+    }
+
+    for (unsigned i = 0; i < numSets; i++) {
+        for (unsigned j = 0; j < numWays; j++) {
+            targetCache[i][j].tag = 0;
+        }
+    }
+}
+
+
+void
+SimpleIndirectPredictor::genIndirectInfo(ThreadID tid, void* &i_history)
+{
+    // Record the GHR as it was before this prediction
     // It will be used to recover the history in case this prediction is
     // wrong or belongs to bad path
-    indirect_history = new unsigned(threadInfo[tid].ghr);
+    IndirectHistory* history = new IndirectHistory;
+    history->ghr = threadInfo[tid].ghr;
+    i_history = static_cast<void*>(history);
 }
 
 void
-SimpleIndirectPredictor::updateDirectionInfo(
-    ThreadID tid, bool actually_taken)
+SimpleIndirectPredictor::updateDirectionInfo(ThreadID tid, bool taken,
+                                             Addr pc, Addr target)
 {
+    // Direction history
     threadInfo[tid].ghr <<= 1;
-    threadInfo[tid].ghr |= actually_taken;
+    threadInfo[tid].ghr |= taken;
     threadInfo[tid].ghr &= ghrMask;
 }
 
-void
-SimpleIndirectPredictor::changeDirectionPrediction(ThreadID tid,
-    void * indirect_history, bool actually_taken)
+
+
+// Interface methods ------------------------------
+const PCStateBase *
+SimpleIndirectPredictor::lookup(ThreadID tid, InstSeqNum sn,
+                                Addr pc, void * &i_history)
 {
-    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
-    threadInfo[tid].ghr = ((*previousGhr) << 1) + actually_taken;
-    threadInfo[tid].ghr &= ghrMask;
+    assert(i_history==nullptr);
+
+    genIndirectInfo(tid, i_history);
+    IndirectHistory *history = static_cast<IndirectHistory*>(i_history);
+
+    history->pcAddr = pc;
+    history->was_indirect = true;
+
+    /** Do the prediction for indirect branches (no returns) */
+    PCStateBase* target = nullptr;
+    history->hit = lookup(tid, pc, target, history);
+    return target;
 }
 
 bool
-SimpleIndirectPredictor::lookup(Addr br_addr, PCStateBase& target,
-    ThreadID tid)
+SimpleIndirectPredictor::lookup(ThreadID tid, Addr br_addr,
+                                PCStateBase * &target,
+                                IndirectHistory * &history)
 {
-    Addr set_index = getSetIndex(br_addr, threadInfo[tid].ghr, tid);
-    Addr tag = getTag(br_addr);
 
-    assert(set_index < numSets);
+    history->set_index = getSetIndex(br_addr, tid);
+    history->tag = getTag(br_addr);
+    assert(history->set_index < numSets);
+    stats.lookups++;
+
+    DPRINTF(Indirect, "Looking up PC:%#x, (set:%d, tag:%d), "
+                    "ghr:%#x, pathHist sz:%#x\n",
+                    history->pcAddr, history->set_index, history->tag,
+                    history->ghr, threadInfo[tid].pathHist.size());
 
-    DPRINTF(Indirect, "Looking up %x (set:%d)\n", br_addr, set_index);
-    const auto &iset = targetCache[set_index];
+    const auto &iset = targetCache[history->set_index];
     for (auto way = iset.begin(); way != iset.end(); ++way) {
         // tag may be 0 and match the default in way->tag, so we also have to
         // check that way->target has been initialized.
-        if (way->tag == tag && way->target) {
+        if (way->tag == history->tag && way->target) {
             DPRINTF(Indirect, "Hit %x (target:%s)\n", br_addr, *way->target);
             set(target, *way->target);
-            return true;
+            history->hit = true;
+            stats.hits++;
+            return history->hit;
         }
     }
     DPRINTF(Indirect, "Miss %x\n", br_addr);
-    return false;
+    history->hit = false;
+    stats.misses++;
+    return history->hit;
 }
 
-void
-SimpleIndirectPredictor::recordIndirect(Addr br_addr, Addr tgt_addr,
-    InstSeqNum seq_num, ThreadID tid)
-{
-    DPRINTF(Indirect, "Recording %x seq:%d\n", br_addr, seq_num);
-    HistoryEntry entry(br_addr, tgt_addr, seq_num);
-    threadInfo[tid].pathHist.push_back(entry);
-}
 
 void
-SimpleIndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid,
-                          void * indirect_history)
+SimpleIndirectPredictor::commit(ThreadID tid, InstSeqNum sn, void * &i_history)
 {
-    DPRINTF(Indirect, "Committing seq:%d\n", seq_num);
-    ThreadInfo &t_info = threadInfo[tid];
-
+    if (i_history == nullptr) return;
     // we do not need to recover the GHR, so delete the information
-    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
-    delete previousGhr;
+    IndirectHistory *history = static_cast<IndirectHistory*>(i_history);
 
-    if (t_info.pathHist.empty()) return;
+    DPRINTF(Indirect, "Committing seq:%d, PC:%#x, ghr:%#x, pathHist sz:%lu\n",
+            sn, history->pcAddr, history->ghr,
+            threadInfo[tid].pathHist.size());
 
-    if (t_info.headHistEntry < t_info.pathHist.size() &&
-        t_info.pathHist[t_info.headHistEntry].seqNum <= seq_num) {
-        if (t_info.headHistEntry >= pathLength) {
-            t_info.pathHist.pop_front();
-        } else {
-             ++t_info.headHistEntry;
-        }
+    delete history;
+    i_history = nullptr;
+
+    /** Delete histories if the history grows too much */
+    while (threadInfo[tid].pathHist.size()
+            >= (pathLength + speculativePathLength)) {
+
+        threadInfo[tid].pathHist.pop_front();
     }
 }
 
 void
-SimpleIndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid)
+SimpleIndirectPredictor::update(ThreadID tid, InstSeqNum sn, Addr pc,
+                           bool squash, bool taken, const PCStateBase& target,
+                           BranchType br_type, void * &i_history)
 {
-    DPRINTF(Indirect, "Squashing seq:%d\n", seq_num);
-    ThreadInfo &t_info = threadInfo[tid];
-    auto squash_itr = t_info.pathHist.begin();
-    while (squash_itr != t_info.pathHist.end()) {
-        if (squash_itr->seqNum > seq_num) {
-           break;
+    // If there is no history we did not use the indirect predictor yet.
+    // Create one
+    if (i_history==nullptr) {
+        genIndirectInfo(tid, i_history);
+    }
+    IndirectHistory *history = static_cast<IndirectHistory*>(i_history);
+    assert(history!=nullptr);
+
+    DPRINTF(Indirect, "Update sn:%i PC:%#x, squash:%i, ghr:%#x,path sz:%i\n",
+               sn, pc, squash, history->ghr, threadInfo[tid].pathHist.size());
+
+    /** If update was called during squash we need to fix the indirect
+     * path history and the global path history.
+     * We restore the state before this branch incorrectly updated it
+     * and perform the update afterwards again.
+    */
+    history->was_indirect = isIndirectNoReturn(br_type);
+    if (squash) {
+
+        /** restore global history */
+        threadInfo[tid].ghr = history->ghr;
+
+        /** For indirect branches recalculate index and tag */
+        if (history->was_indirect) {
+            if (!threadInfo[tid].pathHist.empty()) {
+                threadInfo[tid].pathHist.pop_back();
+            }
+
+            history->set_index = getSetIndex(history->pcAddr, tid);
+            history->tag = getTag(history->pcAddr);
+
+            DPRINTF(Indirect, "Record Target seq:%d, PC:%#x, TGT:%#x, "
+                        "ghr:%#x, (set:%x, tag:%x)\n",
+                        sn, history->pcAddr, target, history->ghr,
+                        history->set_index, history->tag);
         }
-        ++squash_itr;
     }
-    if (squash_itr != t_info.pathHist.end()) {
-        DPRINTF(Indirect, "Squashing series starting with sn:%d\n",
-                squash_itr->seqNum);
+
+    // Only indirect branches are recorded in the path history
+    if (history->was_indirect) {
+
+        DPRINTF(Indirect, "Recording %x seq:%d\n", history->pcAddr, sn);
+        threadInfo[tid].pathHist.emplace_back(
+                                    history->pcAddr, target.instAddr(), sn);
+
+        stats.indirectRecords++;
     }
-    t_info.pathHist.erase(squash_itr, t_info.pathHist.end());
+
+    // All branches update the global history
+    updateDirectionInfo(tid,taken, history->pcAddr, target.instAddr());
+
+    // Finally if update is called during a squash we know the target
+    // we predicted was wrong therefore we update the target.
+    // We only record the target if the branch was indirect and taken
+    if (squash && history->was_indirect && taken)
+        recordTarget(tid, sn, target, history);
 }
 
+
+
 void
-SimpleIndirectPredictor::deleteIndirectInfo(ThreadID tid,
-                                            void * indirect_history)
+SimpleIndirectPredictor::squash(ThreadID tid, InstSeqNum sn, void * &i_history)
 {
-    unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
-    threadInfo[tid].ghr = *previousGhr;
+    if (i_history == nullptr) return;
+
+    // The history holds the GHR snapshot needed to undo this branch
+    IndirectHistory *history = static_cast<IndirectHistory*>(i_history);
+
+    DPRINTF(Indirect, "Squashing seq:%d, PC:%#x, indirect:%i, "
+                    "ghr:%#x, pathHist sz:%#x\n",
+                    sn, history->pcAddr, history->was_indirect,
+                    history->ghr,
+                    threadInfo[tid].pathHist.size());
+
+
+    // Revert the global history register.
+    threadInfo[tid].ghr = history->ghr;
 
-    delete previousGhr;
+    // If this branch was recorded as an indirect branch,
+    // remove it from the path history by dropping
+    // the most recently added entry.
+    if (history->was_indirect) {
+
+        // Count squashes that find fewer than pathLength entries left
+        if (threadInfo[tid].pathHist.size() < pathLength) {
+            stats.speculativeOverflows++;
+        }
+
+        if (!threadInfo[tid].pathHist.empty()) {
+            threadInfo[tid].pathHist.pop_back();
+        }
+    }
+
+    delete history;
+    i_history = nullptr;
 }
 
-void
-SimpleIndirectPredictor::recordTarget(
-    InstSeqNum seq_num, void * indirect_history, const PCStateBase& target,
-    ThreadID tid)
-{
-    ThreadInfo &t_info = threadInfo[tid];
 
-    unsigned * ghr = static_cast<unsigned *>(indirect_history);
 
+
+// Internal functions ------------------------------
+
+
+
+void
+SimpleIndirectPredictor::recordTarget(ThreadID tid, InstSeqNum sn,
+                      const PCStateBase& target, IndirectHistory * &history)
+{
     // Should have just squashed so this branch should be the oldest
-    auto hist_entry = *(t_info.pathHist.rbegin());
-    // Temporarily pop it off the history so we can calculate the set
-    t_info.pathHist.pop_back();
-    Addr set_index = getSetIndex(hist_entry.pcAddr, *ghr, tid);
-    Addr tag = getTag(hist_entry.pcAddr);
-    hist_entry.targetAddr = target.instAddr();
-    t_info.pathHist.push_back(hist_entry);
+    // and it should be predicted as indirect.
+    assert(!threadInfo[tid].pathHist.empty());
+    assert(history->was_indirect);
+
+    if (threadInfo[tid].pathHist.rbegin()->pcAddr != history->pcAddr) {
+        DPRINTF(Indirect, "History seems to be corrupted. %#x != %#x\n",
+                    history->pcAddr,
+                    threadInfo[tid].pathHist.rbegin()->pcAddr);
+    }
+
+    DPRINTF(Indirect, "Record Target seq:%d, PC:%#x, TGT:%#x, "
+                      "ghr:%#x, (set:%x, tag:%x)\n",
+                sn, history->pcAddr, target.instAddr(), history->ghr,
+                history->set_index, history->tag);
 
-    assert(set_index < numSets);
+    assert(history->set_index < numSets);
+    stats.targetRecords++;
 
-    auto &iset = targetCache[set_index];
+    // Update the target cache
+    auto &iset = targetCache[history->set_index];
     for (auto way = iset.begin(); way != iset.end(); ++way) {
-        if (way->tag == tag) {
-            DPRINTF(Indirect, "Updating Target (seq: %d br:%x set:%d target:"
-                    "%s)\n", seq_num, hist_entry.pcAddr, set_index, target);
+        if (way->tag == history->tag) {
+            DPRINTF(Indirect,
+                    "Updating Target (seq: %d br:%x set:%d target:%s)\n",
+                    sn, history->pcAddr, history->set_index, target);
             set(way->target, target);
             return;
         }
     }
 
     DPRINTF(Indirect, "Allocating Target (seq: %d br:%x set:%d target:%s)\n",
-            seq_num, hist_entry.pcAddr, set_index, target);
+            sn, history->pcAddr, history->set_index, target);
+
     // Did not find entry, random replacement
     auto &way = iset[rand() % numWays];
-    way.tag = tag;
+    way.tag = history->tag;
     set(way.target, target);
 }
 
 
+
+
 inline Addr
-SimpleIndirectPredictor::getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid)
+SimpleIndirectPredictor::getSetIndex(Addr br_addr, ThreadID tid)
 {
-    ThreadInfo &t_info = threadInfo[tid];
-
     Addr hash = br_addr >> instShift;
     if (hashGHR) {
-        hash ^= ghr;
+        hash ^= threadInfo[tid].ghr;
     }
     if (hashTargets) {
         unsigned hash_shift = floorLog2(numSets) / pathLength;
-        for (int i = t_info.pathHist.size()-1, p = 0;
+        for (int i = threadInfo[tid].pathHist.size()-1, p = 0;
              i >= 0 && p < pathLength; i--, p++) {
-            hash ^= (t_info.pathHist[i].targetAddr >>
+            hash ^= (threadInfo[tid].pathHist[i].targetAddr >>
                      (instShift + p*hash_shift));
         }
     }
@@ -242,5 +374,26 @@ SimpleIndirectPredictor::getTag(Addr br_addr)
     return (br_addr >> instShift) & ((0x1<<tagBits)-1);
 }
 
+
+SimpleIndirectPredictor::IndirectStats::IndirectStats(
+                                        statistics::Group *parent)
+    : statistics::Group(parent),
+    ADD_STAT(lookups, statistics::units::Count::get(),
+             "Number of lookups"),
+    ADD_STAT(hits, statistics::units::Count::get(),
+             "Number of hits of a tag"),
+    ADD_STAT(misses, statistics::units::Count::get(),
+             "Number of misses"),
+    ADD_STAT(targetRecords, statistics::units::Count::get(),
+             "Number of targets that where recorded/installed in the cache"),
+    ADD_STAT(indirectRecords, statistics::units::Count::get(),
+             "Number of indirect branches/calls recorded in the"
+             " indirect hist"),
+    ADD_STAT(speculativeOverflows, statistics::units::Count::get(),
+             "Number of times more than the allowed capacity for speculative "
+             "branches/calls where in flight and destroy the path history")
+{
+}
+
 } // namespace branch_prediction
 } // namespace gem5
diff --git a/src/cpu/pred/simple_indirect.hh b/src/cpu/pred/simple_indirect.hh
index 7f7a73bdeb..efc7c8b0d6 100644
--- a/src/cpu/pred/simple_indirect.hh
+++ b/src/cpu/pred/simple_indirect.hh
@@ -1,6 +1,16 @@
 /*
  * Copyright (c) 2014 ARM Limited
- * All rights reserved.
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -31,6 +41,7 @@
 
 #include <deque>
 
+#include "base/statistics.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/pred/indirect.hh"
 #include "params/SimpleIndirectPredictor.hh"
@@ -46,19 +57,23 @@ class SimpleIndirectPredictor : public IndirectPredictor
   public:
     SimpleIndirectPredictor(const SimpleIndirectPredictorParams &params);
 
-    bool lookup(Addr br_addr, PCStateBase& br_target, ThreadID tid);
-    void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
-                        ThreadID tid);
-    void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
-    void squash(InstSeqNum seq_num, ThreadID tid);
-    void recordTarget(InstSeqNum seq_num, void * indirect_history,
-                      const PCStateBase& target, ThreadID tid);
-    void genIndirectInfo(ThreadID tid, void* & indirect_history);
-    void updateDirectionInfo(ThreadID tid, bool actually_taken);
-    void deleteIndirectInfo(ThreadID tid, void * indirect_history);
-    void changeDirectionPrediction(ThreadID tid, void * indirect_history,
-                                   bool actually_taken);
+    /** Indirect predictor interface */
+    void reset() override;
+
+    const PCStateBase * lookup(ThreadID tid, InstSeqNum sn,
+                                Addr pc, void * &iHistory) override;
+    void update(ThreadID tid, InstSeqNum sn, Addr pc, bool squash,
+                bool taken, const PCStateBase& target,
+                BranchType br_type, void * &iHistory) override;
+    void squash(ThreadID tid, InstSeqNum sn, void * &iHistory) override;
+    void commit(ThreadID tid, InstSeqNum sn, void * &iHistory) override;
+
 
+
+    /** ------------------
+     * The actual predictor
+     * -------------------
+     * */
   private:
     const bool hashGHR;
     const bool hashTargets;
@@ -66,6 +81,7 @@ class SimpleIndirectPredictor : public IndirectPredictor
     const unsigned numWays;
     const unsigned tagBits;
     const unsigned pathLength;
+    const unsigned speculativePathLength;
     const unsigned instShift;
     const unsigned ghrNumBits;
     const unsigned ghrMask;
@@ -78,27 +94,88 @@ class SimpleIndirectPredictor : public IndirectPredictor
 
     std::vector<std::vector<IPredEntry> > targetCache;
 
-    Addr getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid);
-    Addr getTag(Addr br_addr);
+
 
     struct HistoryEntry
     {
         HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num)
             : pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { }
+        HistoryEntry() : pcAddr(0), targetAddr(0), seqNum(0) { }
         Addr pcAddr;
         Addr targetAddr;
         InstSeqNum seqNum;
     };
 
+    /** Indirect branch history information
+     * Used for prediction, update and recovery
+     */
+    struct IndirectHistory
+    {
+        /* Only pcAddr, targetAddr and was_indirect are default-initialized */
+        Addr pcAddr;
+        Addr targetAddr;
+        InstSeqNum seqNum;
+
+        Addr set_index;
+        Addr tag;
+        bool hit;
+        unsigned ghr;
+        uint64_t pathHist;
 
+        bool was_indirect;
+
+        IndirectHistory()
+            : pcAddr(MaxAddr),
+              targetAddr(MaxAddr),
+              was_indirect(false)
+        {}
+    };
+
+    /** Per thread path and global history registers*/
     struct ThreadInfo
     {
+        // Path history register
         std::deque<HistoryEntry> pathHist;
-        unsigned headHistEntry = 0;
+        // Global direction history register
         unsigned ghr = 0;
     };
 
     std::vector<ThreadInfo> threadInfo;
+
+
+    // ---- Internal functions ----- //
+    bool lookup(ThreadID tid, Addr br_addr,
+                PCStateBase * &target, IndirectHistory * &history);
+    void recordTarget(ThreadID tid, InstSeqNum sn,
+                      const PCStateBase& target, IndirectHistory * &history);
+
+    // Helper functions to generate and modify the
+    // direction info
+    void genIndirectInfo(ThreadID tid, void* &iHistory);
+    void updateDirectionInfo(ThreadID tid, bool taken, Addr pc, Addr target);
+
+    // Helper to compute set and tag
+    inline Addr getSetIndex(Addr br_addr, ThreadID tid);
+    inline Addr getTag(Addr br_addr);
+
+    inline bool isIndirectNoReturn(BranchType type) {
+        return (type == BranchType::CallIndirect) ||
+               (type == BranchType::IndirectUncond);
+    }
+
+  protected:
+    struct IndirectStats : public statistics::Group
+    {
+        IndirectStats(statistics::Group *parent);
+        // STATS
+        statistics::Scalar lookups;
+        statistics::Scalar hits;
+        statistics::Scalar misses;
+        statistics::Scalar targetRecords;
+        statistics::Scalar indirectRecords;
+        statistics::Scalar speculativeOverflows;
+
+    } stats;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc
index 1ba52e27c7..35c0d75352 100644
--- a/src/cpu/pred/tage.cc
+++ b/src/cpu/pred/tage.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The University of Wisconsin
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
@@ -56,8 +68,8 @@ TAGE::TAGE(const TAGEParams &params) : BPredUnit(params), tage(params.tage)
 
 // PREDICTOR UPDATE
 void
-TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
-              bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+TAGE::update(ThreadID tid, Addr pc, bool taken, void * &bp_history,
+              bool squashed, const StaticInstPtr & inst, Addr target)
 {
     assert(bp_history);
 
@@ -67,69 +79,65 @@ TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
     if (squashed) {
         // This restores the global history, then update it
         // and recomputes the folded histories.
-        tage->squash(tid, taken, tage_bi, corrTarget);
+        tage->squash(tid, taken, tage_bi, target);
         return;
     }
 
     int nrand = random_mt.random<int>() & 3;
     if (bi->tageBranchInfo->condBranch) {
         DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n",
-                branch_pc, taken);
+                pc, taken);
         tage->updateStats(taken, bi->tageBranchInfo);
-        tage->condBranchUpdate(tid, branch_pc, taken, tage_bi, nrand,
-                               corrTarget, bi->tageBranchInfo->tagePred);
+        tage->condBranchUpdate(tid, pc, taken, tage_bi, nrand,
+                               target, bi->tageBranchInfo->tagePred);
     }
 
     // optional non speculative update of the histories
-    tage->updateHistories(tid, branch_pc, taken, tage_bi, false, inst,
-                          corrTarget);
+    tage->updateHistories(tid, pc, taken, tage_bi, false, inst, target);
     delete bi;
+    bp_history = nullptr;
 }
 
 void
-TAGE::squash(ThreadID tid, void *bp_history)
+TAGE::squash(ThreadID tid, void * &bp_history)
 {
     TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
     DPRINTF(Tage, "Deleting branch info: %lx\n", bi->tageBranchInfo->branchPC);
     delete bi;
+    bp_history = nullptr;
 }
 
 bool
-TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
+TAGE::predict(ThreadID tid, Addr pc, bool cond_branch, void* &b)
 {
     TageBranchInfo *bi = new TageBranchInfo(*tage);//nHistoryTables+1);
     b = (void*)(bi);
-    return tage->tagePredict(tid, branch_pc, cond_branch, bi->tageBranchInfo);
+    return tage->tagePredict(tid, pc, cond_branch, bi->tageBranchInfo);
 }
 
 bool
-TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
+TAGE::lookup(ThreadID tid, Addr pc, void* &bp_history)
 {
-    bool retval = predict(tid, branch_pc, true, bp_history);
-
-    TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
-
-    DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
+    bool retval = predict(tid, pc, true, bp_history);
 
-    tage->updateHistories(tid, branch_pc, retval, bi->tageBranchInfo, true);
+    DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", pc, retval);
 
     return retval;
 }
 
 void
-TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
+TAGE::updateHistories(ThreadID tid, Addr pc, bool uncond,
+                         bool taken, Addr target, void * &bp_history)
 {
-    TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
-    tage->btbUpdate(tid, branch_pc, bi->tageBranchInfo);
-}
+    assert(uncond || bp_history);
+    if (uncond) {
+        DPRINTF(Tage, "UnConditionalBranch: %lx\n", pc);
+        predict(tid, pc, false, bp_history);
+    }
 
-void
-TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
-{
-    DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc);
-    predict(tid, br_pc, false, bp_history);
+    // Update the global history for all branches
     TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
-    tage->updateHistories(tid, br_pc, true, bi->tageBranchInfo, true);
+    tage->updateHistories(tid, pc, taken, bi->tageBranchInfo, true);
 }
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh
index 568f07bf9e..6d4151cb11 100644
--- a/src/cpu/pred/tage.hh
+++ b/src/cpu/pred/tage.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2014 The University of Wisconsin
  *
  * Copyright (c) 2006 INRIA (Institut National de Recherche en
@@ -87,13 +99,13 @@ class TAGE: public BPredUnit
     TAGE(const TAGEParams &params);
 
     // Base class methods.
-    void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history) override;
-    bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override;
-    void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override;
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst,
-                Addr corrTarget) override;
-    virtual void squash(ThreadID tid, void *bp_history) override;
+    bool lookup(ThreadID tid, Addr pc, void* &bp_history) override;
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
+    virtual void squash(ThreadID tid, void * &bp_history) override;
 };
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/tage_sc_l.cc b/src/cpu/pred/tage_sc_l.cc
index 615c6230c8..a178ba6fc6 100644
--- a/src/cpu/pred/tage_sc_l.cc
+++ b/src/cpu/pred/tage_sc_l.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2018 Metempsy Technology Consulting
  * All rights reserved.
  *
@@ -362,16 +374,16 @@ TAGE_SC_L_TAGE::extraAltCalc(TAGEBase::BranchInfo* bi)
 }
 
 bool
-TAGE_SC_L::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
+TAGE_SC_L::predict(ThreadID tid, Addr pc, bool cond_branch, void* &b)
 {
     TageSCLBranchInfo *bi = new TageSCLBranchInfo(*tage,
                                                   *statisticalCorrector,
                                                   *loopPredictor);
     b = (void*)(bi);
 
-    bool pred_taken = tage->tagePredict(tid, branch_pc, cond_branch,
+    bool pred_taken = tage->tagePredict(tid, pc, cond_branch,
                                         bi->tageBranchInfo);
-    pred_taken = loopPredictor->loopPredict(tid, branch_pc, cond_branch,
+    pred_taken = loopPredictor->loopPredict(tid, pc, cond_branch,
                                             bi->lpBranchInfo, pred_taken,
                                             instShiftAmt);
 
@@ -394,7 +406,7 @@ TAGE_SC_L::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
     bool bias = (bi->tageBranchInfo->longestMatchPred !=
                  bi->tageBranchInfo->altTaken);
 
-    pred_taken = statisticalCorrector->scPredict(tid, branch_pc, cond_branch,
+    pred_taken = statisticalCorrector->scPredict(tid, pc, cond_branch,
             bi->scBranchInfo, pred_taken, bias, use_tage_ctr, tage_ctr,
             tage->getTageCtrBits(), bi->tageBranchInfo->hitBank,
             bi->tageBranchInfo->altBank, tage->getPathHist(tid));
@@ -410,8 +422,8 @@ TAGE_SC_L::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
 }
 
 void
-TAGE_SC_L::update(ThreadID tid, Addr branch_pc, bool taken, void *bp_history,
-        bool squashed, const StaticInstPtr & inst, Addr corrTarget)
+TAGE_SC_L::update(ThreadID tid, Addr pc, bool taken, void *&bp_history,
+        bool squashed, const StaticInstPtr & inst, Addr target)
 {
     assert(bp_history);
 
@@ -423,7 +435,7 @@ TAGE_SC_L::update(ThreadID tid, Addr branch_pc, bool taken, void *bp_history,
         if (tage->isSpeculativeUpdateEnabled()) {
             // This restores the global history, then update it
             // and recomputes the folded histories.
-            tage->squash(tid, taken, tage_bi, corrTarget);
+            tage->squash(tid, taken, tage_bi, target);
             if (bi->tageBranchInfo->condBranch) {
                 loopPredictor->squashLoop(bi->lpBranchInfo);
             }
@@ -434,7 +446,7 @@ TAGE_SC_L::update(ThreadID tid, Addr branch_pc, bool taken, void *bp_history,
     int nrand = random_mt.random<int>() & 3;
     if (tage_bi->condBranch) {
         DPRINTF(TageSCL, "Updating tables for branch:%lx; taken?:%d\n",
-                branch_pc, taken);
+                pc, taken);
         tage->updateStats(taken, bi->tageBranchInfo);
 
         loopPredictor->updateStats(taken, bi->lpBranchInfo);
@@ -443,26 +455,27 @@ TAGE_SC_L::update(ThreadID tid, Addr branch_pc, bool taken, void *bp_history,
 
         bool bias = (bi->tageBranchInfo->longestMatchPred !=
                      bi->tageBranchInfo->altTaken);
-        statisticalCorrector->condBranchUpdate(tid, branch_pc, taken,
-            bi->scBranchInfo, corrTarget, bias, bi->tageBranchInfo->hitBank,
+        statisticalCorrector->condBranchUpdate(tid, pc, taken,
+            bi->scBranchInfo, target, bias, bi->tageBranchInfo->hitBank,
             bi->tageBranchInfo->altBank, tage->getPathHist(tid));
 
-        loopPredictor->condBranchUpdate(tid, branch_pc, taken,
+        loopPredictor->condBranchUpdate(tid, pc, taken,
                 bi->tageBranchInfo->tagePred, bi->lpBranchInfo, instShiftAmt);
 
-        tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo,
-                               nrand, corrTarget, bi->lpBranchInfo->predTaken);
+        tage->condBranchUpdate(tid, pc, taken, bi->tageBranchInfo,
+                               nrand, target, bi->lpBranchInfo->predTaken);
     }
 
     if (!tage->isSpeculativeUpdateEnabled()) {
-        statisticalCorrector->scHistoryUpdate(branch_pc, inst, taken,
-                                              bi->scBranchInfo, corrTarget);
+        statisticalCorrector->scHistoryUpdate(pc, inst, taken,
+                                              bi->scBranchInfo, target);
 
-        tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false,
-                              inst, corrTarget);
+        tage->updateHistories(tid, pc, taken, bi->tageBranchInfo, false,
+                              inst, target);
     }
 
     delete bi;
+    bp_history = nullptr;
 }
 
 } // namespace branch_prediction
diff --git a/src/cpu/pred/tage_sc_l.hh b/src/cpu/pred/tage_sc_l.hh
index 7dead58363..6c56e69982 100644
--- a/src/cpu/pred/tage_sc_l.hh
+++ b/src/cpu/pred/tage_sc_l.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2022-2023 The University of Edinburgh
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2018 Metempsy Technology Consulting
  * All rights reserved.
  *
@@ -159,12 +171,11 @@ class TAGE_SC_L: public LTAGE
   public:
     TAGE_SC_L(const TAGE_SC_LParams &params);
 
-    bool predict(
-        ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override;
+    bool predict(ThreadID tid, Addr pc, bool cond_branch, void* &b) override;
 
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst,
-                Addr corrTarget) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
 
   protected:
 
diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc
index b3a55313b7..a480497c0a 100644
--- a/src/cpu/pred/tournament.cc
+++ b/src/cpu/pred/tournament.cc
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2011, 2014 ARM Limited
+ * Copyright (c) 2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -134,50 +135,22 @@ TournamentBP::calcLocHistIdx(Addr &branch_addr)
 
 inline
 void
-TournamentBP::updateGlobalHistTaken(ThreadID tid)
+TournamentBP::updateGlobalHist(ThreadID tid, bool taken)
 {
-    globalHistory[tid] = (globalHistory[tid] << 1) | 1;
+    globalHistory[tid] = (globalHistory[tid] << 1) | taken;
     globalHistory[tid] = globalHistory[tid] & historyRegisterMask;
 }
 
 inline
 void
-TournamentBP::updateGlobalHistNotTaken(ThreadID tid)
+TournamentBP::updateLocalHist(unsigned local_history_idx, bool taken)
 {
-    globalHistory[tid] = (globalHistory[tid] << 1);
-    globalHistory[tid] = globalHistory[tid] & historyRegisterMask;
-}
-
-inline
-void
-TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
-{
-    localHistoryTable[local_history_idx] =
-        (localHistoryTable[local_history_idx] << 1) | 1;
-}
-
-inline
-void
-TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
-{
-    localHistoryTable[local_history_idx] =
-        (localHistoryTable[local_history_idx] << 1);
-}
-
-
-void
-TournamentBP::btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history)
-{
-    unsigned local_history_idx = calcLocHistIdx(branch_addr);
-    //Update Global History to Not Taken (clear LSB)
-    globalHistory[tid] &= (historyRegisterMask & ~1ULL);
-    //Update Local History to Not Taken
     localHistoryTable[local_history_idx] =
-       localHistoryTable[local_history_idx] & (localPredictorMask & ~1ULL);
+        (localHistoryTable[local_history_idx] << 1) | taken;
 }
 
 bool
-TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
+TournamentBP::lookup(ThreadID tid, Addr pc, void * &bp_history)
 {
     bool local_prediction;
     unsigned local_history_idx;
@@ -187,7 +160,7 @@ TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
     bool choice_prediction;
 
     //Lookup in the local predictor to get its branch prediction
-    local_history_idx = calcLocHistIdx(branch_addr);
+    local_history_idx = calcLocHistIdx(pc);
     local_predictor_idx = localHistoryTable[local_history_idx]
         & localPredictorMask;
     local_prediction = localCtrs[local_predictor_idx] > localThreshold;
@@ -212,57 +185,53 @@ TournamentBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
 
     assert(local_history_idx < localHistoryTableSize);
 
-    // Speculative update of the global history and the
-    // selected local history.
+    // Select and return the prediction
+    // The history update will happen in the next function
     if (choice_prediction) {
-        if (global_prediction) {
-            updateGlobalHistTaken(tid);
-            updateLocalHistTaken(local_history_idx);
-            return true;
-        } else {
-            updateGlobalHistNotTaken(tid);
-            updateLocalHistNotTaken(local_history_idx);
-            return false;
-        }
+        return global_prediction;
     } else {
-        if (local_prediction) {
-            updateGlobalHistTaken(tid);
-            updateLocalHistTaken(local_history_idx);
-            return true;
-        } else {
-            updateGlobalHistNotTaken(tid);
-            updateLocalHistNotTaken(local_history_idx);
-            return false;
-        }
+        return local_prediction;
     }
 }
 
 void
-TournamentBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history)
+TournamentBP::updateHistories(ThreadID tid, Addr pc, bool uncond,
+                         bool taken, Addr target, void * &bp_history)
 {
-    // Create BPHistory and pass it back to be recorded.
-    BPHistory *history = new BPHistory;
-    history->globalHistory = globalHistory[tid];
-    history->localPredTaken = true;
-    history->globalPredTaken = true;
-    history->globalUsed = true;
-    history->localHistoryIdx = invalidPredictorIndex;
-    history->localHistory = invalidPredictorIndex;
-    bp_history = static_cast<void *>(history);
-
-    updateGlobalHistTaken(tid);
+    assert(uncond || bp_history);
+    if (uncond) {
+        // Create BPHistory and pass it back to be recorded.
+        BPHistory *history = new BPHistory;
+        history->globalHistory = globalHistory[tid];
+        history->localPredTaken = true;
+        history->globalPredTaken = true;
+        history->globalUsed = true;
+        history->localHistoryIdx = invalidPredictorIndex;
+        history->localHistory = invalidPredictorIndex;
+        bp_history = static_cast<void *>(history);
+    }
+
+    // Update the global history for all branches
+    updateGlobalHist(tid, taken);
+
+    // Update the local history only for conditional branches
+    if (!uncond) {
+        auto history = static_cast<BPHistory *>(bp_history);
+        updateLocalHist(history->localHistoryIdx, taken);
+    }
 }
 
+
 void
-TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken,
-                     void *bp_history, bool squashed,
-                     const StaticInstPtr & inst, Addr corrTarget)
+TournamentBP::update(ThreadID tid, Addr pc, bool taken,
+                     void * &bp_history, bool squashed,
+                     const StaticInstPtr & inst, Addr target)
 {
     assert(bp_history);
 
     BPHistory *history = static_cast<BPHistory *>(bp_history);
 
-    unsigned local_history_idx = calcLocHistIdx(branch_addr);
+    unsigned local_history_idx = calcLocHistIdx(pc);
 
     assert(local_history_idx < localHistoryTableSize);
 
@@ -330,10 +299,11 @@ TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken,
 
     // We're done with this history, now delete it.
     delete history;
+    bp_history = nullptr;
 }
 
 void
-TournamentBP::squash(ThreadID tid, void *bp_history)
+TournamentBP::squash(ThreadID tid, void * &bp_history)
 {
     BPHistory *history = static_cast<BPHistory *>(bp_history);
 
@@ -347,6 +317,7 @@ TournamentBP::squash(ThreadID tid, void *bp_history)
 
     // Delete this BPHistory now that we're done with it.
     delete history;
+    bp_history = nullptr;
 }
 
 #ifdef GEM5_DEBUG
diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh
index 018d6756e4..1f404c1cce 100644
--- a/src/cpu/pred/tournament.hh
+++ b/src/cpu/pred/tournament.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2011, 2014 ARM Limited
+ * Copyright (c) 2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -70,52 +71,14 @@ class TournamentBP : public BPredUnit
      */
     TournamentBP(const TournamentBPParams &params);
 
-    /**
-     * Looks up the given address in the branch predictor and returns
-     * a true/false value as to whether it is taken.  Also creates a
-     * BPHistory object to store any state it will need on squash/update.
-     * @param branch_addr The address of the branch to look up.
-     * @param bp_history Pointer that will be set to the BPHistory object.
-     * @return Whether or not the branch is taken.
-     */
-    bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history);
-
-    /**
-     * Records that there was an unconditional branch, and modifies
-     * the bp history to point to an object that has the previous
-     * global history stored in it.
-     * @param bp_history Pointer that will be set to the BPHistory object.
-     */
-    void uncondBranch(ThreadID tid, Addr pc, void * &bp_history);
-    /**
-     * Updates the branch predictor to Not Taken if a BTB entry is
-     * invalid or not found.
-     * @param branch_addr The address of the branch to look up.
-     * @param bp_history Pointer to any bp history state.
-     * @return Whether or not the branch is taken.
-     */
-    void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
-    /**
-     * Updates the branch predictor with the actual result of a branch.
-     * @param branch_addr The address of the branch to update.
-     * @param taken Whether or not the branch was taken.
-     * @param bp_history Pointer to the BPHistory object that was created
-     * when the branch was predicted.
-     * @param squashed is set when this function is called during a squash
-     * operation.
-     * @param inst Static instruction information
-     * @param corrTarget Resolved target of the branch (only needed if
-     * squashed)
-     */
-    void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
-                bool squashed, const StaticInstPtr & inst, Addr corrTarget);
-
-    /**
-     * Restores the global branch history on a squash.
-     * @param bp_history Pointer to the BPHistory object that has the
-     * previous global branch history in it.
-     */
-    void squash(ThreadID tid, void *bp_history);
+    // Base class methods.
+    bool lookup(ThreadID tid, Addr pc, void* &bp_history) override;
+    void updateHistories(ThreadID tid, Addr pc, bool uncond, bool taken,
+                         Addr target,  void * &bp_history) override;
+    void update(ThreadID tid, Addr pc, bool taken,
+                void * &bp_history, bool squashed,
+                const StaticInstPtr & inst, Addr target) override;
+    void squash(ThreadID tid, void * &bp_history) override;
 
   private:
     /**
@@ -131,25 +94,18 @@ class TournamentBP : public BPredUnit
      */
     inline unsigned calcLocHistIdx(Addr &branch_addr);
 
-    /** Updates global history as taken. */
-    inline void updateGlobalHistTaken(ThreadID tid);
-
-    /** Updates global history as not taken. */
-    inline void updateGlobalHistNotTaken(ThreadID tid);
-
-    /**
-     * Updates local histories as taken.
-     * @param local_history_idx The local history table entry that
-     * will be updated.
-     */
-    inline void updateLocalHistTaken(unsigned local_history_idx);
+    /** Updates global history with the given direction.
+     * @param taken Whether or not the branch was taken.
+     */
+    inline void updateGlobalHist(ThreadID tid, bool taken);
 
     /**
-     * Updates local histories as not taken.
+     * Updates local histories.
      * @param local_history_idx The local history table entry that
      * will be updated.
+     * @param taken Whether or not the branch was taken.
      */
-    inline void updateLocalHistNotTaken(unsigned local_history_idx);
+    inline void updateLocalHist(unsigned local_history_idx, bool taken);
 
     /**
      * The branch history information that is created upon predicting
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 7ecc57d2f0..12b05f9b0e 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2017, 2020 ARM Limited
+ * Copyright (c) 2017, 2020, 2023 Arm Limited
+ * Copyright (c) 2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -181,6 +182,7 @@ class StaticInst : public RefCounted, public StaticInstFlags
     bool isNonSpeculative() const { return flags[IsNonSpeculative]; }
     bool isQuiesce() const { return flags[IsQuiesce]; }
     bool isUnverifiable() const { return flags[IsUnverifiable]; }
+    bool isPseudo() const { return flags[IsPseudo]; }
     bool isSyscall() const { return flags[IsSyscall]; }
     bool isMacroop() const { return flags[IsMacroop]; }
     bool isMicroop() const { return flags[IsMicroop]; }
@@ -250,6 +252,11 @@ class StaticInst : public RefCounted, public StaticInstFlags
         _destRegIdxPtr = dest;
     }
 
+    /**
+     * Instruction size in bytes. Necessary for dynamic instruction sizes
+     */
+    size_t _size = 0;
+
     /**
      * Base mnemonic (e.g., "add").  Used by generateDisassembly()
      * methods.  Also useful to readily identify instructions from
@@ -307,6 +314,17 @@ class StaticInst : public RefCounted, public StaticInstFlags
         panic("buildRetPC not defined!");
     }
 
+    size_t size() const
+    {
+        if (_size == 0) fatal(
+            "Instruction size for this instruction not set! It's size is "
+            "required for the decoupled front-end. Either use the standard "
+            "front-end or this ISA needs to be extended with the instruction "
+            "size. Refer to the X86, Arm or RiscV decoders for an example.");
+        return _size;
+    }
+    virtual void size(size_t newSize) { _size = newSize; }
+
     /**
      * Return the microop that goes with a particular micropc. This should
      * only be defined/used in macroops which will contain microops
diff --git a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
index 178376bee9..c314392520 100644
--- a/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
+++ b/src/cpu/testers/gpu_ruby_test/ProtocolTester.py
@@ -73,7 +73,7 @@ class ProtocolTester(ClockedObject):
     random_seed = Param.Int(
         0,
         "Random seed number. Default value (0) means \
-                                using runtime-specific value.",
+                                using base/random.hh without seed.",
     )
     log_file = Param.String("Log file's name")
     system = Param.System(Parent.any, "System we belong to")
diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc
index 37f74203f7..049ba86e51 100644
--- a/src/cpu/testers/gpu_ruby_test/address_manager.cc
+++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc
@@ -32,6 +32,8 @@
 #include "cpu/testers/gpu_ruby_test/address_manager.hh"
 
 #include <algorithm>
+#include <climits>
+#include <random>
 
 #include "base/intmath.hh"
 #include "base/logging.hh"
@@ -58,8 +60,13 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic)
         randAddressMap[i] = (Addr)((i + 128) << floorLog2(sizeof(Value)));
     }
 
-    // randomly shuffle randAddressMap
-    std::random_shuffle(randAddressMap.begin(), randAddressMap.end());
+    // randomly shuffle randAddressMap. The seed is determined by the random_mt
+    // gem5 rng. This allows for deterministic randomization.
+    std::shuffle(
+        randAddressMap.begin(),
+        randAddressMap.end(),
+        std::default_random_engine(random_mt.random<unsigned>(0,UINT_MAX))
+    );
 
     // initialize atomic locations
     // first and last normal location per atomic location
diff --git a/src/cpu/testers/gpu_ruby_test/episode.cc b/src/cpu/testers/gpu_ruby_test/episode.cc
index 6822049bbd..7e16b0ef07 100644
--- a/src/cpu/testers/gpu_ruby_test/episode.cc
+++ b/src/cpu/testers/gpu_ruby_test/episode.cc
@@ -34,6 +34,7 @@
 #include <fstream>
 #include <unordered_set>
 
+#include "base/random.hh"
 #include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
 #include "cpu/testers/gpu_ruby_test/tester_thread.hh"
 
@@ -100,7 +101,7 @@ Episode::initActions()
     int num_loads = numLoads;
     int num_stores = numStores;
     while ((num_loads + num_stores) > 0) {
-        switch (random() % 2) {
+        switch (random_mt.random<unsigned int>() % 2) {
             case 0: // Load
                 if (num_loads > 0) {
                     actions.push_back(new Action(Action::Type::LOAD,
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
index f2fd7f9600..6b3f9e19f1 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc
@@ -34,8 +34,8 @@
 #include <algorithm>
 #include <ctime>
 #include <fstream>
-#include <random>
 
+#include "base/random.hh"
 #include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
 #include "cpu/testers/gpu_ruby_test/dma_thread.hh"
 #include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
@@ -141,11 +141,20 @@ ProtocolTester::ProtocolTester(const Params &p)
 
     sentExitSignal = false;
 
-    // set random seed number
+    // set random seed number, if specified.
+    // Note: random_mt will use a fixed seed if random_seed is not set.
+    // This ensures reproducible results.
     if (p.random_seed != 0) {
-        srand(p.random_seed);
+        random_mt.init(p.random_seed);
     } else {
-        srand(time(NULL));
+        warn(
+            "If `random_seed == 0` (or `random_seed` is unset) "
+            "ProtocolTester does not seed the RNG. This will NOT result in "
+            "the RNG generating different results each run. In this case the "
+            "RNG is seeded by a default value. This differs from behavior in "
+            "previous versions of gem5. Setting `random_seed` to a non-zero "
+            "value is strongly recommended."
+        );
     }
 
     actionCount = 0;
diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.cc b/src/cpu/testers/gpu_ruby_test/tester_thread.cc
index 760f8c2d87..ce3a1bccc6 100644
--- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc
+++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc
@@ -33,6 +33,7 @@
 
 #include <fstream>
 
+#include "base/random.hh"
 #include "debug/ProtocolTest.hh"
 
 namespace gem5
@@ -144,7 +145,8 @@ TesterThread::attachTesterThreadToPorts(ProtocolTester *_tester,
 void
 TesterThread::issueNewEpisode()
 {
-    int num_reg_loads = random() % tester->getEpisodeLength();
+    int num_reg_loads = \
+        random_mt.random<unsigned int>() % tester->getEpisodeLength();
     int num_reg_stores = tester->getEpisodeLength() - num_reg_loads;
 
     // create a new episode
diff --git a/src/cpu/testers/memtest/MemTest.py b/src/cpu/testers/memtest/MemTest.py
index 24bd974804..2d6a0e33c5 100644
--- a/src/cpu/testers/memtest/MemTest.py
+++ b/src/cpu/testers/memtest/MemTest.py
@@ -63,6 +63,7 @@ class MemTest(ClockedObject):
     percent_reads = Param.Percent(65, "Percentage reads")
     percent_functional = Param.Percent(50, "Percentage functional accesses")
     percent_uncacheable = Param.Percent(10, "Percentage uncacheable")
+    percent_atomic = Param.Percent(0, "Percentage atomics")
 
     # Determine how often to print progress messages and what timeout
     # to use for checking progress of both requests and responses
diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc
index 7c256d8642..a84bf67cd9 100644
--- a/src/cpu/testers/memtest/memtest.cc
+++ b/src/cpu/testers/memtest/memtest.cc
@@ -94,6 +94,7 @@ MemTest::MemTest(const Params &p)
       percentReads(p.percent_reads),
       percentFunctional(p.percent_functional),
       percentUncacheable(p.percent_uncacheable),
+      percentAtomic(p.percent_atomic),
       requestorId(p.system->getRequestorId(this)),
       blockSize(p.system->cacheLineSize()),
       blockAddrMask(blockSize - 1),
@@ -115,6 +116,7 @@ MemTest::MemTest(const Params &p)
     // set up counters
     numReads = 0;
     numWrites = 0;
+    numAtomics = 0;
 
     // kick things into action
     schedule(tickEvent, curTick());
@@ -142,7 +144,7 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
     outstandingAddrs.erase(remove_addr);
 
     DPRINTF(MemTest, "Completing %s at address %x (blk %x) %s\n",
-            pkt->isWrite() ? "write" : "read",
+            pkt->isWrite() ? pkt->isAtomicOp() ? "atomic" : "write" : "read",
             req->getPaddr(), blockAlign(req->getPaddr()),
             pkt->isError() ? "error" : "success");
 
@@ -153,7 +155,25 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
             panic( "%s access failed at %#x\n",
                 pkt->isWrite() ? "Write" : "Read", req->getPaddr());
     } else {
-        if (pkt->isRead()) {
+        if (pkt->isAtomicOp()) {
+            uint8_t ref_data = referenceData[req->getPaddr()];
+            if (pkt_data[0] != ref_data) {
+                panic("%s: read of %x (blk %x) @ cycle %d "
+                      "returns %x, expected %x\n", name(),
+                       req->getPaddr(), blockAlign(req->getPaddr()), curTick(),
+                       pkt_data[0], ref_data);
+            }
+            DPRINTF(MemTest,
+                    "Completing atomic at address %x (blk %x) value %x\n",
+                    req->getPaddr(), blockAlign(req->getPaddr()),
+                    pkt_data[0]);
+
+            referenceData[req->getPaddr()] =
+                   atomicPendingData[req->getPaddr()];
+
+            numAtomics++;
+            stats.numAtomics++;
+        } else if (pkt->isRead()) {
             uint8_t ref_data = referenceData[req->getPaddr()];
             if (pkt_data[0] != ref_data) {
                 panic("%s: read of %x (blk %x) @ cycle %d "
@@ -167,9 +187,10 @@ MemTest::completeRequest(PacketPtr pkt, bool functional)
 
             if (numReads == (uint64_t)nextProgressMessage) {
                 ccprintf(std::cerr,
-                        "%s: completed %d read, %d write accesses @%d\n",
-                        name(), numReads, numWrites, curTick());
-                nextProgressMessage += progressInterval;
+                        "%s: completed %d read, %d write, "
+                        "%d atomic accesses @%d\n",
+                        name(), numReads, numWrites, numAtomics, curTick());
+                        nextProgressMessage += progressInterval;
             }
 
             if (maxLoads != 0 && numReads >= maxLoads)
@@ -205,7 +226,9 @@ MemTest::MemTestStats::MemTestStats(statistics::Group *parent)
       ADD_STAT(numReads, statistics::units::Count::get(),
                "number of read accesses completed"),
       ADD_STAT(numWrites, statistics::units::Count::get(),
-               "number of write accesses completed")
+               "number of write accesses completed"),
+      ADD_STAT(numAtomics, statistics::units::Count::get(),
+               "number of atomic accesses completed")
 {
 
 }
@@ -221,6 +244,8 @@ MemTest::tick()
     unsigned cmd = random_mt.random(0, 100);
     uint8_t data = random_mt.random<uint8_t>();
     bool uncacheable = random_mt.random(0, 100) < percentUncacheable;
+    bool do_atomic = (random_mt.random(0, 100) < percentAtomic) &&
+                     !uncacheable;
     unsigned base = random_mt.random(0, 1);
     Request::Flags flags;
     Addr paddr;
@@ -281,13 +306,36 @@ MemTest::tick()
         pkt = new Packet(req, MemCmd::ReadReq);
         pkt->dataDynamic(pkt_data);
     } else {
-        DPRINTF(MemTest, "Initiating %swrite at addr %x (blk %x) value %x\n",
-                do_functional ? "functional " : "", req->getPaddr(),
-                blockAlign(req->getPaddr()), data);
-
-        pkt = new Packet(req, MemCmd::WriteReq);
-        pkt->dataDynamic(pkt_data);
-        pkt_data[0] = data;
+        if (do_atomic) {
+            DPRINTF(MemTest,
+                    "Initiating atomic at addr %x (blk %x) value %x\n",
+                    req->getPaddr(), blockAlign(req->getPaddr()), data);
+
+            TypedAtomicOpFunctor<uint8_t> *_amo_op =
+                  new AtomicGeneric3Op<uint8_t>(
+                  data, data,
+                  [](uint8_t* b, uint8_t a, uint8_t c){
+                      *b = c;
+                  });
+            assert(_amo_op);
+            AtomicOpFunctorPtr amo_op = AtomicOpFunctorPtr(_amo_op);
+            req->setAtomicOpFunctor(std::move(amo_op));
+            req->setFlags(Request::ATOMIC_RETURN_OP);
+
+            pkt = new Packet(req, MemCmd::WriteReq);
+            pkt->dataDynamic(pkt_data);
+            pkt_data[0] = data;
+            atomicPendingData[req->getPaddr()] = data;
+        } else {
+            DPRINTF(MemTest,
+                    "Initiating %swrite at addr %x (blk %x) value %x\n",
+                    do_functional ? "functional " : "", req->getPaddr(),
+                    blockAlign(req->getPaddr()), data);
+
+            pkt = new Packet(req, MemCmd::WriteReq);
+            pkt->dataDynamic(pkt_data);
+            pkt_data[0] = data;
+        }
     }
 
     // there is no point in ticking if we are waiting for a retry
diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh
index 3fd1674191..32ffd5cd6e 100644
--- a/src/cpu/testers/memtest/memtest.hh
+++ b/src/cpu/testers/memtest/memtest.hh
@@ -131,6 +131,7 @@ class MemTest : public ClockedObject
     const unsigned percentReads;
     const unsigned percentFunctional;
     const unsigned percentUncacheable;
+    const unsigned percentAtomic;
 
     /** Request id for all generated traffic */
     RequestorID requestorId;
@@ -138,11 +139,12 @@ class MemTest : public ClockedObject
     unsigned int id;
 
     std::unordered_set<Addr> outstandingAddrs;
+    std::unordered_map<Addr, uint8_t> atomicPendingData;
 
     // store the expected value for the addresses we have touched
     std::unordered_map<Addr, uint8_t> referenceData;
 
-    const unsigned blockSize;
+    const Addr blockSize;
 
     const Addr blockAddrMask;
 
@@ -169,6 +171,7 @@ class MemTest : public ClockedObject
 
     uint64_t numReads;
     uint64_t numWrites;
+    uint64_t numAtomics;
     const uint64_t maxLoads;
 
     const bool atomic;
@@ -180,6 +183,7 @@ class MemTest : public ClockedObject
         MemTestStats(statistics::Group *parent);
         statistics::Scalar numReads;
         statistics::Scalar numWrites;
+        statistics::Scalar numAtomics;
     } stats;
 
     /**
diff --git a/src/cpu/testers/traffic_gen/BaseTrafficGen.py b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
index b5df83e779..7fdfda22e5 100644
--- a/src/cpu/testers/traffic_gen/BaseTrafficGen.py
+++ b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
@@ -37,6 +37,7 @@
 from m5.proxy import *
 from m5.objects.ClockedObject import ClockedObject
 
+
 # Types of Stream Generators.
 # Those are orthogonal to the other generators in the TrafficGen
 # and are meant to initialize the stream and substream IDs for
diff --git a/src/cpu/testers/traffic_gen/TrafficGen.py b/src/cpu/testers/traffic_gen/TrafficGen.py
index 6f1aa67bfd..15190120cc 100644
--- a/src/cpu/testers/traffic_gen/TrafficGen.py
+++ b/src/cpu/testers/traffic_gen/TrafficGen.py
@@ -36,6 +36,7 @@
 from m5.params import *
 from m5.objects.BaseTrafficGen import *
 
+
 # The behaviour of this traffic generator is specified in a
 # configuration file, and this file describes a state transition graph
 # where each state is a specific generator behaviour. Examples include
diff --git a/src/cpu/trace/TraceCPU.py b/src/cpu/trace/TraceCPU.py
index 1be16518d7..5e82fd9f9f 100644
--- a/src/cpu/trace/TraceCPU.py
+++ b/src/cpu/trace/TraceCPU.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2013 - 2016 ARM Limited
+# Copyright (c) 2013 - 2016, 2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -34,10 +34,11 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from m5.params import *
-from m5.objects.BaseCPU import BaseCPU
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
 
 
-class TraceCPU(BaseCPU):
+class TraceCPU(ClockedObject):
     """Trace CPU model which replays traces generated in a prior simulation
     using DerivO3CPU or its derived classes. It interfaces with L1 caches.
     """
@@ -54,13 +55,10 @@ def memory_mode(cls):
     def require_caches(cls):
         return True
 
-    def addPMU(self, pmu=None):
-        pass
-
-    @classmethod
-    def support_take_over(cls):
-        return True
+    system = Param.System(Parent.any, "system object")
 
+    icache_port = RequestPort("Instruction Port")
+    dcache_port = RequestPort("Data Port")
     instTraceFile = Param.String("", "Instruction trace file")
     dataTraceFile = Param.String("", "Data dependency trace file")
     sizeStoreBuffer = Param.Unsigned(
diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc
index 7399cf6199..336a13beda 100644
--- a/src/cpu/trace/trace_cpu.cc
+++ b/src/cpu/trace/trace_cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 - 2016 ARM Limited
+ * Copyright (c) 2013 - 2016, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -39,6 +39,7 @@
 
 #include "base/compiler.hh"
 #include "sim/sim_exit.hh"
+#include "sim/system.hh"
 
 namespace gem5
 {
@@ -47,7 +48,8 @@ namespace gem5
 int TraceCPU::numTraceCPUs = 0;
 
 TraceCPU::TraceCPU(const TraceCPUParams &params)
-    :   BaseCPU(params),
+    :   ClockedObject(params),
+        cacheLineSize(params.system->cacheLineSize()),
         icachePort(this),
         dcachePort(this),
         instRequestorID(params.system->getRequestorId(this, "inst")),
@@ -93,14 +95,6 @@ TraceCPU::updateNumOps(uint64_t rob_num)
     }
 }
 
-void
-TraceCPU::takeOverFrom(BaseCPU *oldCPU)
-{
-    // Unbind the ports of the old CPU and bind the ports of the TraceCPU.
-    getInstPort().takeOverFrom(&oldCPU->getInstPort());
-    getDataPort().takeOverFrom(&oldCPU->getDataPort());
-}
-
 void
 TraceCPU::init()
 {
@@ -109,7 +103,7 @@ TraceCPU::init()
     DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
             dataTraceFile);
 
-    BaseCPU::init();
+    ClockedObject::init();
 
     // Get the send tick of the first instruction read request
     Tick first_icache_tick = icacheGen.init();
@@ -176,7 +170,7 @@ TraceCPU::schedDcacheNext()
     DPRINTF(TraceCPUData, "DcacheGen event.\n");
 
     // Update stat for numCycles
-    baseStats.numCycles = clockEdge() / clockPeriod();
+    traceStats.numCycles = clockEdge() / clockPeriod();
 
     dcacheGen.execute();
     if (dcacheGen.isExecComplete()) {
@@ -216,7 +210,7 @@ TraceCPU::checkAndSchedExitEvent()
     ADD_STAT(cpi, statistics::units::Rate<
                     statistics::units::Cycle, statistics::units::Count>::get(),
              "Cycles per micro-op used as a proxy for CPI",
-             trace->baseStats.numCycles / numOps)
+             trace->traceStats.numCycles / numOps)
 {
     cpi.precision(6);
 }
@@ -591,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
     // stat counting this is useful to keep a check on how frequently this
     // happens. If required the code could be revised to mimick splitting such
     // a request into two.
-    unsigned blk_size = owner.cacheLineSize();
+    Addr blk_size = owner.cacheLineSize;
     Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
     if (!(blk_offset + node_ptr->size <= blk_size)) {
         node_ptr->size = blk_size - blk_offset;
@@ -1152,6 +1146,20 @@ TraceCPU::schedDcacheNextEvent(Tick when)
 
 }
 
+Port &
+TraceCPU::getPort(const std::string &if_name, PortID idx)
+{
+    // Resolve the port by name: "dcache_port" and "icache_port" map to
+    // this object's data and instruction ports, and any other name is
+    // handed to ClockedObject::getPort.
+    if (if_name == "dcache_port")
+        return getDataPort();
+    else if (if_name == "icache_port")
+        return getInstPort();
+    else
+        return ClockedObject::getPort(if_name, idx);
+}
+
 bool
 TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
 {
diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh
index 87f820fe6d..c7fd804351 100644
--- a/src/cpu/trace/trace_cpu.hh
+++ b/src/cpu/trace/trace_cpu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 - 2016 ARM Limited
+ * Copyright (c) 2013 - 2016, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -45,13 +45,16 @@
 #include <unordered_map>
 
 #include "base/statistics.hh"
-#include "cpu/base.hh"
 #include "debug/TraceCPUData.hh"
 #include "debug/TraceCPUInst.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
 #include "params/TraceCPU.hh"
 #include "proto/inst_dep_record.pb.h"
 #include "proto/packet.pb.h"
 #include "proto/protoio.hh"
+#include "sim/clocked_object.hh"
 #include "sim/sim_events.hh"
 
 namespace gem5
@@ -66,8 +69,7 @@ namespace gem5
  * simulation compared to the detailed cpu model and good correlation when the
  * same trace is used for playback on different memory sub-systems.
  *
- * The TraceCPU inherits from BaseCPU so some virtual methods need to be
- * defined. It has two port subclasses inherited from RequestPort for
+ * The TraceCPU has two port subclasses inherited from RequestPort for
  * instruction and data ports. It issues the memory requests deducing the
  * timing from the trace and without performing real execution of micro-ops. As
  * soon as the last dependency for an instruction is complete, its
@@ -139,22 +141,13 @@ namespace gem5
  * exit.
  */
 
-class TraceCPU : public BaseCPU
+class TraceCPU : public ClockedObject
 {
 
   public:
     TraceCPU(const TraceCPUParams &params);
 
-    void init();
-
-    /**
-     * This is a pure virtual function in BaseCPU. As we don't know how many
-     * insts are in the trace but only know how how many micro-ops are we
-     * cannot count this stat.
-     *
-     * @return 0
-     */
-    Counter totalInsts() const { return 0; }
+    void init() override;
 
     /**
      * Return totalOps as the number of committed micro-ops plus the
@@ -170,17 +163,6 @@ class TraceCPU : public BaseCPU
      */
     void updateNumOps(uint64_t rob_num);
 
-    /* Pure virtual function in BaseCPU. Do nothing. */
-    void wakeup(ThreadID tid=0) { return; }
-
-    /*
-     * When resuming from checkpoint in FS mode, the TraceCPU takes over from
-     * the old cpu. This function overrides the takeOverFrom() function in the
-     * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the
-     * TraceCPU.
-     */
-    void takeOverFrom(BaseCPU *oldCPU);
-
     /**
      * When instruction cache port receives a retry, schedule event
      * icacheNextEvent.
@@ -303,6 +285,9 @@ class TraceCPU : public BaseCPU
         TraceCPU* owner;
     };
 
+    /** Cache the cache line size that we get from the system */
+    const Addr cacheLineSize;
+
     /** Port to connect to L1 instruction cache. */
     IcachePort icachePort;
 
@@ -1112,6 +1097,8 @@ class TraceCPU : public BaseCPU
 
         /** Stat for number of simulated micro-ops. */
         statistics::Scalar numOps;
+        /** Number of CPU cycles simulated */
+        statistics::Scalar numCycles;
         /** Stat for the CPI. This is really cycles per
          *  micro-op and not inst. */
         statistics::Formula cpi;
@@ -1125,6 +1112,18 @@ class TraceCPU : public BaseCPU
     /** Used to get a reference to the dcache port. */
     Port &getDataPort() { return dcachePort; }
 
+    /**
+     * Get a port on this TraceCPU. "dcache_port" and "icache_port"
+     * resolve to the data and instruction ports via getDataPort and
+     * getInstPort; any other name is forwarded to ClockedObject.
+     *
+     * @param if_name the port name
+     * @param idx index, only used when forwarding to ClockedObject
+     *
+     * @return a reference to the port with the given name
+     */
+    Port &getPort(const std::string &if_name,
+                  PortID idx=InvalidPortID) override;
 };
 
 } // namespace gem5
diff --git a/src/dev/IntPin.py b/src/dev/IntPin.py
index 9336a89900..61c645af2f 100644
--- a/src/dev/IntPin.py
+++ b/src/dev/IntPin.py
@@ -29,6 +29,7 @@
 INT_SINK_ROLE = "Int Sink Pin"
 Port.compat(INT_SOURCE_ROLE, INT_SINK_ROLE)
 
+
 # A source pin generally represents a single pin which might connect to
 # multiple sinks.
 class IntSourcePin(VectorPort):
diff --git a/src/dev/ResetPort.py b/src/dev/ResetPort.py
index 15caa476ec..467771a258 100644
--- a/src/dev/ResetPort.py
+++ b/src/dev/ResetPort.py
@@ -29,6 +29,7 @@
 RESET_RESPONSE_ROLE = "Reset Response"
 Port.compat(RESET_REQUEST_ROLE, RESET_RESPONSE_ROLE)
 
+
 # ResetRequestPort is an artifact request port for reset purpose.
 class ResetRequestPort(Port):
     def __init__(self, desc):
diff --git a/src/dev/SConscript b/src/dev/SConscript
index a7714a22d7..89e797acc8 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -54,4 +54,4 @@ Source('pixelpump.cc')
 DebugFlag('Intel8254Timer')
 DebugFlag('MC146818')
 
-GTest('reg_bank.test', 'reg_bank.test.cc')
+GTest('reg_bank.test', 'reg_bank.test.cc', with_tag('gem5 trace'))
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index 616c501c63..7873794109 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -34,6 +34,7 @@
 from m5.objects.Device import DmaDevice, DmaVirtDevice
 from m5.objects.ClockedObject import ClockedObject
 
+
 # PCI device model for an AMD Vega 10 based GPU. The PCI codes and BARs
 # correspond to a Vega Frontier Edition hardware device. None of the PCI
 # related values in this class should be changed.
diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript
index 9f8eeacd00..b8ba454d48 100644
--- a/src/dev/amdgpu/SConscript
+++ b/src/dev/amdgpu/SConscript
@@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
                                     tags='x86 isa')
 
 Source('amdgpu_device.cc', tags='x86 isa')
+Source('amdgpu_gfx.cc', tags='x86 isa')
 Source('amdgpu_nbio.cc', tags='x86 isa')
 Source('amdgpu_vm.cc', tags='x86 isa')
 Source('interrupt_handler.cc', tags='x86 isa')
@@ -50,6 +51,7 @@ Source('system_hub.cc', tags='x86 isa')
 
 DebugFlag('AMDGPUDevice', tags='x86 isa')
 DebugFlag('AMDGPUMem', tags='x86 isa')
+DebugFlag('AMDGPUSystemHub', tags='x86 isa')
 DebugFlag('PM4PacketProcessor', tags='x86 isa')
 DebugFlag('SDMAEngine', tags='x86 isa')
 DebugFlag('SDMAData', tags='x86 isa')
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index d1058f1606..b25ffbf79f 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -216,11 +216,47 @@ AMDGPUDevice::getAddrRanges() const
 Tick
 AMDGPUDevice::readConfig(PacketPtr pkt)
 {
-    [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
-    DPRINTF(AMDGPUDevice, "Read Config: from offset: %#x size: %#x "
-            "data: %#x\n", offset, pkt->getSize(), config.data[offset]);
+    int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
 
-    Tick delay = PciDevice::readConfig(pkt);
+    if (offset < PCI_DEVICE_SPECIFIC) {
+        PciDevice::readConfig(pkt);
+    } else {
+        if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
+            int pxcap_offset = offset - PXCAP_BASE;
+
+            switch (pkt->getSize()) {
+                case sizeof(uint8_t):
+                    pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
+                    DPRINTF(AMDGPUDevice,
+                        "Read PXCAP:  dev %#x func %#x reg %#x 1 bytes: data "
+                        "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
+                        (uint32_t)pkt->getLE<uint8_t>());
+                    break;
+                case sizeof(uint16_t):
+                    pkt->setLE<uint16_t>(
+                        *(uint16_t*)&pxcap.data[pxcap_offset]);
+                    DPRINTF(AMDGPUDevice,
+                        "Read PXCAP:  dev %#x func %#x reg %#x 2 bytes: data "
+                        "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
+                        (uint32_t)pkt->getLE<uint16_t>());
+                    break;
+                case sizeof(uint32_t):
+                    pkt->setLE<uint32_t>(
+                        *(uint32_t*)&pxcap.data[pxcap_offset]);
+                    DPRINTF(AMDGPUDevice,
+                        "Read PXCAP:  dev %#x func %#x reg %#x 4 bytes: data "
+                        "= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset,
+                        (uint32_t)pkt->getLE<uint32_t>());
+                    break;
+                default:
+                    panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
+                          pkt->getSize(), pxcap_offset);
+            }
+            pkt->makeAtomicResponse();
+        } else {
+            warn("Device specific offset %d not implemented!\n", offset);
+        }
+    }
 
     // Before sending MMIOs the driver sends three interrupts in a row.
     // Use this to trigger creating a checkpoint to restore in timing mode.
@@ -231,14 +267,14 @@ AMDGPUDevice::readConfig(PacketPtr pkt)
         if (offset == PCI0_INTERRUPT_PIN) {
             if (++init_interrupt_count == 3) {
                 DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
-                exitSimLoop("checkpoint", 0, curTick() + delay + 1);
+                exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
             }
         } else {
             init_interrupt_count = 0;
         }
     }
 
-    return delay;
+    return configDelay;
 }
 
 Tick
@@ -249,7 +285,24 @@ AMDGPUDevice::writeConfig(PacketPtr pkt)
             "data: %#x\n", offset, pkt->getSize(),
             pkt->getUintX(ByteOrder::little));
 
-    return PciDevice::writeConfig(pkt);
+    if (offset < PCI_DEVICE_SPECIFIC)
+        return PciDevice::writeConfig(pkt);
+
+
+    if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
+        uint8_t *pxcap_data = &(pxcap.data[0]);
+        int pxcap_offset = offset - PXCAP_BASE;
+
+        DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
+                pxcap_offset, pkt->getSize());
+
+        memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr<void>(),
+               pkt->getSize());
+    }
+
+    pkt->makeAtomicResponse();
+
+    return configDelay;
 }
 
 void
@@ -291,6 +344,7 @@ AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset)
     system->getDeviceMemory(readPkt)->access(readPkt);
 
     pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
+    delete readPkt;
 }
 
 void
@@ -325,6 +379,9 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
       case GRBM_BASE:
         gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
         break;
+      case GFX_BASE:
+        gfx.readMMIO(pkt, aperture_offset);
+        break;
       case MMHUB_BASE:
         gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
         break;
@@ -409,7 +466,19 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
             panic("Write to unkown queue type!");
         }
     } else {
-        warn("Unknown doorbell offset: %lx\n", offset);
+        warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
+             offset);
+
+        // We have to ACK the PCI packet immediately, so create a copy of the
+        // packet here to send again. The payload must be copied as well,
+        // otherwise the replayed doorbell would carry uninitialized data.
+        RequestPtr pending_req(pkt->req);
+        PacketPtr pending_pkt = Packet::createWrite(pending_req);
+        uint8_t *pending_data = new uint8_t[pkt->getSize()];
+        memcpy(pending_data, pkt->getConstPtr<uint8_t>(), pkt->getSize());
+        pending_pkt->dataDynamic(pending_data);
+
+        pendingDoorbellPkts.emplace(offset, pending_pkt);
     }
 }
 
@@ -453,6 +520,9 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
       case NBIO_BASE:
         nbio.writeMMIO(pkt, aperture_offset);
         break;
+      case GFX_BASE:
+        gfx.writeMMIO(pkt, aperture_offset);
+        break;
       default:
         DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
         break;
@@ -529,6 +599,17 @@ AMDGPUDevice::write(PacketPtr pkt)
     return pioDelay;
 }
 
+void
+AMDGPUDevice::processPendingDoorbells(uint32_t offset)
+{
+    if (!pendingDoorbellPkts.count(offset))
+        return; // no doorbell write was saved for this offset
+    DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
+    writeDoorbell(pendingDoorbellPkts[offset], offset); // replay it
+    delete pendingDoorbellPkts[offset]; // free the saved copy
+    pendingDoorbellPkts.erase(offset);
+}
+
 bool
 AMDGPUDevice::haveRegVal(uint32_t addr)
 {
@@ -752,6 +833,14 @@ AMDGPUDevice::deallocateAllQueues()
     for (auto& it : sdmaEngs) {
         it.second->deallocateRLCQueues();
     }
+
+    // "All" queues implicitly refers to all user queues. User queues begin at
+    // doorbell address 0x4000, so unmap any queue at or above that address.
+    for (auto [offset, vmid] : doorbellVMIDMap) {
+        if (offset >= 0x4000) {
+            doorbells.erase(offset);
+        }
+    }
 }
 
 void
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index 56ed2f4fa8..b6b6e2a81a 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -36,6 +36,7 @@
 
 #include "base/bitunion.hh"
 #include "dev/amdgpu/amdgpu_defines.hh"
+#include "dev/amdgpu/amdgpu_gfx.hh"
 #include "dev/amdgpu/amdgpu_nbio.hh"
 #include "dev/amdgpu/amdgpu_vm.hh"
 #include "dev/amdgpu/memory_manager.hh"
@@ -89,6 +90,7 @@ class AMDGPUDevice : public PciDevice
     using GPURegMap = std::unordered_map<uint32_t, uint64_t>;
     GPURegMap regs;
     std::unordered_map<uint32_t, QueueType> doorbells;
+    std::unordered_map<uint32_t, PacketPtr> pendingDoorbellPkts;
 
     /**
      * VGA ROM methods
@@ -109,6 +111,7 @@ class AMDGPUDevice : public PciDevice
      * Blocks of the GPU
      */
     AMDGPUNbio nbio;
+    AMDGPUGfx gfx;
     AMDGPUMemoryManager *gpuMemMgr;
     AMDGPUInterruptHandler *deviceIH;
     AMDGPUVM gpuvm;
@@ -185,6 +188,7 @@ class AMDGPUDevice : public PciDevice
      * Set handles to GPU blocks.
      */
     void setDoorbellType(uint32_t offset, QueueType qt);
+    void processPendingDoorbells(uint32_t offset);
     void setSDMAEngine(Addr offset, SDMAEngine *eng);
 
     /**
diff --git a/src/dev/amdgpu/amdgpu_gfx.cc b/src/dev/amdgpu/amdgpu_gfx.cc
new file mode 100644
index 0000000000..3d5b274b86
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_gfx.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dev/amdgpu/amdgpu_gfx.hh"
+
+#include "mem/packet_access.hh"
+#include "sim/core.hh"
+
+namespace gem5
+{
+
+void
+AMDGPUGfx::readMMIO(PacketPtr pkt, Addr offset)
+{
+    // Only the two halves of the captured GPU clock count are readable
+    // through this aperture. Each half is returned as a little-endian
+    // 32-bit value; a read of any other offset leaves pkt untouched.
+    if (offset == AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB) {
+        // Low 32 bits (implicit truncation of the 64-bit count).
+        pkt->setLE<uint32_t>(captured_clock_count);
+    } else if (offset == AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB) {
+        // High 32 bits.
+        pkt->setLE<uint32_t>(captured_clock_count >> 32);
+    }
+}
+
+void
+AMDGPUGfx::writeMMIO(PacketPtr pkt, Addr offset)
+{
+    switch (offset) {
+      case AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT:
+        // Use gem5 Ticks in nanoseconds as the counter. The initial value
+        // of 1 marks the first capture, which is expected to return zero.
+        if (captured_clock_count == 1) {
+            captured_clock_count = 0;
+        } else {
+            captured_clock_count = curTick() / sim_clock::as_int::ns;
+        }
+        break;
+      default:
+        break;
+    }
+}
+
+} // namespace gem5
diff --git a/src/dev/amdgpu/amdgpu_gfx.hh b/src/dev/amdgpu/amdgpu_gfx.hh
new file mode 100644
index 0000000000..c32b8624cf
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_gfx.hh
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DEV_AMDGPU_AMDGPU_GFX_HH__
+#define __DEV_AMDGPU_AMDGPU_GFX_HH__
+
+#include "base/types.hh"
+#include "mem/packet.hh"
+
+/**
+ * MMIO offsets for GFX. This class handles MMIO reads/writes to the GFX_BASE
+ * aperture which are generally read/written by the gfx driver source here:
+ *
+ *      drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/
+ *      drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+ *
+ * The MMIO addresses in the file are dword addresses. Here they are converted
+ * to byte addresses so gem5 does not need to shift the values.
+ */
+
+// Registers used to read GPU clock count used in profiling
+#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB                 0x13090
+#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB                 0x13094
+#define AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT             0x13098
+
+namespace gem5
+{
+
+class AMDGPUGfx
+{
+  public:
+    AMDGPUGfx() { }
+
+    void readMMIO(PacketPtr pkt, Addr offset);
+    void writeMMIO(PacketPtr pkt, Addr offset);
+
+  private:
+    /* GPU clock count latched when the capture MMIO is written. Starts
+     * at 1, which writeMMIO treats as "first capture" and turns into 0.
+     */
+    uint64_t captured_clock_count = 1;
+};
+
+} // namespace gem5
+
+#endif // __DEV_AMDGPU_AMDGPU_GFX_HH__
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index e7b846529e..352af400b0 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -384,7 +384,10 @@ PM4PacketProcessor::mapQueues(PM4Queue *q, PM4MapQueues *pkt)
                 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
                 addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());
 
-        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset,
+        // The doorbellOffset is a dword address. We shift by two / multiply
+        // by four to get the byte address to match doorbell addresses in
+        // the GPU device.
+        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset << 2,
                                      gpuDevice->lastVMID());
 
         QueueDesc *mqd = new QueueDesc();
@@ -444,6 +447,8 @@ PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
 
     DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
             "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
+
+    gpuDevice->processPendingDoorbells(offset);
 }
 
 void
@@ -472,6 +477,8 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
     // Register doorbell with GPU device
     gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
     gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC);
+
+    gpuDevice->processPendingDoorbells(pkt->doorbellOffset << 2);
 }
 
 void
@@ -576,6 +583,7 @@ PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
         gpuDevice->deallocatePasid(pkt->pasid);
         break;
       case 2:
+        panic("Unmapping queue selection 2 unimplemented\n");
         break;
       case 3: {
         auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
@@ -1044,6 +1052,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     int num_queues = queues.size();
     Addr id[num_queues];
     Addr mqd_base[num_queues];
+    uint64_t mqd_read_index[num_queues];
     Addr base[num_queues];
     Addr rptr[num_queues];
     Addr wptr[num_queues];
@@ -1060,6 +1069,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     uint32_t hqd_active[num_queues];
     uint32_t hqd_vmid[num_queues];
     Addr aql_rptr[num_queues];
+    uint32_t aql[num_queues];
     uint32_t doorbell[num_queues];
     uint32_t hqd_pq_control[num_queues];
 
@@ -1068,9 +1078,10 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
         PM4Queue *q = iter.second;
         id[i] = q->id();
         mqd_base[i] = q->mqdBase();
+        mqd_read_index[i] = q->getMQD()->mqdReadIndex;
         bool cur_state = q->ib();
         q->ib(false);
-        base[i] = q->base() >> 8;
+        base[i] = q->base();
         rptr[i] = q->getRptr();
         wptr[i] = q->getWptr();
         q->ib(true);
@@ -1088,6 +1099,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
         hqd_active[i] = q->getMQD()->hqd_active;
         hqd_vmid[i] = q->getMQD()->hqd_vmid;
         aql_rptr[i] = q->getMQD()->aqlRptr;
+        aql[i] = q->getMQD()->aql;
         doorbell[i] = q->getMQD()->doorbell;
         hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
         i++;
@@ -1096,6 +1108,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     SERIALIZE_SCALAR(num_queues);
     SERIALIZE_ARRAY(id, num_queues);
     SERIALIZE_ARRAY(mqd_base, num_queues);
+    SERIALIZE_ARRAY(mqd_read_index, num_queues);
     SERIALIZE_ARRAY(base, num_queues);
     SERIALIZE_ARRAY(rptr, num_queues);
     SERIALIZE_ARRAY(wptr, num_queues);
@@ -1112,6 +1125,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     SERIALIZE_ARRAY(hqd_active, num_queues);
     SERIALIZE_ARRAY(hqd_vmid, num_queues);
     SERIALIZE_ARRAY(aql_rptr, num_queues);
+    SERIALIZE_ARRAY(aql, num_queues);
     SERIALIZE_ARRAY(doorbell, num_queues);
     SERIALIZE_ARRAY(hqd_pq_control, num_queues);
 }
@@ -1127,6 +1141,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
 
     Addr id[num_queues];
     Addr mqd_base[num_queues];
+    uint64_t mqd_read_index[num_queues];
     Addr base[num_queues];
     Addr rptr[num_queues];
     Addr wptr[num_queues];
@@ -1143,11 +1158,13 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     uint32_t hqd_active[num_queues];
     uint32_t hqd_vmid[num_queues];
     Addr aql_rptr[num_queues];
+    uint32_t aql[num_queues];
     uint32_t doorbell[num_queues];
     uint32_t hqd_pq_control[num_queues];
 
     UNSERIALIZE_ARRAY(id, num_queues);
     UNSERIALIZE_ARRAY(mqd_base, num_queues);
+    UNSERIALIZE_ARRAY(mqd_read_index, num_queues);
     UNSERIALIZE_ARRAY(base, num_queues);
     UNSERIALIZE_ARRAY(rptr, num_queues);
     UNSERIALIZE_ARRAY(wptr, num_queues);
@@ -1164,6 +1181,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     UNSERIALIZE_ARRAY(hqd_active, num_queues);
     UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
     UNSERIALIZE_ARRAY(aql_rptr, num_queues);
+    UNSERIALIZE_ARRAY(aql, num_queues);
     UNSERIALIZE_ARRAY(doorbell, num_queues);
     UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);
 
@@ -1172,22 +1190,24 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
         memset(mqd, 0, sizeof(QueueDesc));
 
         mqd->mqdBase = mqd_base[i] >> 8;
-        mqd->base = base[i];
-        mqd->rptr = rptr[i];
-        mqd->ibBase = ib_base[i];
-        mqd->ibRptr = ib_rptr[i];
+        mqd->mqdReadIndex = mqd_read_index[i];
+        mqd->base = base[i] >> 8;
+        mqd->aql = aql[i];
 
         PM4MapQueues* pkt = new PM4MapQueues;
         memset(pkt, 0, sizeof(PM4MapQueues));
         newQueue(mqd, offset[i], pkt, id[i]);
 
-        queues[id[i]]->ib(false);
-        queues[id[i]]->wptr(wptr[i]);
-        queues[id[i]]->ib(true);
-        queues[id[i]]->wptr(ib_wptr[i]);
+        if (ib[i]) {
+            queues[id[i]]->wptr(ib_wptr[i]);
+            queues[id[i]]->rptr(ib_rptr[i]);
+        } else {
+            queues[id[i]]->rptr(rptr[i]);
+            queues[id[i]]->wptr(wptr[i]);
+        }
+        queues[id[i]]->ib(ib[i]);
         queues[id[i]]->offset(offset[i]);
         queues[id[i]]->processing(processing[i]);
-        queues[id[i]]->ib(ib[i]);
         queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
         queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
         queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
@@ -1195,6 +1215,14 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
         queues[id[i]]->getMQD()->doorbell = doorbell[i];
         queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];
 
+        if (mqd->aql) {
+            int mqd_size = (1 << ((hqd_pq_control[i] & 0x3f) + 1)) * 4;
+            auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
+            hsa_pp.setDeviceQueueDesc(aql_rptr[i], base[i], id[i],
+                                  mqd_size, 8, GfxVersion::gfx900, offset[i],
+                                  mqd_read_index[i]);
+        }
+
         DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                 queues[id[i]]->id(), queues[id[i]]->rptr(),
                 queues[id[i]]->wptr());
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index e99d694634..0202f583e6 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -510,9 +510,12 @@ SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
         dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
         } break;
       case SDMA_OP_CONST_FILL: {
-        q->incRptr(sizeof(sdmaConstFill));
-        warn("SDMA_OP_CONST_FILL not implemented");
-        decodeNext(q);
+        DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
+        dmaBuffer = new sdmaConstFill();
+        cb = new DmaVirtCallback<uint64_t>(
+            [ = ] (const uint64_t &)
+                { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
+        dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
         } break;
       case SDMA_OP_PTEPDE: {
         DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
@@ -1026,6 +1029,68 @@ SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
     decodeNext(q);
 }
 
+void
+SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
+{
+    q->incRptr(sizeof(sdmaConstFill));
+
+    sdmaConstFillHeader fill_header;
+    fill_header.ordinal = header;
+
+    DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
+            pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
+            fill_header.sw);
+
+    // Count is number of <size> elements - 1. Size is log2 of byte size.
+    int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
+    uint8_t *fill_data = new uint8_t[fill_bytes];
+
+    memset(fill_data, pkt->srcData, fill_bytes);
+
+    Addr device_addr = getDeviceAddress(pkt->addr);
+    if (device_addr) {
+        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
+                fill_bytes, pkt->srcData, pkt->addr);
+
+        auto cb = new EventFunctionWrapper(
+            [ = ]{ constFillDone(q, pkt, fill_data); }, name());
+
+        // Copy the minimum page size at a time in case the physical addresses
+        // are not contiguous.
+        ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
+        for (; !gen.done(); gen.next()) {
+            Addr chunk_addr = getDeviceAddress(gen.addr());
+            assert(chunk_addr);
+
+            DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
+                    gen.size(), gen.addr(), chunk_addr);
+
+            gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data,
+                                                 gen.size(), 0,
+                                                 gen.last() ? cb : nullptr);
+            fill_data += gen.size();
+        }
+    } else {
+        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
+                fill_bytes, pkt->srcData, pkt->addr);
+
+        auto cb = new DmaVirtCallback<uint64_t>(
+            [ = ] (const uint64_t &)
+                { constFillDone(q, pkt, fill_data); });
+        dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
+    }
+}
+
+void
+SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
+{
+    DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);
+    // fill_data was allocated with new uint8_t[] in constFill(): use delete [].
+    delete [] fill_data;
+    delete pkt;
+    decodeNext(q);
+}
+
 AddrRangeList
 SDMAEngine::getAddrRanges() const
 {
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index bcbd497e8a..5abe63fcc6 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -245,6 +245,8 @@ class SDMAEngine : public DmaVirtDevice
                     uint64_t *dmaBuffer);
     void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                     uint64_t *dmaBuffer);
+    void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header);
+    void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data);
 
     /**
      * Methods for getting SDMA MMIO base address and size. These are set by
diff --git a/src/dev/amdgpu/sdma_packets.hh b/src/dev/amdgpu/sdma_packets.hh
index 52a47d3a2d..07d3f12600 100644
--- a/src/dev/amdgpu/sdma_packets.hh
+++ b/src/dev/amdgpu/sdma_packets.hh
@@ -37,7 +37,7 @@ namespace gem5
 {
 
 /**
- * SDMA packets
+ * SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime
  */
 typedef struct GEM5_PACKED
 {
@@ -80,6 +80,23 @@ typedef struct GEM5_PACKED
 }  sdmaConstFill;
 static_assert(sizeof(sdmaConstFill) == 16);
 
+typedef struct GEM5_PACKED
+{
+    union
+    {
+        struct
+        {
+            uint32_t op : 8;
+            uint32_t sub_op : 8;
+            uint32_t sw : 2;
+            uint32_t res0 : 12;
+            uint32_t fillsize : 2;
+        };
+        uint32_t ordinal;
+    };
+}  sdmaConstFillHeader;
+static_assert(sizeof(sdmaConstFillHeader) == 4);
+
 typedef struct GEM5_PACKED
 {
     uint32_t key0;
diff --git a/src/dev/amdgpu/system_hub.cc b/src/dev/amdgpu/system_hub.cc
index 7a252ea0fa..892a7c4535 100644
--- a/src/dev/amdgpu/system_hub.cc
+++ b/src/dev/amdgpu/system_hub.cc
@@ -31,6 +31,8 @@
 
 #include "dev/amdgpu/system_hub.hh"
 
+#include "debug/AMDGPUSystemHub.hh"
+#include "mem/packet_access.hh"
 #include "mem/port.hh"
 
 namespace gem5
@@ -39,16 +41,92 @@ namespace gem5
 void
 AMDGPUSystemHub::sendRequest(PacketPtr pkt, Event *callback)
 {
-    ResponseEvent *dmaRespEvent = new ResponseEvent(callback);
+    // Some requests, in particular atomics, need to be sent in order
+    // to receive the correct values. If there is a request in progress
+    // to the same address, defer this one until it completes. This is
+    // overly conservative and also serializes reads/writes, but this
+    // situation is rare so it should not impact simulated performance.
+    DeferredReq this_req(pkt, callback);
+    outstandingReqs[pkt->getAddr()].push_back(this_req);
+
+    if (outstandingReqs[pkt->getAddr()].size () > 1) {
+        // There is another request in progress. Delay this one.
+        DPRINTF(AMDGPUSystemHub, "SystemHub deferring request for %#lx\n",
+                pkt->getAddr());
+    } else {
+        // No other requests, we can send immediately.
+        sendDeferredRequest(this_req);
+    }
+}
+
+void
+AMDGPUSystemHub::sendDeferredRequest(DeferredReq& deferredReq)
+{
+    PacketPtr pkt = deferredReq.first;
+    Event *callback = deferredReq.second;
     Tick delay = 0;
+    std::string req_type;
+
+    if (pkt->isAtomicOp()) {
+        AtomicResponseEvent *atomicRespEvent =
+            new AtomicResponseEvent(*this, callback, pkt);
+
+        // First read the value. The response event will do the atomic/write
+        // This places the current value in the packet, which is correct since
+        // atomics return the value prior to performing the atomic.
+        dmaRead(pkt->getAddr(), pkt->getSize(), atomicRespEvent,
+                pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Atomic";
+    } else if (pkt->isWrite()) {
+        ResponseEvent *dmaRespEvent =
+            new ResponseEvent(*this, callback, pkt);
 
-    if (pkt->isWrite()) {
         dmaWrite(pkt->getAddr(), pkt->getSize(), dmaRespEvent,
                  pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Write";
     } else {
+        ResponseEvent *dmaRespEvent =
+            new ResponseEvent(*this, callback, pkt);
+
         assert(pkt->isRead());
         dmaRead(pkt->getAddr(), pkt->getSize(), dmaRespEvent,
                 pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Read";
+    }
+
+    DPRINTF(AMDGPUSystemHub, "SystemHub %s request for %#lx size %d\n",
+            req_type.c_str(), pkt->getAddr(), pkt->getSize());
+}
+
+void
+AMDGPUSystemHub::sendNextRequest(Addr addr, const PacketPtr donePkt)
+{
+    // Remove our request
+    assert(outstandingReqs.count(addr));
+
+    [[maybe_unused]] DeferredReq& frontPkt = outstandingReqs[addr].front();
+    assert(frontPkt.first == donePkt);
+
+    outstandingReqs[addr].pop_front();
+
+    // If there are no more requests this can be removed from the map.
+    // Otherwise issue the next request in the list
+    if (outstandingReqs[addr].empty()) {
+        DPRINTF(AMDGPUSystemHub, "SystemHub done with packets for addr %#lx\n",
+                donePkt->getAddr());
+
+        outstandingReqs.erase(addr);
+    } else {
+        DeferredReq& nextPkt = outstandingReqs[addr].front();
+
+        DPRINTF(AMDGPUSystemHub, "SystemHub sending deferred request for addr"
+                " %#lx size %d\n", nextPkt.first->getAddr(),
+                nextPkt.first->getSize());
+
+        sendDeferredRequest(nextPkt);
     }
 }
 
@@ -57,8 +135,9 @@ AMDGPUSystemHub::dmaResponse(PacketPtr pkt)
 {
 }
 
-AMDGPUSystemHub::ResponseEvent::ResponseEvent(Event *_callback)
-    : callback(_callback)
+AMDGPUSystemHub::ResponseEvent::ResponseEvent(
+        AMDGPUSystemHub& _hub, Event *_callback, PacketPtr _pkt)
+    : systemHub(_hub), callback(_callback), pkt(_pkt)
 {
     // Delete this event after process is called
     setFlags(Event::AutoDelete);
@@ -67,9 +146,62 @@ AMDGPUSystemHub::ResponseEvent::ResponseEvent(Event *_callback)
 void
 AMDGPUSystemHub::ResponseEvent::process()
 {
+    DPRINTF(AMDGPUSystemHub, "SystemHub response for addr %#lx size %d\n",
+            pkt->getAddr(), pkt->getSize());
+
+    systemHub.sendNextRequest(pkt->getAddr(), pkt);
+
     callback->process();
 }
 
+AMDGPUSystemHub::AtomicResponseEvent::AtomicResponseEvent(
+        AMDGPUSystemHub& _hub, Event *_callback, PacketPtr _pkt)
+    : systemHub(_hub), callback(_callback), pkt(_pkt)
+{
+    // Delete this event after process is called
+    setFlags(Event::AutoDelete);
+}
+
+void
+AMDGPUSystemHub::AtomicResponseEvent::process()
+{
+    // Make a second response with the original sender's callback
+    ResponseEvent *dmaRespEvent = new ResponseEvent(systemHub, callback, pkt);
+    Tick delay = 0;
+
+    // Create a new write packet which will be modified then written
+    RequestPtr write_req =
+        std::make_shared<Request>(pkt->getAddr(), pkt->getSize(), 0,
+                                  pkt->requestorId());
+
+    PacketPtr write_pkt = Packet::createWrite(write_req);
+    uint8_t *write_data = new uint8_t[pkt->getSize()];
+    std::memcpy(write_data, pkt->getPtr<uint8_t>(), pkt->getSize());
+    write_pkt->dataDynamic(write_data);
+
+    // Perform the atomic on the write packet data. The atomic op is not
+    // copied from the original packet, so use the original packet.
+    assert(pkt->isAtomicOp());
+    (*pkt->getAtomicOp())(write_pkt->getPtr<uint8_t>());
+
+    // Write back the new value. The atomic is not considered done until
+    // this packet's response event is triggered.
+    systemHub.dmaWrite(write_pkt->getAddr(), write_pkt->getSize(),
+        dmaRespEvent, write_pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+    // Atomics from the GPU are at most 64-bit and usually 32-bit.
+    // We can take a peek at the data for debugging purposes.
+    [[maybe_unused]] uint64_t req_data = 0x12345678;
+    if (write_pkt->getSize() == 8) {
+        req_data = write_pkt->getLE<uint64_t>();
+    } else if (pkt->getSize() == 4) {
+        req_data = write_pkt->getLE<uint32_t>();
+    }
+
+    DPRINTF(AMDGPUSystemHub, "SystemHub atomic %#lx writing %lx size %d\n",
+            write_pkt->getAddr(), req_data, write_pkt->getSize());
+}
+
 AddrRangeList
 AMDGPUSystemHub::getAddrRanges() const
 {
diff --git a/src/dev/amdgpu/system_hub.hh b/src/dev/amdgpu/system_hub.hh
index 0b48c3bc01..7955f5e694 100644
--- a/src/dev/amdgpu/system_hub.hh
+++ b/src/dev/amdgpu/system_hub.hh
@@ -63,16 +63,37 @@ class AMDGPUSystemHub : public DmaDevice
     AddrRangeList getAddrRanges() const override;
 
   private:
+    typedef std::pair<PacketPtr, Event*> DeferredReq;
+    typedef std::list<DeferredReq> DeferredReqList;
+    std::unordered_map<Addr, DeferredReqList> outstandingReqs;
+
+    void sendNextRequest(Addr addr, const PacketPtr donePkt);
+    void sendDeferredRequest(DeferredReq& deferredReq);
 
     class ResponseEvent : public Event
     {
-       Event *callback;
+        AMDGPUSystemHub &systemHub;
+        Event *callback;
+        PacketPtr pkt;
 
-       public:
-        ResponseEvent(Event *_callback);
+      public:
+        ResponseEvent(AMDGPUSystemHub& _hub,
+                      Event *_callback, PacketPtr _pkt);
 
         void process();
+    };
+
+    class AtomicResponseEvent : public Event
+    {
+        AMDGPUSystemHub &systemHub;
+        Event *callback;
+        PacketPtr pkt;
+
+      public:
+        AtomicResponseEvent(AMDGPUSystemHub& _hub,
+                            Event *_callback, PacketPtr _pkt);
 
+        void process();
     };
 };
 
diff --git a/src/dev/arm/FlashDevice.py b/src/dev/arm/FlashDevice.py
index d5069d94ac..7bd365a2ba 100644
--- a/src/dev/arm/FlashDevice.py
+++ b/src/dev/arm/FlashDevice.py
@@ -38,6 +38,7 @@
 
 from m5.objects.AbstractNVM import *
 
+
 # Distribution of the data.
 # sequential: sequential (address n+1 is likely to be on the same plane as n)
 # Random: @TODO Not yet implemented
diff --git a/src/dev/arm/Gic.py b/src/dev/arm/Gic.py
index 41d602b86a..6fd8eb235f 100644
--- a/src/dev/arm/Gic.py
+++ b/src/dev/arm/Gic.py
@@ -315,6 +315,15 @@ class Gicv3(BaseGic):
 
     gicv4 = Param.Bool(False, "GIC is GICv4 compatible")
 
+    reserved_is_res0 = Param.Bool(
+        True,
+        "According to the GIC specification (IHI0069) "
+        "reserved addresses in the GIC memory map are treated as RES0. "
+        "We allow to disable this behaviour and panic instead "
+        "(reserved_is_res0 = False) to catch development bugs "
+        "(in gem5 and in the guest SW)",
+    )
+
     def interruptCells(self, int_type, int_num, int_trigger, partition=None):
         """
         Interupt cells generation helper:
diff --git a/src/dev/arm/gic_v3.hh b/src/dev/arm/gic_v3.hh
index 2ea6a98b3b..7adb1d0f3f 100644
--- a/src/dev/arm/gic_v3.hh
+++ b/src/dev/arm/gic_v3.hh
@@ -167,6 +167,17 @@ class Gicv3 : public BaseGic, public Gicv3Registers
     Tick write(PacketPtr pkt) override;
     bool supportsVersion(GicVersion version) override;
 
+    template<typename... Args>
+    void
+    reserved(const char* fmt, Args... args) const
+    {
+        if (params().reserved_is_res0) {
+            warn(fmt, args...);
+        } else {
+            panic(fmt, args...);
+        }
+    }
+
   public:
 
     Gicv3(const Params &p);
diff --git a/src/dev/arm/gic_v3_distributor.cc b/src/dev/arm/gic_v3_distributor.cc
index 1cb485c5f5..af306929ff 100644
--- a/src/dev/arm/gic_v3_distributor.cc
+++ b/src/dev/arm/gic_v3_distributor.cc
@@ -505,8 +505,8 @@ Gicv3Distributor::read(Addr addr, size_t size, bool is_secure_access)
         return 0; // RES0
 
       default:
-        panic("Gicv3Distributor::read(): invalid offset %#x\n", addr);
-        break;
+        gic->reserved("Gicv3Distributor::read(): invalid offset %#x\n", addr);
+        return 0; // RES0
     }
 }
 
@@ -999,7 +999,7 @@ Gicv3Distributor::write(Addr addr, uint64_t data, size_t size,
       }
 
       default:
-        panic("Gicv3Distributor::write(): invalid offset %#x\n", addr);
+        gic->reserved("Gicv3Distributor::write(): invalid offset %#x\n", addr);
         break;
     }
 }
diff --git a/src/dev/arm/gic_v3_redistributor.cc b/src/dev/arm/gic_v3_redistributor.cc
index e4380ef6f0..67d6e42e6b 100644
--- a/src/dev/arm/gic_v3_redistributor.cc
+++ b/src/dev/arm/gic_v3_redistributor.cc
@@ -377,8 +377,8 @@ Gicv3Redistributor::read(Addr addr, size_t size, bool is_secure_access)
         return 0;
 
       default:
-        panic("Gicv3Redistributor::read(): invalid offset %#x\n", addr);
-        break;
+        gic->reserved("Gicv3Redistributor::read(): invalid offset %#x\n", addr);
+        return 0; // RES0
     }
 }
 
@@ -704,7 +704,7 @@ Gicv3Redistributor::write(Addr addr, uint64_t data, size_t size,
       }
 
       default:
-        panic("Gicv3Redistributor::write(): invalid offset %#x\n", addr);
+        gic->reserved("Gicv3Redistributor::write(): invalid offset %#x\n", addr);
         break;
     }
 }
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index 24e931ef8d..70a74eeb2f 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -68,6 +68,7 @@ DmaPort::handleRespPacket(PacketPtr pkt, Tick delay)
 {
     // Should always see a response with a sender state.
     assert(pkt->isResponse());
+    warn_if(pkt->isError(), "Response pkt error.");
 
     // Get the DMA sender state.
     auto *state = dynamic_cast<DmaReqState*>(pkt->senderState);
diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh
index 92b44bf5f6..3fd77860f4 100644
--- a/src/dev/dma_device.hh
+++ b/src/dev/dma_device.hh
@@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable
     /** Default substreamId */
     const uint32_t defaultSSid;
 
-    const int cacheLineSize;
+    const Addr cacheLineSize;
 
   protected:
 
@@ -257,7 +257,7 @@ class DmaDevice : public PioDevice
 
     void init() override;
 
-    unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
+    Addr cacheBlockSize() const { return sys->cacheLineSize(); }
 
     Port &getPort(const std::string &if_name,
                   PortID idx=InvalidPortID) override;
@@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable
 
     DmaPort &port;
 
-    const int cacheLineSize;
+    const Addr cacheLineSize;
 
   private:
     class DmaDoneEvent : public Event
diff --git a/src/dev/hsa/hsa_packet.hh b/src/dev/hsa/hsa_packet.hh
index 8c7d694431..8eab8385a6 100644
--- a/src/dev/hsa/hsa_packet.hh
+++ b/src/dev/hsa/hsa_packet.hh
@@ -100,6 +100,14 @@ struct _hsa_barrier_or_packet_t
     uint64_t completion_signal;
 };
 
+struct _hsa_generic_vendor_pkt
+{
+    uint32_t padding[14];
+    Addr completion_signal;
+};
+// All HSA AQL packets are 64 bytes. Confirm that here.
+static_assert(sizeof(_hsa_generic_vendor_pkt) == 64);
+
 } // namespace gem5
 
 #endif // __DEV_HSA_HSA_PACKET_HH__
diff --git a/src/dev/hsa/hsa_packet_processor.cc b/src/dev/hsa/hsa_packet_processor.cc
index d0afcf816f..2064de41ce 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -389,20 +389,16 @@ HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
             dep_sgnl_rd_st->resetSigVals();
             // The completion signal is connected
             if (bar_and_pkt->completion_signal != 0) {
-                // HACK: The semantics of the HSA signal is to
-                // decrement the current signal value
-                // I'm going to cheat here and read out
-                // the value from main memory using functional
-                // access, and then just DMA the decremented value.
-                uint64_t signal_value = gpu_device->functionalReadHsaSignal(\
-                                            bar_and_pkt->completion_signal);
-
+                // The semantics of the HSA signal is to decrement the current
+                // signal value by one. Do this asynchronously via DMAs and
+                // callbacks as we can safely continue with this function
+                // while waiting for the next packet from the host.
                 DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
                        " completion signal! Addr: %x\n",
                        bar_and_pkt->completion_signal);
 
-                gpu_device->updateHsaSignal(bar_and_pkt->completion_signal,
-                                            signal_value - 1);
+                gpu_device->sendCompletionSignal(
+                    bar_and_pkt->completion_signal);
             }
         }
         if (dep_sgnl_rd_st->pendingReads > 0) {
diff --git a/src/dev/hsa/hsa_signal.hh b/src/dev/hsa/hsa_signal.hh
index 6acbcb7e1b..7d1f316f04 100644
--- a/src/dev/hsa/hsa_signal.hh
+++ b/src/dev/hsa/hsa_signal.hh
@@ -69,6 +69,12 @@ typedef struct amd_signal_s
   uint32_t reserved3[2];
 } amd_signal_t;
 
+typedef struct
+{
+  uint64_t start_ts;
+  uint64_t end_ts;
+} amd_event_t;
+
 } // namespace gem5
 
 #endif // DEV_HSA_HSA_SIGNAL_H
diff --git a/src/dev/hsa/kfd_ioctl.h b/src/dev/hsa/kfd_ioctl.h
index c953787dc8..b7997c40fc 100644
--- a/src/dev/hsa/kfd_ioctl.h
+++ b/src/dev/hsa/kfd_ioctl.h
@@ -23,9 +23,10 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
 #include <cstdint>
-#include <linux/ioctl.h>
-#include <linux/types.h>
 
 namespace gem5
 {
diff --git a/src/dev/lupio/LupioBLK.py b/src/dev/lupio/LupioBLK.py
index 786c2ccbc5..e230e23fed 100644
--- a/src/dev/lupio/LupioBLK.py
+++ b/src/dev/lupio/LupioBLK.py
@@ -31,7 +31,6 @@
 
 
 class LupioBLK(DmaDevice):
-
     type = "LupioBLK"
     cxx_class = "gem5::LupioBLK"
     cxx_header = "dev/lupio/lupio_blk.hh"
diff --git a/src/dev/lupio/LupioPIC.py b/src/dev/lupio/LupioPIC.py
index 7afa727e4b..40ea7c7f89 100644
--- a/src/dev/lupio/LupioPIC.py
+++ b/src/dev/lupio/LupioPIC.py
@@ -29,7 +29,6 @@
 
 
 class LupioPIC(BasicPioDevice):
-
     type = "LupioPIC"
     cxx_class = "gem5::LupioPIC"
     cxx_header = "dev/lupio/lupio_pic.hh"
diff --git a/src/dev/lupio/LupioRNG.py b/src/dev/lupio/LupioRNG.py
index d6b7b8a199..a1b93446a0 100644
--- a/src/dev/lupio/LupioRNG.py
+++ b/src/dev/lupio/LupioRNG.py
@@ -30,7 +30,6 @@
 
 
 class LupioRNG(BasicPioDevice):
-
     type = "LupioRNG"
     cxx_class = "gem5::LupioRNG"
     cxx_header = "dev/lupio/lupio_rng.hh"
diff --git a/src/dev/pci/PciHost.py b/src/dev/pci/PciHost.py
index 007b17a30c..58f8eb5a78 100644
--- a/src/dev/pci/PciHost.py
+++ b/src/dev/pci/PciHost.py
@@ -76,7 +76,6 @@ def pciFdtAddr(
         relocatable=0,
         addr=0,
     ):
-
         busf = bus & 0xFF
         devicef = device & 0x1F
         functionf = function & 0x7
diff --git a/src/dev/pci/pcireg.h b/src/dev/pci/pcireg.h
index ab5fea540c..e7794e4dc2 100644
--- a/src/dev/pci/pcireg.h
+++ b/src/dev/pci/pcireg.h
@@ -326,7 +326,7 @@ struct MSIXPbaEntry
  *  Defines the PCI Express capability register and its associated bitfields
  *  for a PCIe device.
  */
-struct PXCAP
+union PXCAP
 {
     uint8_t data[48];
     struct
diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 3a89a00ab6..9f53c44e38 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -42,7 +42,9 @@
 #include <utility>
 
 #include "base/bitfield.hh"
+#include "base/debug.hh"
 #include "base/logging.hh"
+#include "base/trace.hh"
 #include "base/types.hh"
 #include "sim/byteswap.hh"
 #include "sim/serialize_handlers.hh"
@@ -861,9 +863,27 @@ class RegisterBank : public RegisterBankBase
         void reset() override { _resetter(*this); }
     };
 
+    // Allow gem5 models to set a debug flag to the register bank for logging
+    // all full/partial read/write access to the registers. The register bank
+    // would not log if the flag is not set.
+    //
+    // The debug flag is the one declared in the SConscript
+    //
+    // DebugFlag('HelloExample')
+    //
+    // Then the flag can be set in the register bank with:
+    //
+    // setDebugFlag(::gem5::debug::HelloExample)
+    void
+    setDebugFlag(const ::gem5::debug::SimpleFlag& flag)
+    {
+        _debug_flag = &flag;
+    }
+
   private:
     std::map<Addr, std::reference_wrapper<RegisterBase>> _offsetMap;
 
+    const ::gem5::debug::SimpleFlag* _debug_flag = nullptr;
     Addr _base = 0;
     Addr _size = 0;
     const std::string _name;
@@ -956,45 +976,34 @@ class RegisterBank : public RegisterBankBase
         if (it == _offsetMap.end() || it->first > addr)
             it--;
 
-        if (it->first < addr) {
-            RegisterBase &reg = it->second.get();
-            // Skip at least the beginning of the first register.
-
-            // Figure out what parts of it we're accessing.
-            const off_t reg_off = addr - it->first;
-            const size_t reg_bytes = std::min(reg.size() - reg_off,
-                                              bytes - done);
-
-            // Actually do the access.
-            reg.read(ptr, reg_off, reg_bytes);
-            done += reg_bytes;
-            it++;
-
-            // Was that everything?
-            if (done == bytes)
-                return;
+        std::ostringstream ss;
+        while (done != bytes) {
+          RegisterBase &reg = it->second.get();
+          const Addr reg_off = addr - it->first;
+          const Addr reg_size = reg.size() - reg_off;
+          const Addr reg_bytes = std::min(reg_size, bytes - done);
+
+          if (reg_bytes != reg.size()) {
+              if (_debug_flag) {
+                  ccprintf(ss, "Read register %s, byte offset %d, size %d\n",
+                          reg.name(), reg_off, reg_bytes);
+              }
+              reg.read(ptr + done, reg_off, reg_bytes);
+          } else {
+              if (_debug_flag) {
+                  ccprintf(ss, "Read register %s\n", reg.name());
+              }
+              reg.read(ptr + done);
+          }
+
+          done += reg_bytes;
+          addr += reg_bytes;
+          ++it;
         }
 
-        while (true) {
-            RegisterBase &reg = it->second.get();
-
-            const size_t reg_size = reg.size();
-            const size_t remaining = bytes - done;
-
-            if (remaining == reg_size) {
-                // A complete register read, and then we're done.
-                reg.read(ptr + done);
-                return;
-            } else if (remaining > reg_size) {
-                // A complete register read, with more to go.
-                reg.read(ptr + done);
-                done += reg_size;
-                it++;
-            } else {
-                // Skip the end of the register, and then we're done.
-                reg.read(ptr + done, 0, remaining);
-                return;
-            }
+        if (_debug_flag) {
+            ::gem5::trace::getDebugLogger()->dprintf_flag(
+                curTick(), name(), _debug_flag->name(), "%s", ss.str());
         }
     }
 
@@ -1013,45 +1022,34 @@ class RegisterBank : public RegisterBankBase
         if (it == _offsetMap.end() || it->first > addr)
             it--;
 
-        if (it->first < addr) {
+        std::ostringstream ss;
+        while (done != bytes) {
             RegisterBase &reg = it->second.get();
-            // Skip at least the beginning of the first register.
-
-            // Figure out what parts of it we're accessing.
-            const off_t reg_off = addr - it->first;
-            const size_t reg_bytes = std::min(reg.size() - reg_off,
-                                              bytes - done);
+            const Addr reg_off = addr - it->first;
+            const Addr reg_size = reg.size() - reg_off;
+            const Addr reg_bytes = std::min(reg_size, bytes - done);
+
+            if (reg_bytes != reg.size()) {  // partial register access
+                if (_debug_flag) {
+                    ccprintf(ss, "Write register %s, byte offset %d, size %d\n",
+                              reg.name(), reg_off, reg_bytes);
+                }
+                reg.write(ptr + done, reg_off, reg_bytes);
+            } else {  // whole register access
+                if (_debug_flag) {
+                    ccprintf(ss, "Write register %s\n", reg.name());
+                }
+                reg.write(ptr + done);
+            }
 
-            // Actually do the access.
-            reg.write(ptr, reg_off, reg_bytes);
             done += reg_bytes;
-            it++;
-
-            // Was that everything?
-            if (done == bytes)
-                return;
+            addr += reg_bytes;
+            ++it;
         }
 
-        while (true) {
-            RegisterBase &reg = it->second.get();
-
-            const size_t reg_size = reg.size();
-            const size_t remaining = bytes - done;
-
-            if (remaining == reg_size) {
-                // A complete register write, and then we're done.
-                reg.write(ptr + done);
-                return;
-            } else if (remaining > reg_size) {
-                // A complete register write, with more to go.
-                reg.write(ptr + done);
-                done += reg_size;
-                it++;
-            } else {
-                // Skip the end of the register, and then we're done.
-                reg.write(ptr + done, 0, remaining);
-                return;
-            }
+        if (_debug_flag) {
+            ::gem5::trace::getDebugLogger()->dprintf_flag(
+                curTick(), name(), _debug_flag->name(), "%s", ss.str());
         }
     }
 
diff --git a/src/dev/riscv/HiFive.py b/src/dev/riscv/HiFive.py
index 5bd6363363..c3d51aa5e7 100755
--- a/src/dev/riscv/HiFive.py
+++ b/src/dev/riscv/HiFive.py
@@ -251,7 +251,7 @@ def generateDeviceTree(self, state):
     def annotateCpuDeviceNode(self, cpu, state):
         cpu.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
         cpu.append(FdtPropertyStrings("status", "okay"))
-        cpu.append(FdtPropertyStrings("riscv,isa", "rv64imafdcsu"))
+        cpu.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
         cpu.appendCompatible(["riscv"])
 
         int_node = FdtNode("interrupt-controller")
diff --git a/src/dev/serial/Uart.py b/src/dev/serial/Uart.py
index 2ca68b8f12..fb0d91efa4 100644
--- a/src/dev/serial/Uart.py
+++ b/src/dev/serial/Uart.py
@@ -82,5 +82,5 @@ def generateDeviceTree(self, state):
         node.append(FdtPropertyWords("interrupts", [platform.uart_int_id]))
         node.append(FdtPropertyWords("clock-frequency", [0x384000]))
         node.append(FdtPropertyWords("interrupt-parent", state.phandle(plic)))
-        node.appendCompatible(["ns8250"])
+        node.appendCompatible(["ns8250", "ns16550a"])
         yield node
diff --git a/src/dev/sparc/T1000.py b/src/dev/sparc/T1000.py
index 9e473a395d..d797c5fc13 100644
--- a/src/dev/sparc/T1000.py
+++ b/src/dev/sparc/T1000.py
@@ -151,6 +151,7 @@ class T1000(Platform):
     puart0 = Uart8250(pio_addr=0x1F10000000)
 
     iob = Iob()
+
     # Attach I/O devices that are on chip
     def attachOnChipIO(self, bus):
         self.iob.pio = bus.mem_side_ports
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index c64a6b791d..1b6c6a7494 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -27,6 +27,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
+from m5.citations import add_citation
 from m5.defines import buildEnv
 from m5.params import *
 from m5.proxy import *
@@ -130,7 +131,7 @@ class ComputeUnit(ClockedObject):
     # Wavefront size is 64. This is configurable, however changing
     # this value to anything other than 64 will likely cause errors.
     wf_size = Param.Int(64, "Wavefront size (in work items)")
-    num_barrier_slots = Param.Int(4, "Number of barrier slots in a CU")
+    num_barrier_slots = Param.Int(16, "Number of barrier slots in a CU")
     num_SIMDs = Param.Int(4, "number of SIMD units per CU")
     num_scalar_cores = Param.Int(1, "number of Scalar cores per CU")
     num_scalar_mem_pipes = Param.Int(
@@ -356,3 +357,35 @@ class StorageClassType(Enum):
         "SC_ARG",
         "SC_NONE",
     ]
+
+
+add_citation(
+    ComputeUnit,
+    """@inproceedings{Gutierrez:2018:amdgpu,
+  author       = {Anthony Gutierrez and
+                  Bradford M. Beckmann and
+                  Alexandru Dutu and
+                  Joseph Gross and
+                  Michael LeBeane and
+                  John Kalamatianos and
+                  Onur Kayiran and
+                  Matthew Poremba and
+                  Brandon Potter and
+                  Sooraj Puthoor and
+                  Matthew D. Sinclair and
+                  Mark Wyse and
+                  Jieming Yin and
+                  Xianwei Zhang and
+                  Akshay Jain and
+                  Timothy G. Rogers},
+  title        = {Lost in Abstraction: Pitfalls of Analyzing GPUs at the Intermediate
+                  Language Level},
+  booktitle    = {{IEEE} International Symposium on High Performance Computer Architecture,
+                  {HPCA} 2018, Vienna, Austria, February 24-28, 2018},
+  pages        = {608--619},
+  publisher    = {{IEEE} Computer Society},
+  year         = {2018},
+  url          = {https://doi.org/10.1109/HPCA.2018.00058},
+  doi          = {10.1109/HPCA.2018.00058}
+}""",
+)
diff --git a/src/gpu-compute/GPUStaticInstFlags.py b/src/gpu-compute/GPUStaticInstFlags.py
index b75e2c6c92..3a44d402be 100644
--- a/src/gpu-compute/GPUStaticInstFlags.py
+++ b/src/gpu-compute/GPUStaticInstFlags.py
@@ -54,6 +54,7 @@ class GPUStaticInstFlags(Enum):
         "MemoryRef",  # References memory (load, store, or atomic)
         "Flat",  # Flat memory op
         "FlatGlobal",  # Global memory op
+        "FlatScratch",  # Scratch memory op
         "Load",  # Reads from memory
         "Store",  # Writes to memory
         # Atomic ops
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 06fe28f5b8..8d6deeb85a 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -383,6 +383,13 @@ ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
 
     stats.waveLevelParallelism.sample(activeWaves);
     activeWaves++;
+
+    panic_if(w->wrGmReqsInPipe, "GM write counter for wavefront non-zero\n");
+    panic_if(w->rdGmReqsInPipe, "GM read counter for wavefront non-zero\n");
+    panic_if(w->wrLmReqsInPipe, "LM write counter for wavefront non-zero\n");
+    panic_if(w->rdLmReqsInPipe, "LM read counter for wavefront non-zero\n");
+    panic_if(w->outstandingReqs,
+             "Outstanding reqs counter for wavefront non-zero\n");
 }
 
 /**
@@ -1910,6 +1917,8 @@ ComputeUnit::updateInstStats(GPUDynInstPtr gpuDynInst)
             }
         } else if (gpuDynInst->isFlatGlobal()) {
             stats.flatVMemInsts++;
+        } else if (gpuDynInst->isFlatScratch()) {
+            stats.flatVMemInsts++;
         } else if (gpuDynInst->isLocalMem()) {
             stats.ldsNoFlatInsts++;
         } else if (gpuDynInst->isLoad()) {
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index d63c875fe5..8a72fd73f4 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -40,6 +40,7 @@
 #include "gpu-compute/hsa_queue_entry.hh"
 #include "gpu-compute/shader.hh"
 #include "gpu-compute/wavefront.hh"
+#include "sim/sim_exit.hh"
 #include "sim/syscall_emul_buf.hh"
 #include "sim/system.hh"
 
@@ -309,20 +310,10 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf)
         gpuCmdProc->hsaPacketProc()
             .finishPkt(task->dispPktPtr(), task->queueId());
         if (task->completionSignal()) {
-            /**
-            * HACK: The semantics of the HSA signal is to decrement
-            * the current signal value. We cheat here and read out
-            * he value from main memory using functional access and
-            * then just DMA the decremented value.
-            */
-            uint64_t signal_value =
-                gpuCmdProc->functionalReadHsaSignal(task->completionSignal());
-
             DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
                     "signal! Addr: %d\n", task->completionSignal());
 
-            gpuCmdProc->updateHsaSignal(task->completionSignal(),
-                                        signal_value - 1);
+            gpuCmdProc->sendCompletionSignal(task->completionSignal());
         } else {
             DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
                 "signal\n");
@@ -333,7 +324,7 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf)
         DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
 
         if (kernelExitEvents) {
-            exitSimLoop("GPU Kernel Completed");
+            shader->requestKernelExitEvent();
         }
     }
 
diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc
index 8f748bdc31..05c9a95eed 100644
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -116,28 +116,52 @@ void
 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
                                        Addr host_pkt_addr)
 {
-    static int dynamic_task_id = 0;
     _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
     assert(!(disp_pkt->kernel_object & (system()->cacheLineSize() - 1)));
 
     /**
-     * we need to read a pointer in the application's address
-     * space to pull out the kernel code descriptor.
+     * Need to use a raw pointer for DmaVirtDevice API. This is deleted
+     * in the dispatchKernelObject method.
      */
-    auto *tc = sys->threads[0];
-
-    TranslatingPortProxy fs_proxy(tc);
-    SETranslatingPortProxy se_proxy(tc);
-    PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
+    AMDKernelCode *akc = new AMDKernelCode;
 
     /**
-     * In full system mode, the page table entry may point to a system page
-     * or a device page. System pages use the proxy as normal, but a device
-     * page needs to be read from device memory. Check what type it is here.
+     * The kernel_object is a pointer to the machine code, whose entry
+     * point is an 'amd_kernel_code_t' type, which is included in the
+     * kernel binary, and describes various aspects of the kernel. The
+     * desired entry is the 'kernel_code_entry_byte_offset' field,
+     * which provides the byte offset (positive or negative) from the
+     * address of the amd_kernel_code_t to the start of the machine
+     * instructions.
+     *
+     * For SE mode we can read from the port proxy. In FS mode, we may need
+     * to wait for the guest OS to setup translations, especially when using
+     * the KVM CPU, so it is preferred to read the code object using a timing
+     * DMA request.
      */
-    bool is_system_page = true;
-    Addr phys_addr = disp_pkt->kernel_object;
-    if (FullSystem) {
+    if (!FullSystem) {
+        /**
+         * we need to read a pointer in the application's address
+         * space to pull out the kernel code descriptor.
+         */
+        auto *tc = sys->threads[0];
+        SETranslatingPortProxy virt_proxy(tc);
+
+        DPRINTF(GPUCommandProc, "reading kernel_object using proxy\n");
+        virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)akc,
+            sizeof(AMDKernelCode));
+
+        dispatchKernelObject(akc, raw_pkt, queue_id, host_pkt_addr);
+    } else {
+        /**
+         * In full system mode, the page table entry may point to a system
+         * page or a device page. System pages use the proxy as normal, but
+         * a device page needs to be read from device memory. Check what type
+         * it is here.
+         */
+        bool is_system_page = true;
+        Addr phys_addr = disp_pkt->kernel_object;
+
         /**
          * Full system currently only supports running on single VMID (one
          * virtual memory space), i.e., one application running on GPU at a
@@ -149,61 +173,68 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
         walker->startFunctional(gpuDevice->getVM().getPageTableBase(vmid),
                                 phys_addr, tmp_bytes, BaseMMU::Mode::Read,
                                 is_system_page);
-    }
 
-    DPRINTF(GPUCommandProc, "kernobj vaddr %#lx paddr %#lx size %d s:%d\n",
-            disp_pkt->kernel_object, phys_addr, sizeof(AMDKernelCode),
-            is_system_page);
+        DPRINTF(GPUCommandProc, "kernel_object vaddr %#lx paddr %#lx size %d"
+                " s:%d\n", disp_pkt->kernel_object, phys_addr,
+                sizeof(AMDKernelCode), is_system_page);
 
-    /**
-     * The kernel_object is a pointer to the machine code, whose entry
-     * point is an 'amd_kernel_code_t' type, which is included in the
-     * kernel binary, and describes various aspects of the kernel. The
-     * desired entry is the 'kernel_code_entry_byte_offset' field,
-     * which provides the byte offset (positive or negative) from the
-     * address of the amd_kernel_code_t to the start of the machine
-     * instructions.
-     */
-    AMDKernelCode akc;
-    if (is_system_page) {
-        DPRINTF(GPUCommandProc, "kernel_object in system, using proxy\n");
-        virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
-            sizeof(AMDKernelCode));
-    } else {
-        assert(FullSystem);
-        DPRINTF(GPUCommandProc, "kernel_object in device, using device mem\n");
-
-        // Read from GPU memory manager one cache line at a time to prevent
-        // rare cases where the AKC spans two memory pages.
-        ChunkGenerator gen(disp_pkt->kernel_object, sizeof(AMDKernelCode),
-                           system()->cacheLineSize());
-        for (; !gen.done(); gen.next()) {
-            Addr chunk_addr = gen.addr();
-            int vmid = 1;
-            unsigned dummy;
-            walker->startFunctional(gpuDevice->getVM().getPageTableBase(vmid),
-                                    chunk_addr, dummy, BaseMMU::Mode::Read,
-                                    is_system_page);
-
-            Request::Flags flags = Request::PHYSICAL;
-            RequestPtr request = std::make_shared<Request>(chunk_addr,
-                system()->cacheLineSize(), flags, walker->getDevRequestor());
-            Packet *readPkt = new Packet(request, MemCmd::ReadReq);
-            readPkt->dataStatic((uint8_t *)&akc + gen.complete());
-            system()->getDeviceMemory(readPkt)->access(readPkt);
-            delete readPkt;
+        /**
+         * System objects use DMA device. Device objects need to use device
+         * memory.
+         */
+        if (is_system_page) {
+            DPRINTF(GPUCommandProc,
+                    "sending system DMA read for kernel_object\n");
+
+            auto dma_callback = new DmaVirtCallback<uint32_t>(
+              [=](const uint32_t&) {
+                dispatchKernelObject(akc, raw_pkt, queue_id, host_pkt_addr);
+              });
+
+            dmaReadVirt(disp_pkt->kernel_object, sizeof(AMDKernelCode),
+                    dma_callback, (void *)akc);
+        } else {
+            DPRINTF(GPUCommandProc,
+                    "kernel_object in device, using device mem\n");
+
+            // Read from GPU memory manager one cache line at a time to prevent
+            // rare cases where the AKC spans two memory pages.
+            ChunkGenerator gen(disp_pkt->kernel_object, sizeof(AMDKernelCode),
+                               system()->cacheLineSize());
+            for (; !gen.done(); gen.next()) {
+                Addr chunk_addr = gen.addr();
+                int vmid = 1;
+                unsigned dummy;
+                walker->startFunctional(
+                    gpuDevice->getVM().getPageTableBase(vmid), chunk_addr,
+                    dummy, BaseMMU::Mode::Read, is_system_page);
+
+                Request::Flags flags = Request::PHYSICAL;
+                RequestPtr request = std::make_shared<Request>(chunk_addr,
+                    system()->cacheLineSize(), flags,
+                    walker->getDevRequestor());
+                Packet *readPkt = new Packet(request, MemCmd::ReadReq);
+                readPkt->dataStatic((uint8_t *)akc + gen.complete());
+                system()->getDeviceMemory(readPkt)->access(readPkt);
+                delete readPkt;
+            }
+
+            dispatchKernelObject(akc, raw_pkt, queue_id, host_pkt_addr);
         }
     }
+}
 
-    DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
-        "kernel object\n", akc.kernel_code_entry_byte_offset);
-
-    DPRINTF(GPUCommandProc,"GPUCommandProc: Sending dispatch pkt to %lu\n",
-        (uint64_t)tc->cpuId());
+void
+GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
+                                        uint32_t queue_id, Addr host_pkt_addr)
+{
+    _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
 
+    DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
+        "kernel object\n", akc->kernel_code_entry_byte_offset);
 
     Addr machine_code_addr = (Addr)disp_pkt->kernel_object
-        + akc.kernel_code_entry_byte_offset;
+        + akc->kernel_code_entry_byte_offset;
 
     DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
         machine_code_addr);
@@ -219,7 +250,7 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
      * APUs to implement asynchronous memcopy operations from 2 pointers in
      * host memory.  I have no idea what BLIT stands for.
      * */
-    if (akc.runtime_loader_kernel_symbol) {
+    if (akc->runtime_loader_kernel_symbol) {
         kernel_name = "Some kernel";
     } else {
         kernel_name = "Blit kernel";
@@ -230,7 +261,7 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
     GfxVersion gfxVersion = FullSystem ? gpuDevice->getGfxVersion()
                           : driver()->getGfxVersion();
     HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
-        dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr,
+        dynamic_task_id, raw_pkt, akc, host_pkt_addr, machine_code_addr,
         gfxVersion);
 
     DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
@@ -248,6 +279,152 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
 
     initABI(task);
     ++dynamic_task_id;
+
+    // The driver expects the start time to be in ns
+    Tick start_ts = curTick() / sim_clock::as_int::ns;
+    dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
+
+    delete akc;
+}
+
+void
+GPUCommandProcessor::sendCompletionSignal(Addr signal_handle)
+{
+    // Originally the completion signal was read functionally and written
+    // with a timing DMA. This can cause issues in FullSystem mode and
+    // cause translation failures. Therefore, in FullSystem mode everything
+    // is done in timing mode.
+
+    if (!FullSystem) {
+        /**
+        * HACK: The semantics of the HSA signal is to decrement
+        * the current signal value. We cheat here and read out
+        * the value from main memory using functional access and
+        * then just DMA the decremented value.
+        */
+        uint64_t signal_value = functionalReadHsaSignal(signal_handle);
+
+        updateHsaSignal(signal_handle, signal_value - 1);
+    } else {
+        // The semantics of the HSA signal is to decrement the current
+        // signal value by one. Do this asynchronously via DMAs and
+        // callbacks as we can safely continue with this function
+        // while waiting for the next packet from the host.
+        updateHsaSignalAsync(signal_handle, -1);
+    }
+}
+
+void
+GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
+{
+    Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
+    uint64_t *mailboxValue = new uint64_t;
+    auto cb2 = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaMailboxData(signal_handle, mailboxValue); });
+    dmaReadVirt(mailbox_addr, sizeof(uint64_t), cb2, (void *)mailboxValue);
+    DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading mailbox addr %lx\n",
+            mailbox_addr);
+}
+
+void
+GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
+                                          uint64_t *mailbox_value)
+{
+    Addr event_addr = getHsaSignalEventAddr(signal_handle);
+
+    DPRINTF(GPUCommandProc, "updateHsaMailboxData read %ld\n", *mailbox_value);
+    if (*mailbox_value != 0) {
+        // This is an interruptible signal. Now, read the
+        // event ID and directly communicate with the driver
+        // about that event notification.
+        auto cb = new DmaVirtCallback<uint64_t>(
+            [ = ] (const uint64_t &)
+                { updateHsaEventData(signal_handle, mailbox_value); });
+        dmaReadVirt(event_addr, sizeof(uint64_t), cb, (void *)mailbox_value);
+    } else {
+        delete mailbox_value;
+        // Zero mailbox value: skip the event read and write the timestamps.
+        Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
+
+        amd_event_t *event_ts = new amd_event_t;
+        event_ts->start_ts = dispatchStartTime[signal_handle];
+        event_ts->end_ts = curTick() / sim_clock::as_int::ns;
+        auto cb = new DmaVirtCallback<uint64_t>(
+            [ = ] (const uint64_t &)
+                { updateHsaEventTs(signal_handle, event_ts); });
+        dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb, (void *)event_ts);
+        DPRINTF(GPUCommandProc, "updateHsaMailboxData writing timestamp addr "
+                "%lx\n", ts_addr);
+
+        dispatchStartTime.erase(signal_handle);
+    }
+}
+
+void
+GPUCommandProcessor::updateHsaEventData(Addr signal_handle,
+                                        uint64_t *event_value)
+{
+    Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
+
+    DPRINTF(GPUCommandProc, "updateHsaEventData read %ld\n", *event_value);
+    // Write *event_value to the mailbox to clear the event
+    auto cb = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaSignalDone(event_value); }, *event_value);
+    dmaWriteVirt(mailbox_addr, sizeof(uint64_t), cb, &cb->dmaBuffer, 0);
+
+    Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
+    // Also write the start/end times at the signal's start_ts offset.
+    amd_event_t *event_ts = new amd_event_t;
+    event_ts->start_ts = dispatchStartTime[signal_handle];
+    event_ts->end_ts = curTick() / sim_clock::as_int::ns;
+    auto cb2 = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaEventTs(signal_handle, event_ts); });
+    dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb2, (void *)event_ts);
+    DPRINTF(GPUCommandProc, "updateHsaEventData writing timestamp addr %lx\n",
+            ts_addr);
+
+    dispatchStartTime.erase(signal_handle);
+}
+
+void
+GPUCommandProcessor::updateHsaEventTs(Addr signal_handle,
+                                      amd_event_t *ts)
+{
+    delete ts;
+
+    Addr value_addr = getHsaSignalValueAddr(signal_handle);
+    int64_t diff = -1;
+
+    uint64_t *signalValue = new uint64_t;
+    auto cb = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaSignalData(value_addr, diff, signalValue); });
+    dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
+    DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n",
+            value_addr);
+}
+
+void
+GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
+                                         uint64_t *prev_value)
+{
+    // Reuse the value allocated for the read
+    DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
+            *prev_value, *prev_value + diff);
+    *prev_value += diff;
+    auto cb = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaSignalDone(prev_value); });
+    dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
+}
+
+void
+GPUCommandProcessor::updateHsaSignalDone(uint64_t *signal_value)
+{
+    delete signal_value;
 }
 
 uint64_t
@@ -329,18 +506,27 @@ GPUCommandProcessor::driver()
  */
 
 /**
- * TODO: For now we simply tell the HSAPP to finish the packet,
- *       however a future patch will update this method to provide
- *       the proper handling of any required vendor-specific packets.
- *       In the version of ROCm that is currently supported (1.6)
- *       the runtime will send packets that direct the CP to
- *       invalidate the GPUs caches. We do this automatically on
- *       each kernel launch in the CU, so this is safe for now.
+ * TODO: For now we simply tell the HSAPP to finish the packet and write a
+ * completion signal, if any. However, in the future proper handling may be
+ * required for vendor specific packets.
+ *
+ * In the version of ROCm that is currently supported the runtime will send
+ * packets that direct the CP to invalidate the GPU caches. We do this
+ * automatically on each kernel launch in the CU, so that situation is safe
+ * for now.
  */
 void
 GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
     Addr host_pkt_addr)
 {
+    auto vendor_pkt = (_hsa_generic_vendor_pkt *)raw_pkt;
+
+    if (vendor_pkt->completion_signal) {
+        sendCompletionSignal(vendor_pkt->completion_signal);
+    }
+
+    warn("Ignoring vendor packet\n");
+
     hsaPP->finishPkt(raw_pkt, queue_id);
 }
 
diff --git a/src/gpu-compute/gpu_command_processor.hh b/src/gpu-compute/gpu_command_processor.hh
index bafe733ee1..85b2a44494 100644
--- a/src/gpu-compute/gpu_command_processor.hh
+++ b/src/gpu-compute/gpu_command_processor.hh
@@ -46,6 +46,7 @@
 #include <cstdint>
 #include <functional>
 
+#include "arch/amdgpu/vega/gpu_registers.hh"
 #include "base/logging.hh"
 #include "base/trace.hh"
 #include "base/types.hh"
@@ -98,6 +99,8 @@ class GPUCommandProcessor : public DmaVirtDevice
                          Addr host_pkt_addr);
     void attachDriver(GPUComputeDriver *driver);
 
+    void dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
+                              uint32_t queue_id, Addr host_pkt_addr);
     void dispatchPkt(HSAQueueEntry *task);
     void signalWakeupEvent(uint32_t event_id);
 
@@ -106,9 +109,17 @@ class GPUCommandProcessor : public DmaVirtDevice
     AddrRangeList getAddrRanges() const override;
     System *system();
 
+    void sendCompletionSignal(Addr signal_handle);
     void updateHsaSignal(Addr signal_handle, uint64_t signal_value,
                          HsaSignalCallbackFunction function =
                             [] (const uint64_t &) { });
+    void updateHsaSignalAsync(Addr signal_handle, int64_t diff);
+    void updateHsaSignalData(Addr value_addr, int64_t diff,
+                             uint64_t *prev_value);
+    void updateHsaSignalDone(uint64_t *signal_value);
+    void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value);
+    void updateHsaEventData(Addr signal_handle, uint64_t *event_value);
+    void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value);
 
     uint64_t functionalReadHsaSignal(Addr signal_handle);
 
@@ -140,6 +151,12 @@ class GPUCommandProcessor : public DmaVirtDevice
     HSAPacketProcessor *hsaPP;
     TranslationGenPtr translate(Addr vaddr, Addr size) override;
 
+    // Running counter of dispatched tasks
+    int dynamic_task_id = 0;
+
+    // Keep track of start times for task dispatches.
+    std::unordered_map<Addr, Tick> dispatchStartTime;
+
     /**
      * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
      * field, which follows directly after the read_dispatch_id (the read
@@ -199,7 +216,7 @@ class GPUCommandProcessor : public DmaVirtDevice
          *  the signal is reset we should check that the runtime was
          *  successful and then proceed to launch the kernel.
          */
-        if (task->privMemPerItem() >
+        if ((task->privMemPerItem() * VegaISA::NumVecElemPerVecReg) >
             task->amdQueue.compute_tmpring_size_wavesize * 1024) {
             // TODO: Raising this signal will potentially nuke scratch
             // space for in-flight kernels that were launched from this
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index 3cbb6f1ff8..c59317d2c4 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -432,6 +432,12 @@ GPUDynInst::isFlatGlobal() const
     return _staticInst->isFlatGlobal();
 }
 
+bool
+GPUDynInst::isFlatScratch() const
+{
+    return _staticInst->isFlatScratch();
+}
+
 bool
 GPUDynInst::isLoad() const
 {
@@ -576,6 +582,12 @@ GPUDynInst::readsFlatScratch() const
     return false;
 }
 
+bool
+GPUDynInst::needsToken() const
+{
+    return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch();
+}
+
 bool
 GPUDynInst::isAtomicAnd() const
 {
@@ -901,12 +913,12 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
         uint32_t numSgprs = wavefront()->maxSgprs;
         uint32_t physSgprIdx =
             wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
-                                                          numSgprs - 3);
+                                                          numSgprs - 4);
         uint32_t offset =
             wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
         physSgprIdx =
             wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
-                                                          numSgprs - 4);
+                                                          numSgprs - 3);
         uint32_t size =
             wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
         for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
@@ -919,12 +931,12 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
         wavefront()->execUnitId =  wavefront()->flatLmUnitId;
         wavefront()->decLGKMInstsIssued();
         if (isLoad()) {
-            wavefront()->rdGmReqsInPipe--;
+            wavefront()->rdLmReqsInPipe--;
         } else if (isStore()) {
-            wavefront()->wrGmReqsInPipe--;
+            wavefront()->wrLmReqsInPipe--;
         } else if (isAtomic() || isMemSync()) {
-            wavefront()->rdGmReqsInPipe--;
-            wavefront()->wrGmReqsInPipe--;
+            wavefront()->wrLmReqsInPipe--;
+            wavefront()->rdLmReqsInPipe--;
         } else {
             panic("Invalid memory operation!\n");
         }
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index e2884a012a..6551fa417a 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -234,6 +234,7 @@ class GPUDynInst : public GPUExecContext
     bool isMemRef() const;
     bool isFlat() const;
     bool isFlatGlobal() const;
+    bool isFlatScratch() const;
     bool isLoad() const;
     bool isStore() const;
 
@@ -256,6 +257,7 @@ class GPUDynInst : public GPUExecContext
     bool writesFlatScratch() const;
     bool readsExecMask() const;
     bool writesExecMask() const;
+    bool needsToken() const;
 
     bool isAtomicAnd() const;
     bool isAtomicOr() const;
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh
index b86a507dce..156f0e529d 100644
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -130,6 +130,7 @@ class GPUStaticInst : public GPUStaticInstFlags
     bool isMemRef() const { return _flags[MemoryRef]; }
     bool isFlat() const { return _flags[Flat]; }
     bool isFlatGlobal() const { return _flags[FlatGlobal]; }
+    bool isFlatScratch() const { return _flags[FlatScratch]; }
     bool isLoad() const { return _flags[Load]; }
     bool isStore() const { return _flags[Store]; }
 
diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh
index 4083c1c85a..84ae139127 100644
--- a/src/gpu-compute/hsa_queue_entry.hh
+++ b/src/gpu-compute/hsa_queue_entry.hh
@@ -70,8 +70,6 @@ class HSAQueueEntry
           _gridSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_x,
                     (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_y,
                     (int)((_hsa_dispatch_packet_t*)disp_pkt)->grid_size_z}},
-          numVgprs(akc->workitem_vgpr_count),
-          numSgprs(akc->wavefront_sgpr_count),
           _queueId(queue_id), _dispatchId(dispatch_id), dispPkt(disp_pkt),
           _hostDispPktAddr(host_pkt_addr),
           _completionSignal(((_hsa_dispatch_packet_t*)disp_pkt)
@@ -88,40 +86,36 @@ class HSAQueueEntry
           _globalWgId(0), dispatchComplete(false)
 
     {
-        // Precompiled BLIT kernels actually violate the spec a bit
-        // and don't set many of the required akc fields.  For these kernels,
-        // we need to rip register usage from the resource registers.
-        //
-        // We can't get an exact number of registers from the resource
-        // registers because they round, but we can get an upper bound on it.
-        // We determine the number of registers by solving for "vgprs_used"
-        // in the LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
+        // Use the resource descriptors to determine number of GPRs. This will
+        // round up in some cases, however the exact number field in the AMD
+        // kernel code struct is not backwards compatible and that field is
+        // not populated in newer compiles. The resource descriptor dword must
+        // be backwards compatible, so use that always.
+        // LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
         //     #code-object-v3-kernel-descriptor
+        //
         // Currently, the only supported gfx version in gem5 that computes
-        // this differently is gfx90a.
-        if (!numVgprs) {
-            if (gfx_version == GfxVersion::gfx90a) {
-                numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
-            } else {
-                numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
-            }
+        // VGPR count differently is gfx90a.
+        if (gfx_version == GfxVersion::gfx90a) {
+            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
+        } else {
+            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
         }
 
-        if (!numSgprs || numSgprs ==
-            std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {
-            // Supported major generation numbers: 0 (BLIT kernels), 8, and 9
-            uint16_t version = akc->amd_machine_version_major;
-            assert((version == 0) || (version == 8) || (version == 9));
-            // SGPR allocation granularies:
-            // - GFX8: 8
-            // - GFX9: 16
-            // Source: https://llvm.org/docs/AMDGPUUsage.html
-            if ((version == 0) || (version == 8)) {
-                // We assume that BLIT kernels use the same granularity as GFX8
-                numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
-            } else if (version == 9) {
-                numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
-            }
+        // SGPR allocation granularities:
+        // - GFX8: 8
+        // - GFX9: 16
+        // Source: https://llvm.org/docs/AMDGPUUsage.html
+        if (gfx_version == GfxVersion::gfx801 ||
+                gfx_version == GfxVersion::gfx803) {
+            numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
+        } else if (gfx_version == GfxVersion::gfx900 ||
+                gfx_version == GfxVersion::gfx902 ||
+                gfx_version == GfxVersion::gfx908 ||
+                gfx_version == GfxVersion::gfx90a) {
+            numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
+        } else {
+            panic("Saw unknown gfx version setting up GPR counts\n");
         }
 
         initialVgprState.reset();
diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc
index 4c4028b152..0d475c577e 100644
--- a/src/gpu-compute/schedule_stage.cc
+++ b/src/gpu-compute/schedule_stage.cc
@@ -579,7 +579,7 @@ ScheduleStage::fillDispatchList()
                     // operation.
                     GPUDynInstPtr mp = schIter->first;
                     if (!mp->isMemSync() && !mp->isScalar() &&
-                        (mp->isGlobalMem() || mp->isFlat())) {
+                        mp->needsToken()) {
                         computeUnit.globalMemoryPipe.acqCoalescerToken(mp);
                     }
 
diff --git a/src/gpu-compute/scoreboard_check_stage.cc b/src/gpu-compute/scoreboard_check_stage.cc
index 3d18260822..b618cab278 100644
--- a/src/gpu-compute/scoreboard_check_stage.cc
+++ b/src/gpu-compute/scoreboard_check_stage.cc
@@ -154,7 +154,8 @@ ScoreboardCheckStage::ready(Wavefront *w, nonrdytype_e *rdyStatus,
     if (!(ii->isBarrier() || ii->isNop() || ii->isReturn() || ii->isBranch() ||
          ii->isALU() || ii->isLoad() || ii->isStore() || ii->isAtomic() ||
          ii->isEndOfKernel() || ii->isMemSync() || ii->isFlat() ||
-         ii->isFlatGlobal() || ii->isSleep() || ii->isLocalMem())) {
+         ii->isFlatGlobal() || ii->isFlatScratch() || ii->isSleep() ||
+         ii->isLocalMem())) {
         panic("next instruction: %s is of unknown type\n", ii->disassemble());
     }
 
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 73d2366b74..e13e7c9cf4 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -41,6 +41,7 @@
 #include "debug/GPUMem.hh"
 #include "debug/GPUShader.hh"
 #include "debug/GPUWgLatency.hh"
+#include "dev/amdgpu/hwreg_defines.hh"
 #include "gpu-compute/dispatcher.hh"
 #include "gpu-compute/gpu_command_processor.hh"
 #include "gpu-compute/gpu_static_inst.hh"
@@ -72,15 +73,25 @@ Shader::Shader(const Params &p) : ClockedObject(p),
     gpuCmdProc.setShader(this);
     _dispatcher.setShader(this);
 
+    // These apertures are set by the driver. In full system mode that is done
+    // using a PM4 packet but the emulated SE mode driver does not set them
+    // explicitly, so we need to define some reasonable defaults here.
     _gpuVmApe.base = ((Addr)1 << 61) + 0x1000000000000L;
     _gpuVmApe.limit = (_gpuVmApe.base & 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL;
 
-    _ldsApe.base = ((Addr)1 << 61) + 0x0;
+    _ldsApe.base = 0x1000000000000;
     _ldsApe.limit =  (_ldsApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
 
-    _scratchApe.base = ((Addr)1 << 61) + 0x100000000L;
+    _scratchApe.base = 0x2000000000000;
     _scratchApe.limit = (_scratchApe.base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF;
 
+    // The scratch and LDS address can be queried starting in gfx900. The
+    // base addresses are in the SH_MEM_BASES 32-bit register. The upper 16
+    // bits are for the LDS address and the lower 16 bits are for scratch
+    // address. In both cases the 16 bits represent bits 63:48 of the address.
+    // This means bits 47:0 of the base address are always zero.
+    setHwReg(HW_REG_SH_MEM_BASES, 0x00010002);
+
     shHiddenPrivateBaseVmid = 0;
 
     cuList.resize(n_cu);
@@ -519,8 +530,14 @@ Shader::notifyCuSleep() {
     panic_if(_activeCus <= 0 || _activeCus > cuList.size(),
              "Invalid activeCu size\n");
     _activeCus--;
-    if (!_activeCus)
+    if (!_activeCus) {
         stats.shaderActiveTicks += curTick() - _lastInactiveTick;
+
+        if (kernelExitRequested) {
+            kernelExitRequested = false;
+            exitSimLoop("GPU Kernel Completed");
+        }
+    }
 }
 
 /**
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 08dfd24b76..32ddf3d15b 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -97,6 +97,10 @@ class Shader : public ClockedObject
     // Last tick that all CUs attached to this shader were inactive
     Tick _lastInactiveTick;
 
+    // If a kernel-based exit event was requested, wait for all CUs in the
+    // shader to complete before actually exiting so that stats are updated.
+    bool kernelExitRequested = false;
+
   public:
     typedef ShaderParams Params;
     enum hsail_mode_e {SIMT,VECTOR_SCALAR};
@@ -314,6 +318,12 @@ class Shader : public ClockedObject
         stats.vectorInstDstOperand[num_operands]++;
     }
 
+    void
+    requestKernelExitEvent()
+    {
+        kernelExitRequested = true;
+    }
+
   protected:
     struct ShaderStats : public statistics::Group
     {
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index 8a1adfe802..0bca152e08 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -1082,7 +1082,7 @@ Wavefront::exec()
      * we return here to avoid spurious errors related to flat insts
      * and their address segment resolution.
      */
-    if (execMask().none() && ii->isFlat()) {
+    if (execMask().none() && ii->needsToken()) {
         computeUnit->getTokenManager()->recvTokens(1);
         return;
     }
diff --git a/src/kern/linux/linux.cc b/src/kern/linux/linux.cc
index 1a54c9c53e..11b9fe0ee4 100644
--- a/src/kern/linux/linux.cc
+++ b/src/kern/linux/linux.cc
@@ -58,19 +58,19 @@ Linux::openSpecialFile(std::string path, Process *process,
     bool matched = false;
     std::string data;
 
-    if (path.compare(0, 13, "/proc/meminfo") == 0) {
+    if (path == "/proc/meminfo") {
         data = Linux::procMeminfo(process, tc);
         matched = true;
-    } else if (path.compare(0, 11, "/etc/passwd") == 0) {
+    } else if (path == "/etc/passwd") {
         data = Linux::etcPasswd(process, tc);
         matched = true;
-    } else if (path.compare(0, 15, "/proc/self/maps") == 0) {
+    } else if (path == "/proc/self/maps") {
         data = Linux::procSelfMaps(process, tc);
         matched = true;
-    } else if (path.compare(0, 30, "/sys/devices/system/cpu/online") == 0) {
+    } else if (path == "/sys/devices/system/cpu/online") {
         data = Linux::cpuOnline(process, tc);
         matched = true;
-    } else if (path.compare(0, 12 ,"/dev/urandom") == 0) {
+    } else if (path == "/dev/urandom") {
         data = Linux::devRandom(process, tc);
         matched = true;
     }
diff --git a/src/learning_gem5/part2/HelloObject.py b/src/learning_gem5/part2/HelloObject.py
index 6b9aa8f811..d2cf73e74f 100644
--- a/src/learning_gem5/part2/HelloObject.py
+++ b/src/learning_gem5/part2/HelloObject.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
diff --git a/src/learning_gem5/part2/SimpleCache.py b/src/learning_gem5/part2/SimpleCache.py
index 1295e543fd..6cdce84580 100644
--- a/src/learning_gem5/part2/SimpleCache.py
+++ b/src/learning_gem5/part2/SimpleCache.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
diff --git a/src/learning_gem5/part2/SimpleMemobj.py b/src/learning_gem5/part2/SimpleMemobj.py
index 2ab95ff76e..19b5ca7c7b 100644
--- a/src/learning_gem5/part2/SimpleMemobj.py
+++ b/src/learning_gem5/part2/SimpleMemobj.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
diff --git a/src/learning_gem5/part2/SimpleObject.py b/src/learning_gem5/part2/SimpleObject.py
index 2acbc77759..82efc2c95a 100644
--- a/src/learning_gem5/part2/SimpleObject.py
+++ b/src/learning_gem5/part2/SimpleObject.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2017 Jason Lowe-Power
 # All rights reserved.
 #
diff --git a/src/learning_gem5/part2/hello_object.hh b/src/learning_gem5/part2/hello_object.hh
index c34dde304d..05c6dde4d6 100644
--- a/src/learning_gem5/part2/hello_object.hh
+++ b/src/learning_gem5/part2/hello_object.hh
@@ -69,7 +69,7 @@ class HelloObject : public SimObject
      * SimObjects have been constructed. It is called after the user calls
      * simulate() for the first time.
      */
-    void startup();
+    void startup() override;
 };
 
 } // namespace gem5
diff --git a/src/learning_gem5/part2/simple_cache.hh b/src/learning_gem5/part2/simple_cache.hh
index 25d195d4f1..1ca87dd126 100644
--- a/src/learning_gem5/part2/simple_cache.hh
+++ b/src/learning_gem5/part2/simple_cache.hh
@@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject
     const Cycles latency;
 
     /// The block size for the cache
-    const unsigned blockSize;
+    const Addr blockSize;
 
     /// Number of blocks in the cache (size of cache / block size)
     const unsigned capacity;
diff --git a/src/learning_gem5/part3/MSI-dir.sm b/src/learning_gem5/part3/MSI-dir.sm
index ca5ea3e534..70d960114a 100644
--- a/src/learning_gem5/part3/MSI-dir.sm
+++ b/src/learning_gem5/part3/MSI-dir.sm
@@ -448,7 +448,7 @@ machine(MachineType:Directory, "Directory protocol")
     }
 
     action(popMemQueue, "pM", desc="Pop the memory queue") {
-        memQueue_in.dequeue(clockEdge());
+        dequeueMemRespQueue();
     }
 
     // Stalling actions
diff --git a/src/mem/AddrMapper.py b/src/mem/AddrMapper.py
index 932fbf14e1..f1f3fbd6f5 100644
--- a/src/mem/AddrMapper.py
+++ b/src/mem/AddrMapper.py
@@ -36,6 +36,7 @@
 from m5.params import *
 from m5.SimObject import SimObject
 
+
 # An address mapper changes the packet addresses in going from the
 # response port side of the mapper to the request port side. When the
 # response port is queried for the address ranges, it also performs the
diff --git a/src/mem/CommMonitor.py b/src/mem/CommMonitor.py
index ab946f1e91..e0040b74e0 100644
--- a/src/mem/CommMonitor.py
+++ b/src/mem/CommMonitor.py
@@ -38,6 +38,7 @@
 from m5.objects.System import System
 from m5.SimObject import SimObject
 
+
 # The communication monitor will most typically be used in combination
 # with periodic dumping and resetting of stats using schedStatEvent
 class CommMonitor(SimObject):
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
index dea62a6be1..a5340e0637 100644
--- a/src/mem/DRAMInterface.py
+++ b/src/mem/DRAMInterface.py
@@ -38,6 +38,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import os
 from m5.objects.MemCtrl import MemCtrl
 from m5.objects.MemInterface import *
 
@@ -53,6 +54,95 @@ class DRAMInterface(MemInterface):
     cxx_header = "mem/dram_interface.hh"
     cxx_class = "gem5::memory::DRAMInterface"
 
+    # Rowhammer parameters. The default device file is resolved against the
+    # current working directory when this module is imported, so it is only
+    # found when gem5 is launched from the repository root (TODO confirm).
+    device_file = Param.String(
+        os.path.join(os.getcwd(), "util/hammersim/prob-005.json"),
+        "Absolute path with the device info file. "
+        "The default file is included in the repo.",
+    )
+
+    # This number is 50K for DDR4 and around 139K for DDR3
+    rowhammer_threshold = Param.Unsigned(
+        50000, "Number of activates which trigger rowhammer"
+    )
+    # Rowhammer specific params.
+    counter_table_length = Param.Unsigned(
+        16,
+        "Number of entries that the "
+        "counter-based (vendor B) TRR "
+        "table maintains.",
+    )
+
+    # TRR variants must be within 0 to 3. No TRR, Vendors A, B and C
+    trr_variant = Param.Unsigned(0, "The different variant of TRR (0 - 3)")
+
+    # TRR threshold is a lower number than rowhammer_threshold. This must be a
+    # preemptive number which prevents flipping bits in the DRAM rows due to a
+    # rowhammer attack.
+    trr_threshold = Param.Unsigned(
+        32768,
+        "The threshold number used to refresh rows in the DRAM device.",
+    )
+
+    # A companion table is used to implement TRR A, as there were no source
+    # materials on how a new row is inserted into the TRR table for vendor A.
+    companion_table_length = Param.Unsigned(
+        8, "The number of entries in the companion table."
+    )
+
+    # Understandably, the threshold for the companion table is much lower than
+    # the actual TRR table.
+    companion_threshold = Param.Unsigned(
+        1024,
+        "The threshold number "
+        "used to promote a row from the "
+        "companion table to the trr table",
+    )
+
+    # The dumper variables can be used to dump traces of the TRR and the RH
+    # triggers. This can be used for post-simulation analysis.
+    trr_stat_dump = Param.Bool(
+        False,
+        "Set this to True to dump TRR triggers and generate a TRR trace.",
+    )
+
+    # This is similar to trr_stat_dump.
+    rh_stat_dump = Param.Bool(
+        False,
+        "Set this to True to dump RH triggers "
+        "and generate a RH trace. The trace is "
+        "named as `rowhammer.trace`",
+    )
+
+    rh_stat_file = Param.String(
+        "m5out/rowhammer.trace", "output path of the rowhammer trace"
+    )
+
+    # Single-sided rowhammer probability.
+    single_sided_prob = Param.Unsigned(
+        1e7,
+        "Number of double-sided RH "
+        "bitflips required before observing "
+        "at least 1 single-sided bitflip.",
+    )
+
+    # Half-Double rowhammer probability factor.
+    half_double_prob = Param.Unsigned(
+        1e9,
+        "Number of half-double RH "
+        "attacks required before observing "
+        "at least 1 single-sided bitflip.",
+    )
+
+    double_sided_prob = Param.Unsigned(
+        1e5,
+        "Number of double-sided RH "
+        "attacks required to flip at least "
+        "one bit in the sandwiched row.",
+    )
+
     # scheduler page policy
     page_policy = Param.PageManage("open_adaptive", "Page management policy")
 
diff --git a/src/mem/DRAMSim2.py b/src/mem/DRAMSim2.py
index 364a0d794b..037baaa8b5 100644
--- a/src/mem/DRAMSim2.py
+++ b/src/mem/DRAMSim2.py
@@ -33,9 +33,12 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from m5.citations import add_citation
+
 from m5.params import *
 from m5.objects.AbstractMemory import *
 
+
 # A wrapper for DRAMSim2 multi-channel memory controller
 class DRAMSim2(AbstractMemory):
     type = "DRAMSim2"
@@ -56,3 +59,22 @@ class DRAMSim2(AbstractMemory):
     )
     traceFile = Param.String("", "Output file for trace generation")
     enableDebug = Param.Bool(False, "Enable DRAMSim2 debug output")
+
+
+add_citation(
+    DRAMSim2,
+    """@article{Rosenfeld:2011:dramsim2,
+  author       = {Paul Rosenfeld and
+                  Elliott Cooper{-}Balis and
+                  Bruce L. Jacob},
+  title        = {DRAMSim2: {A} Cycle Accurate Memory System Simulator},
+  journal      = {{IEEE} Computer Architecture Letters},
+  volume       = {10},
+  number       = {1},
+  pages        = {16--19},
+  year         = {2011},
+  url          = {https://doi.org/10.1109/L-CA.2011.4},
+  doi          = {10.1109/L-CA.2011.4}
+}
+""",
+)
diff --git a/src/mem/DRAMSys.py b/src/mem/DRAMSys.py
index c7d69a0ae4..4b2df67dde 100644
--- a/src/mem/DRAMSys.py
+++ b/src/mem/DRAMSys.py
@@ -24,6 +24,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from m5.citations import add_citation
+
 from m5.SimObject import *
 from m5.params import *
 from m5.proxy import *
@@ -41,3 +43,30 @@ class DRAMSys(AbstractMemory):
     configuration = Param.String("Path to the DRAMSys configuration")
     resource_directory = Param.String("Path to the DRAMSys resource directory")
     recordable = Param.Bool(True, "Whether DRAMSys should record a trace file")
+
+
+add_citation(
+    DRAMSys,
+    r"""@inproceedings{Steiner:2020:dramsys4,
+  author       = {Lukas Steiner and
+                  Matthias Jung and
+                  Felipe S. Prado and
+                  Kirill Bykov and
+                  Norbert Wehn},
+  editor       = {Alex Orailoglu and
+                  Matthias Jung and
+                  Marc Reichenbach},
+  title        = {DRAMSys4.0: {A} Fast and Cycle-Accurate SystemC/TLM-Based {DRAM} Simulator},
+  booktitle    = {Embedded Computer Systems: Architectures, Modeling, and Simulation
+                  - 20th International Conference, {SAMOS} 2020, Samos, Greece, July
+                  5-9, 2020, Proceedings},
+  series       = {Lecture Notes in Computer Science},
+  volume       = {12471},
+  pages        = {110--126},
+  publisher    = {Springer},
+  year         = {2020},
+  url          = {https://doi.org/10.1007/978-3-030-60939-9\_8},
+  doi          = {10.1007/978-3-030-60939-9\_8}
+}
+""",
+)
diff --git a/src/mem/DRAMsim3.py b/src/mem/DRAMsim3.py
index 40f61608d8..de70293258 100644
--- a/src/mem/DRAMsim3.py
+++ b/src/mem/DRAMsim3.py
@@ -33,9 +33,12 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from m5.citations import add_citation
+
 from m5.params import *
 from m5.objects.AbstractMemory import *
 
+
 # A wrapper for DRAMSim3 multi-channel memory controller
 class DRAMsim3(AbstractMemory):
     type = "DRAMsim3"
@@ -54,3 +57,24 @@ class DRAMsim3(AbstractMemory):
     filePath = Param.String(
         "ext/dramsim3/DRAMsim3/", "Directory to prepend to file names"
     )
+
+
+add_citation(
+    DRAMsim3,
+    """@article{Li:2020:dramsim3,
+  author       = {Shang Li and
+                  Zhiyuan Yang and
+                  Dhiraj Reddy and
+                  Ankur Srivastava and
+                  Bruce L. Jacob},
+  title        = {DRAMsim3: {A} Cycle-Accurate, Thermal-Capable {DRAM} Simulator},
+  journal      = {{IEEE} Computer Architecture Letters},
+  volume       = {19},
+  number       = {2},
+  pages        = {110--113},
+  year         = {2020},
+  url          = {https://doi.org/10.1109/LCA.2020.2973991},
+  doi          = {10.1109/LCA.2020.2973991}
+}
+""",
+)
diff --git a/src/mem/MemCtrl.py b/src/mem/MemCtrl.py
index 549616ccba..eca15877e5 100644
--- a/src/mem/MemCtrl.py
+++ b/src/mem/MemCtrl.py
@@ -41,6 +41,8 @@
 from m5.params import *
 from m5.proxy import *
 from m5.objects.QoSMemCtrl import *
+from m5.citations import add_citation
+
 
 # Enum for memory scheduling algorithms, currently First-Come
 # First-Served and a First-Row Hit then First-Come First-Served
@@ -99,3 +101,24 @@ class MemCtrl(QoSMemCtrl):
 
     command_window = Param.Latency("10ns", "Static backend latency")
     disable_sanity_check = Param.Bool(False, "Disable port resp Q size check")
+
+
+add_citation(
+    MemCtrl,
+    """@inproceedings{Hansson:2014:dram-controller,
+  author       = {Andreas Hansson and
+                  Neha Agarwal and
+                  Aasheesh Kolli and
+                  Thomas F. Wenisch and
+                  Aniruddha N. Udipi},
+  title        = {Simulating {DRAM} controllers for future system architecture exploration},
+  booktitle    = {2014 {IEEE} International Symposium on Performance Analysis of Systems
+                  and Software, {ISPASS} 2014, Monterey, CA, USA, March 23-25, 2014},
+  pages        = {201--210},
+  publisher    = {{IEEE} Computer Society},
+  year         = {2014},
+  url          = {https://doi.org/10.1109/ISPASS.2014.6844484},
+  doi          = {10.1109/ISPASS.2014.6844484}
+}
+""",
+)
diff --git a/src/mem/MemInterface.py b/src/mem/MemInterface.py
index 60bf99bf47..424e6320ac 100644
--- a/src/mem/MemInterface.py
+++ b/src/mem/MemInterface.py
@@ -43,6 +43,7 @@
 
 from m5.objects.AbstractMemory import AbstractMemory
 
+
 # Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
 # channel, rank, bank, row and column, respectively, and going from
 # MSB to LSB.  Available are RoRaBaChCo and RoRaBaCoCh, that are
diff --git a/src/mem/NVMInterface.py b/src/mem/NVMInterface.py
index 841dc0c047..66b1f9401e 100644
--- a/src/mem/NVMInterface.py
+++ b/src/mem/NVMInterface.py
@@ -39,6 +39,7 @@
 from m5.objects.MemInterface import MemInterface
 from m5.objects.DRAMInterface import AddrMap
 
+
 # The following interface aims to model byte-addressable NVM
 # The most important system-level performance effects of a NVM
 # are modeled without getting into too much detail of the media itself.
diff --git a/src/mem/SConscript b/src/mem/SConscript
index 351f24e907..733fd6a106 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -71,6 +71,7 @@ SimObject('ThreadBridge.py', sim_objects=['ThreadBridge'])
 
 Source('abstract_mem.cc')
 Source('addr_mapper.cc')
+Source('backdoor_manager.cc')
 Source('bridge.cc')
 Source('coherent_xbar.cc')
 Source('cfi_mem.cc')
@@ -105,6 +106,8 @@ Source('serial_link.cc')
 Source('mem_delay.cc')
 Source('port_terminator.cc')
 
+GTest('backdoor_manager.test', 'backdoor_manager.test.cc',
+      'backdoor_manager.cc', with_tag('gem5_trace'))
 GTest('translation_gen.test', 'translation_gen.test.cc')
 
 Source('translating_port_proxy.cc')
@@ -124,6 +127,7 @@ if env['HAVE_DRAMSIM3']:
 if env['HAVE_DRAMSYS']:
     SimObject('DRAMSys.py', sim_objects=['DRAMSys'])
     Source('dramsys_wrapper.cc')
+    Source('dramsys.cc')
 
 SimObject('MemChecker.py', sim_objects=['MemChecker', 'MemCheckerMonitor'])
 Source('mem_checker.cc')
@@ -151,6 +155,7 @@ DebugFlag('MemCtrl')
 DebugFlag('MMU')
 DebugFlag('MemoryAccess')
 DebugFlag('PacketQueue')
+DebugFlag("PortTrace")
 DebugFlag('ResponsePort')
 DebugFlag('StackDist')
 DebugFlag("DRAMSim2")
@@ -162,3 +167,9 @@ DebugFlag('TokenPort')
 DebugFlag("MemChecker")
 DebugFlag("MemCheckerMonitor")
 DebugFlag("QOS")
+
+DebugFlag("RowHammer")
+DebugFlag("RhInhibitor")
+DebugFlag("RhBitflip")
+DebugFlag("HDBitflip")
+DebugFlag("DRAMAddr")
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 9340f7e96f..91f0594e90 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -377,7 +377,7 @@ tracePacket(System *sys, const char *label, PacketPtr pkt)
 #endif
 
 void
-AbstractMemory::access(PacketPtr pkt)
+AbstractMemory::access(PacketPtr pkt, bool corruptedAccess)
 {
     if (pkt->cacheResponding()) {
         DPRINTF(MemoryAccess, "Cache responding to %#llx: not responding\n",
@@ -444,7 +444,11 @@ AbstractMemory::access(PacketPtr pkt)
             trackLoadLocked(pkt);
         }
         if (pmemAddr) {
+            if (corruptedAccess) {
+                pkt->setCorruptedData(host_addr);
+            } else {
             pkt->setData(host_addr);
+            }
         }
         TRACE_PACKET(pkt->req->isInstFetch() ? "IFetch" : "Read");
         stats.numReads[pkt->req->requestorId()]++;
diff --git a/src/mem/abstract_mem.hh b/src/mem/abstract_mem.hh
index 7f12487421..5c52ae8ea4 100644
--- a/src/mem/abstract_mem.hh
+++ b/src/mem/abstract_mem.hh
@@ -345,7 +345,7 @@ class AbstractMemory : public ClockedObject
      *
      * @param pkt Packet performing the access
      */
-    void access(PacketPtr pkt);
+    void access(PacketPtr pkt, bool corruptedAccess = false);
 
     /**
      * Perform an untimed memory read or write without changing
diff --git a/src/mem/addr_mapper.cc b/src/mem/addr_mapper.cc
index 091b9d56aa..3c4054b6ef 100644
--- a/src/mem/addr_mapper.cc
+++ b/src/mem/addr_mapper.cc
@@ -84,6 +84,19 @@ AddrMapper::recvFunctionalSnoop(PacketPtr pkt)
     pkt->setAddr(orig_addr);
 }
 
+void
+AddrMapper::recvMemBackdoorReq(const MemBackdoorReq &req,
+                               MemBackdoorPtr &backdoor)
+{
+    // NOTE(review): end() assumed exclusive; remap the last byte, then add 1.
+    AddrRange remapped_req_range(remapAddr(req.range().start()),
+                                 remapAddr(req.range().end() - 1) + 1);
+    MemBackdoorReq remapped_req(remapped_req_range, req.flags());
+    memSidePort.sendMemBackdoorReq(remapped_req, backdoor);
+    if (backdoor != nullptr)
+        backdoor = getRevertedBackdoor(backdoor, req.range());
+}
+
 Tick
 AddrMapper::recvAtomic(PacketPtr pkt)
 {
@@ -104,6 +117,19 @@ AddrMapper::recvAtomicSnoop(PacketPtr pkt)
     return ret_tick;
 }
 
+Tick
+AddrMapper::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr& backdoor)
+{
+    Addr orig_addr = pkt->getAddr();
+    pkt->setAddr(remapAddr(orig_addr));
+    Tick ret_tick = memSidePort.sendAtomicBackdoor(pkt, backdoor);
+    pkt->setAddr(orig_addr);
+    if (backdoor != nullptr) {
+        backdoor = getRevertedBackdoor(backdoor, pkt->getAddrRange());
+    }
+    return ret_tick;
+}
+
 bool
 AddrMapper::recvTimingReq(PacketPtr pkt)
 {
@@ -206,7 +232,8 @@ AddrMapper::recvRangeChange()
 RangeAddrMapper::RangeAddrMapper(const RangeAddrMapperParams &p) :
     AddrMapper(p),
     originalRanges(p.original_ranges),
-    remappedRanges(p.remapped_ranges)
+    remappedRanges(p.remapped_ranges),
+    backdoorManager(originalRanges, remappedRanges)
 {
     if (originalRanges.size() != remappedRanges.size())
         fatal("AddrMapper: original and shadowed range list must "
@@ -232,6 +259,13 @@ RangeAddrMapper::remapAddr(Addr addr) const
     return addr;
 }
 
+MemBackdoorPtr
+RangeAddrMapper::getRevertedBackdoor(MemBackdoorPtr &backdoor,
+                                     const AddrRange &range)
+{
+    return backdoorManager.getRevertedBackdoor(backdoor, range);
+}
+
 AddrRangeList
 RangeAddrMapper::getAddrRanges() const
 {
diff --git a/src/mem/addr_mapper.hh b/src/mem/addr_mapper.hh
index 40a0bb033b..41709f38ab 100644
--- a/src/mem/addr_mapper.hh
+++ b/src/mem/addr_mapper.hh
@@ -38,6 +38,10 @@
 #ifndef __MEM_ADDR_MAPPER_HH__
 #define __MEM_ADDR_MAPPER_HH__
 
+#include <vector>
+
+#include "mem/backdoor_manager.hh"
+#include "mem/packet.hh"
 #include "mem/port.hh"
 #include "params/AddrMapper.hh"
 #include "params/RangeAddrMapper.hh"
@@ -77,6 +81,20 @@ class AddrMapper : public SimObject
      */
     virtual Addr remapAddr(Addr addr) const = 0;
 
+    /**
+     * This function returns a backdoor that fulfills the initiator request,
+     * based on the target backdoor passed as the first parameter.
+     * Note that this function should return a backdoor in original address
+     * space, while the target backdoor is in remapped address space. Address
+     * reverting logic is probably required in this function.
+     *
+     * @param backdoor the backdoor obtained from target
+     * @param range the initiator request to be fulfilled
+     * @return a backdoor that fulfills the initiator request
+     */
+    virtual MemBackdoorPtr getRevertedBackdoor(MemBackdoorPtr &backdoor,
+                                               const AddrRange &range) = 0;
+
     class AddrMapperSenderState : public Packet::SenderState
     {
 
@@ -168,12 +186,24 @@ class AddrMapper : public SimObject
             mapper.recvFunctional(pkt);
         }
 
+        void recvMemBackdoorReq(const MemBackdoorReq &req,
+                                MemBackdoorPtr &backdoor) override
+        {
+            mapper.recvMemBackdoorReq(req, backdoor);
+        }
+
         Tick
         recvAtomic(PacketPtr pkt) override
         {
             return mapper.recvAtomic(pkt);
         }
 
+        Tick
+        recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr& backdoor) override
+        {
+            return mapper.recvAtomicBackdoor(pkt, backdoor);
+        }
+
         bool
         recvTimingReq(PacketPtr pkt) override
         {
@@ -209,10 +239,15 @@ class AddrMapper : public SimObject
 
     void recvFunctionalSnoop(PacketPtr pkt);
 
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+                            MemBackdoorPtr &backdoor);
+
     Tick recvAtomic(PacketPtr pkt);
 
     Tick recvAtomicSnoop(PacketPtr pkt);
 
+    Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr& backdoor);
+
     bool recvTimingReq(PacketPtr pkt);
 
     bool recvTimingResp(PacketPtr pkt);
@@ -269,12 +304,19 @@ class RangeAddrMapper : public AddrMapper
     std::vector<AddrRange> remappedRanges;
 
     Addr remapAddr(Addr addr) const override;
+
+    MemBackdoorPtr getRevertedBackdoor(MemBackdoorPtr &backdoor,
+                                       const AddrRange &range) override;
+
     void
     recvRangeChange() override
     {
         // TODO Check that our peer is actually expecting to receive accesses
         // in our output range(s).
     }
+
+  private:
+    BackdoorManager backdoorManager;
 };
 
 } // namespace gem5
diff --git a/src/mem/backdoor_manager.cc b/src/mem/backdoor_manager.cc
new file mode 100644
index 0000000000..32d267c7a3
--- /dev/null
+++ b/src/mem/backdoor_manager.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2023 Google, Inc
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <utility>
+
+#include "base/logging.hh"
+#include "mem/backdoor_manager.hh"
+
+namespace gem5
+{
+
+BackdoorManager::BackdoorManager(const std::vector<AddrRange> &original_ranges,
+                                 const std::vector<AddrRange> &remapped_ranges)
+    : originalRanges(original_ranges),
+      remappedRanges(remapped_ranges),
+      backdoorLists(original_ranges.size())
+{
+}
+
+MemBackdoorPtr
+BackdoorManager::getRevertedBackdoor(MemBackdoorPtr backdoor,
+                                     const AddrRange &pkt_range)
+{
+    MemBackdoorPtr reverted_backdoor = findBackdoor(pkt_range);
+    if (reverted_backdoor == nullptr) {
+        reverted_backdoor = createRevertedBackdoor(backdoor, pkt_range);
+    }
+    return reverted_backdoor;
+}
+
+MemBackdoorPtr
+BackdoorManager::createRevertedBackdoor(MemBackdoorPtr backdoor,
+                                        const AddrRange &pkt_range)
+{
+    std::unique_ptr<MemBackdoor> reverted_backdoor = std::make_unique<MemBackdoor>();
+    reverted_backdoor->flags(backdoor->flags());
+    reverted_backdoor->ptr(backdoor->ptr());
+
+    Addr addr = pkt_range.start();
+    for (int i = 0; i < originalRanges.size(); ++i) {
+        if (originalRanges[i].contains(addr)) {
+            /** Does not support interleaved range backdoors. */
+            if (originalRanges[i].interleaved() ||
+                remappedRanges[i].interleaved()) {
+                return nullptr;
+            }
+
+            /** Shrink the backdoor to fit inside address range. */
+            AddrRange shrinked_backdoor_range =
+                backdoor->range() & remappedRanges[i];
+
+            Addr backdoor_offset =
+                shrinked_backdoor_range.start() - remappedRanges[i].start();
+            Addr backdoor_size = shrinked_backdoor_range.size();
+
+            /** Create the backdoor in original address view. */
+            reverted_backdoor->range(AddrRange(
+                originalRanges[i].start() + backdoor_offset,
+                originalRanges[i].start() + backdoor_offset + backdoor_size));
+
+            /**
+             * The backdoor pointer must also be advanced to point to the
+             * beginning of the shrunk range.
+             */
+            Addr shrinked_offset =
+                shrinked_backdoor_range.start() - backdoor->range().start();
+            reverted_backdoor->ptr(backdoor->ptr() + shrinked_offset);
+
+            /**
+             * Bind the life cycle of the created backdoor to the target
+             * backdoor. Invalidate and delete the created backdoor when the
+             * target backdoor is invalidated.
+             */
+            MemBackdoorPtr reverted_backdoor_raw_ptr = reverted_backdoor.get();
+            auto it = backdoorLists[i].insert(backdoorLists[i].end(),
+                                              std::move(reverted_backdoor));
+            backdoor->addInvalidationCallback(
+                [this, i, it](const MemBackdoor &backdoor) {
+                    (*it)->invalidate();  // *it is unique_ptr reverted_backdoor
+                    this->backdoorLists[i].erase(it);
+                });
+            return reverted_backdoor_raw_ptr;
+        }
+    }
+    // No original range contains the packet's start address, so panic.
+    panic("Target does not provide valid backdoor.");
+}
+
+MemBackdoorPtr
+BackdoorManager::findBackdoor(const AddrRange &pkt_range) const
+{
+    Addr addr = pkt_range.start();
+    Addr size = pkt_range.size();
+    for (int i = 0; i < originalRanges.size(); ++i) {
+        /** The original ranges should be disjoint, so at most one range
+         * contains the begin address.
+         */
+        if (originalRanges[i].contains(addr)) {
+            if (!originalRanges[i].contains(addr + size - 1)) {
+                /** The request range doesn't fit in any address range. */
+                return nullptr;
+            }
+            for (const auto &backdoor : backdoorLists[i]) {
+                if (backdoor->range().contains(addr) &&
+                    backdoor->range().contains(addr + size - 1)) {
+                    return backdoor.get();
+                }
+            }
+        }
+    }
+    return nullptr;
+}
+
+}  // namespace gem5
diff --git a/src/mem/backdoor_manager.hh b/src/mem/backdoor_manager.hh
new file mode 100644
index 0000000000..676987c370
--- /dev/null
+++ b/src/mem/backdoor_manager.hh
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2023 Google, Inc
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_BACKDOOR_MANAGER_HH__
+#define __MEM_BACKDOOR_MANAGER_HH__
+
+#include <list>
+#include <memory>
+#include <vector>
+
+#include "mem/backdoor.hh"
+#include "mem/packet.hh"
+
+namespace gem5
+{
+
+/**
+ * This class manages the backdoors for RangeAddrMapper. It provides
+ * functionality such as backdoor remapping and resource management.
+ */
+class BackdoorManager
+{
+  public:
+    explicit BackdoorManager(const std::vector<AddrRange> &original_ranges,
+                             const std::vector<AddrRange> &remapped_ranges);
+
+    MemBackdoorPtr getRevertedBackdoor(MemBackdoorPtr backdoor,
+                                       const AddrRange &pkt_range);
+
+  protected:
+    /**
+     * This function creates a new backdoor, whose address range contains the
+     * original request address. The address range is in initiator address
+     * view, and shouldn't exceed the original address range.
+     */
+    MemBackdoorPtr createRevertedBackdoor(MemBackdoorPtr backdoor,
+                                          const AddrRange &pkt_range);
+    /**
+     * This function returns an already created backdoor that fulfills the
+     * request, or returns nullptr if there is none.
+     */
+    MemBackdoorPtr findBackdoor(const AddrRange &pkt_range) const;
+
+    const std::vector<AddrRange> &originalRanges;
+    const std::vector<AddrRange> &remappedRanges;
+
+    /**
+     * In this vector, each entry contains the list of backdoors that lie in
+     * the corresponding range in the original address view.
+     */
+    std::vector<std::list<std::unique_ptr<MemBackdoor>>> backdoorLists;
+};
+}  // namespace gem5
+
+#endif  //__MEM_BACKDOOR_MANAGER_HH__
diff --git a/src/mem/backdoor_manager.test.cc b/src/mem/backdoor_manager.test.cc
new file mode 100644
index 0000000000..05abc50f2f
--- /dev/null
+++ b/src/mem/backdoor_manager.test.cc
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2023 Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "base/addr_range.hh"
+#include "base/gtest/logging.hh"
+#include "mem/backdoor.hh"
+#include "mem/backdoor_manager.hh"
+
+namespace gem5
+{
+namespace backdoor_manager_test
+{
+const std::vector<AddrRange> kOriginalRange({AddrRange(0x0, 0x1000)});
+const std::vector<AddrRange> kRemappedRange({AddrRange(0x1000, 0x2000)});
+
+class BackdoorManagerTest : public BackdoorManager, public ::testing::Test
+{
+  public:
+    BackdoorManagerTest() : BackdoorManager(kOriginalRange, kRemappedRange)
+    {
+    }
+};
+
+TEST_F(BackdoorManagerTest, BasicRemapTest)
+{
+    /**
+     * The backdoor range is remappedRanges[0], and should be reverted into
+     * originalRanges[0].
+     */
+    AddrRange pkt_range = originalRanges[0];
+
+    uint8_t *ptr = nullptr;
+    MemBackdoor remapped_backdoor(remappedRanges[0], ptr,
+                                  MemBackdoor::Flags::Readable);
+    MemBackdoorPtr reverted_backdoor =
+        getRevertedBackdoor(&remapped_backdoor, pkt_range);
+
+    EXPECT_EQ(reverted_backdoor->range(), originalRanges[0]);
+    EXPECT_EQ(reverted_backdoor->ptr(), ptr);
+    ASSERT_EQ(backdoorLists[0].size(), 1);
+    EXPECT_EQ(backdoorLists[0].begin()->get(), reverted_backdoor);
+
+    /**
+     * After the target backdoor is invalidated, the newly created backdoor
+     * should be freed and removed from the backdoor list.
+     */
+    remapped_backdoor.invalidate();
+    EXPECT_EQ(backdoorLists[0].size(), 0);
+}
+
+TEST_F(BackdoorManagerTest, ShrinkTest)
+{
+    AddrRange pkt_range = originalRanges[0];
+
+    /**
+     * The backdoor range is larger than the address remapper's address range.
+     * The backdoor is expected to be shrunk.
+     */
+    Addr diff = 0x1000;
+    AddrRange remapped_backdoor_range(
+        remappedRanges[0].start() - diff,  // 0x0
+        remappedRanges[0].end() + diff);   // 0x3000
+
+    uint8_t *ptr = nullptr;
+    MemBackdoor remapped_backdoor(remapped_backdoor_range, ptr,
+                                  MemBackdoor::Flags::Readable);
+    MemBackdoorPtr reverted_backdoor =
+        getRevertedBackdoor(&remapped_backdoor, pkt_range);
+
+    EXPECT_EQ(reverted_backdoor->range(), originalRanges[0]);
+    EXPECT_EQ(reverted_backdoor->ptr(), ptr + diff);
+
+    remapped_backdoor.invalidate();
+}
+
+TEST_F(BackdoorManagerTest, ReuseTest)
+{
+    /**
+     * The two packets have different address ranges, but both are contained
+     * in the original address range.
+     */
+    Addr mid = originalRanges[0].start() + originalRanges[0].size() / 2;
+    AddrRange pkt_range_0 = AddrRange(originalRanges[0].start(), mid);
+    AddrRange pkt_range_1 = AddrRange(mid, originalRanges[0].end());
+
+    /**
+     * The address range of the backdoor covers the whole address range, so
+     * both packets can be fulfilled by this backdoor.
+     */
+    uint8_t *ptr = nullptr;
+    MemBackdoor remapped_backdoor(remappedRanges[0], ptr,
+                                  MemBackdoor::Flags::Readable);
+    /**
+     * For the first packet, a new backdoor should be constructed.
+     */
+    MemBackdoorPtr reverted_backdoor_0 =
+        getRevertedBackdoor(&remapped_backdoor, pkt_range_0);
+    EXPECT_EQ(backdoorLists[0].size(), 1);
+
+    /**
+     * For the second packet, it should return the same backdoor as the
+     * previous one, and no new backdoor should be constructed.
+     */
+    MemBackdoorPtr reverted_backdoor_1 =
+        getRevertedBackdoor(&remapped_backdoor, pkt_range_1);
+    EXPECT_EQ(reverted_backdoor_0, reverted_backdoor_1);
+    EXPECT_EQ(backdoorLists[0].size(), 1);
+
+    remapped_backdoor.invalidate();
+}
+
+}  // namespace backdoor_manager_test
+}  // namespace gem5
diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py
index 49665dde91..501bb92682 100644
--- a/src/mem/cache/Cache.py
+++ b/src/mem/cache/Cache.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2015, 2018 ARM Limited
+# Copyright (c) 2012-2013, 2015, 2018, 2022 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -46,6 +46,7 @@
 from m5.objects.ReplacementPolicies import *
 from m5.objects.Tags import *
 
+
 # Enum for cache clusivity, currently mostly inclusive or mostly
 # exclusive.
 class Clusivity(Enum):
@@ -111,7 +112,7 @@ class BaseCache(ClockedObject):
         "Notify the hardware prefetcher on every access (not just misses)",
     )
     prefetch_on_pf_hit = Param.Bool(
-        False, "Notify the hardware prefetcher on hit on prefetched lines"
+        True, "Notify the hardware prefetcher on hit on prefetched lines"
     )
 
     tags = Param.BaseTags(BaseSetAssoc(), "Tag store")
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 24b3fe7219..3357d5e1b2 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -339,13 +339,25 @@ Cache::handleTimingReqMiss(PacketPtr pkt, CacheBlk *blk, Tick forward_time,
         if (pkt->isWrite()) {
             allocateWriteBuffer(pkt, forward_time);
         } else {
-            assert(pkt->isRead());
-
             // uncacheable accesses always allocate a new MSHR
 
             // Here we are using forward_time, modelling the latency of
             // a miss (outbound) just as forwardLatency, neglecting the
             // lookupLatency component.
+
+            // Here we allow allocating a miss buffer for read requests
+            // and x86's clflush requests. A clflush request should be
+            // propagated through all levels of the cache system.
+
+            // Doing clflush in uncacheable regions might sound contradictory;
+            // however, it is entirely possible due to how the Linux kernel
+            // handles page property changes. When the Linux kernel wants to
+            // change a page property, it flushes the related cache lines. The
+            // kernel might change the page property before flushing the cache
+            // lines. As a result, a clflush might occur in an uncacheable
+            // region that the kernel marked uncacheable before flushing.
+            // A clflush results in a CleanInvalidReq.
+            assert(pkt->isRead() || pkt->isCleanInvalidateRequest());
             allocateMissBuffer(pkt, forward_time);
         }
 
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
index 775efbe673..e476ab639d 100644
--- a/src/mem/cache/cache_blk.hh
+++ b/src/mem/cache/cache_blk.hh
@@ -515,6 +515,7 @@ class TempCacheBlk final : public CacheBlk
         data = new uint8_t[size];
     }
     TempCacheBlk(const TempCacheBlk&) = delete;
+    using CacheBlk::operator=;
     TempCacheBlk& operator=(const TempCacheBlk&) = delete;
     ~TempCacheBlk() { delete [] data; };
 
diff --git a/src/mem/cache/compressors/base.cc b/src/mem/cache/compressors/base.cc
index df3020dbf8..6ab2831fc9 100644
--- a/src/mem/cache/compressors/base.cc
+++ b/src/mem/cache/compressors/base.cc
@@ -78,7 +78,7 @@ Base::CompressionData::getSizeBits() const
 std::size_t
 Base::CompressionData::getSize() const
 {
-    return std::ceil(_size/8);
+    return std::ceil(_size/(float)CHAR_BIT);
 }
 
 Base::Base(const Params &p)
diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index a350319258..c15ef6539c 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, 2014, 2019 ARM Limited
+# Copyright (c) 2012, 2014, 2019, 2022 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -45,7 +45,7 @@
 from m5.objects.ReplacementPolicies import *
 
 
-class HWPProbeEvent(object):
+class HWPProbeEvent:
     def __init__(self, prefetcher, obj, *listOfNames):
         self.obj = obj
         self.prefetcher = prefetcher
@@ -192,6 +192,13 @@ class StridePrefetcher(QueuedPrefetcher):
     use_requestor_id = Param.Bool(True, "Use requestor id based history")
 
     degree = Param.Int(4, "Number of prefetches to generate")
+    distance = Param.Unsigned(
+        0,
+        "How far ahead of the demand stream to start prefetching. "
+        "Skip this number of strides ahead of the first identified prefetch, "
+        "then generate `degree` prefetches at `stride` intervals. "
+        "A value of zero indicates no skip.",
+    )
 
     table_assoc = Param.Int(4, "Associativity of the PC table")
     table_entries = Param.MemorySize("64", "Number of entries of the PC table")
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index e3e4b24cf2..25c37df323 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2022 Arm Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -245,6 +245,7 @@ Base::probeNotify(const PacketPtr &pkt, bool miss)
     // operations or for writes that we are coaslescing.
     if (pkt->cmd.isSWPrefetch()) return;
     if (pkt->req->isCacheMaintenance()) return;
+    if (pkt->isCleanEviction()) return;
     if (pkt->isWrite() && cache != nullptr && cache->coalesce()) return;
     if (!pkt->req->hasPaddr()) {
         panic("Request must have a physical address");
diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc
index 1ab34d2e9b..c67c315dad 100644
--- a/src/mem/cache/prefetch/queued.cc
+++ b/src/mem/cache/prefetch/queued.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015 ARM Limited
+ * Copyright (c) 2014-2015, 2022 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -178,7 +178,7 @@ Queued::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
     if (queueSquash) {
         auto itr = pfq.begin();
         while (itr != pfq.end()) {
-            if (itr->pfInfo.getAddr() == blk_addr &&
+            if (blockAddress(itr->pfInfo.getAddr()) == blk_addr &&
                 itr->pfInfo.isSecure() == is_secure) {
                 DPRINTF(HWPrefetch, "Removing pf candidate addr: %#x "
                         "(cl: %#x), demand request going to the same addr\n",
diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc
index 0a77b28a1c..4b709400c5 100644
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2018 Inria
- * Copyright (c) 2012-2013, 2015 ARM Limited
+ * Copyright (c) 2012-2013, 2015, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -84,6 +84,7 @@ Stride::Stride(const StridePrefetcherParams &p)
     threshConf(p.confidence_threshold/100.0),
     useRequestorId(p.use_requestor_id),
     degree(p.degree),
+    distance(p.distance),
     pcTableInfo(p.table_assoc, p.table_entries, p.table_indexing_policy,
         p.table_replacement_policy)
 {
@@ -167,16 +168,16 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi,
             return;
         }
 
+        // Round strides up to at least 1 cacheline
+        int prefetch_stride = new_stride;
+        if (abs(new_stride) < blkSize) {
+            prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
+        }
+
+        Addr new_addr = pf_addr + distance * prefetch_stride;
         // Generate up to degree prefetches
         for (int d = 1; d <= degree; d++) {
-            // Round strides up to atleast 1 cacheline
-            int prefetch_stride = new_stride;
-            if (abs(new_stride) < blkSize) {
-                prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
-            }
-
-            Addr new_addr = pf_addr + d * prefetch_stride;
-            addresses.push_back(AddrPriority(new_addr, 0));
+            addresses.push_back(AddrPriority(new_addr += prefetch_stride, 0));
         }
     } else {
         // Miss in table
diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index 7e55abea21..35ba4eed4e 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2018 Inria
- * Copyright (c) 2012-2013, 2015 ARM Limited
+ * Copyright (c) 2012-2013, 2015, 2022 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -105,6 +105,8 @@ class Stride : public Queued
 
     const int degree;
 
+    const int distance;
+
     /**
      * Information used to create a new PC table. All of them behave equally.
      */
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
index 560b041e45..8216f3dfe8 100644
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -215,6 +215,15 @@ BaseTags::print()
     return str;
 }
 
+void
+BaseTags::forEachBlk(std::function<void(CacheBlk &)> visitor)
+{
+    anyBlk([visitor](CacheBlk &blk) {
+        visitor(blk);
+        return false;
+    });
+}
+
 BaseTags::BaseTagStats::BaseTagStats(BaseTags &_tags)
     : statistics::Group(&_tags),
     tags(_tags),
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
index e2702778b8..c49188151c 100644
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -336,7 +336,7 @@ class BaseTags : public ClockedObject
      *
      * @param visitor Visitor to call on each block.
      */
-    virtual void forEachBlk(std::function<void(CacheBlk &)> visitor) = 0;
+    void forEachBlk(std::function<void(CacheBlk &)> visitor);
 
     /**
      * Find if any of the blocks satisfies a condition
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
index 22695d2010..8ffb7189b7 100644
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -233,12 +233,6 @@ class BaseSetAssoc : public BaseTags
         return indexingPolicy->regenerateAddr(blk->getTag(), blk);
     }
 
-    void forEachBlk(std::function<void(CacheBlk &)> visitor) override {
-        for (CacheBlk& blk : blks) {
-            visitor(blk);
-        }
-    }
-
     bool anyBlk(std::function<bool(CacheBlk &)> visitor) override {
         for (CacheBlk& blk : blks) {
             if (visitor(blk)) {
diff --git a/src/mem/cache/tags/compressed_tags.cc b/src/mem/cache/tags/compressed_tags.cc
index 32d7401550..c84718f5f6 100644
--- a/src/mem/cache/tags/compressed_tags.cc
+++ b/src/mem/cache/tags/compressed_tags.cc
@@ -163,14 +163,6 @@ CompressedTags::findVictim(Addr addr, const bool is_secure,
     return victim;
 }
 
-void
-CompressedTags::forEachBlk(std::function<void(CacheBlk &)> visitor)
-{
-    for (CompressionBlk& blk : blks) {
-        visitor(blk);
-    }
-}
-
 bool
 CompressedTags::anyBlk(std::function<bool(CacheBlk &)> visitor)
 {
diff --git a/src/mem/cache/tags/compressed_tags.hh b/src/mem/cache/tags/compressed_tags.hh
index b54efb05d4..6e5b62d3e8 100644
--- a/src/mem/cache/tags/compressed_tags.hh
+++ b/src/mem/cache/tags/compressed_tags.hh
@@ -108,16 +108,6 @@ class CompressedTags : public SectorTags
                          const std::size_t compressed_size,
                          std::vector<CacheBlk*>& evict_blks) override;
 
-    /**
-     * Visit each sub-block in the tags and apply a visitor.
-     *
-     * The visitor should be a std::function that takes a cache block.
-     * reference as its parameter.
-     *
-     * @param visitor Visitor to call on each block.
-     */
-    void forEachBlk(std::function<void(CacheBlk &)> visitor) override;
-
     /**
      * Find if any of the sub-blocks satisfies a condition.
      *
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
index deffd72015..dba89f809d 100644
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -85,6 +85,7 @@ class FALRUBlk : public CacheBlk
 {
   public:
     FALRUBlk() : CacheBlk(), prev(nullptr), next(nullptr), inCachesMask(0) {}
+    using CacheBlk::operator=;
 
     /** The previous block in LRU order. */
     FALRUBlk *prev;
@@ -253,12 +254,6 @@ class FALRU : public BaseTags
         return blk->getTag();
     }
 
-    void forEachBlk(std::function<void(CacheBlk &)> visitor) override {
-        for (int i = 0; i < numBlocks; i++) {
-            visitor(blks[i]);
-        }
-    }
-
     bool anyBlk(std::function<bool(CacheBlk &)> visitor) override {
         for (int i = 0; i < numBlocks; i++) {
             if (visitor(blks[i])) {
diff --git a/src/mem/cache/tags/sector_blk.hh b/src/mem/cache/tags/sector_blk.hh
index dae8741e39..fbfea64d93 100644
--- a/src/mem/cache/tags/sector_blk.hh
+++ b/src/mem/cache/tags/sector_blk.hh
@@ -64,6 +64,7 @@ class SectorSubBlk : public CacheBlk
   public:
     SectorSubBlk() : CacheBlk(), _sectorBlk(nullptr), _sectorOffset(0) {}
     SectorSubBlk(const SectorSubBlk&) = delete;
+    using CacheBlk::operator=;
     SectorSubBlk& operator=(const SectorSubBlk&) = delete;
     SectorSubBlk(SectorSubBlk&&) = delete;
     /**
diff --git a/src/mem/cache/tags/sector_tags.cc b/src/mem/cache/tags/sector_tags.cc
index cb121ebd9a..6a9ffd02ed 100644
--- a/src/mem/cache/tags/sector_tags.cc
+++ b/src/mem/cache/tags/sector_tags.cc
@@ -359,14 +359,6 @@ SectorTags::SectorTagsStats::regStats()
     }
 }
 
-void
-SectorTags::forEachBlk(std::function<void(CacheBlk &)> visitor)
-{
-    for (SectorSubBlk& blk : blks) {
-        visitor(blk);
-    }
-}
-
 bool
 SectorTags::anyBlk(std::function<bool(CacheBlk &)> visitor)
 {
diff --git a/src/mem/cache/tags/sector_tags.hh b/src/mem/cache/tags/sector_tags.hh
index bad132158c..035b085962 100644
--- a/src/mem/cache/tags/sector_tags.hh
+++ b/src/mem/cache/tags/sector_tags.hh
@@ -193,16 +193,6 @@ class SectorTags : public BaseTags
      */
     Addr regenerateBlkAddr(const CacheBlk* blk) const override;
 
-    /**
-     * Visit each sub-block in the tags and apply a visitor.
-     *
-     * The visitor should be a std::function that takes a cache block.
-     * reference as its parameter.
-     *
-     * @param visitor Visitor to call on each block.
-     */
-    void forEachBlk(std::function<void(CacheBlk &)> visitor) override;
-
     /**
      * Find if any of the sub-blocks satisfies a condition.
      *
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index 8163299a09..74ef1ead36 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -159,7 +159,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
     assert(is_express_snoop == cache_responding);
 
     // determine the destination based on the destination address range
-    PortID mem_side_port_id = findPort(pkt->getAddrRange());
+    PortID mem_side_port_id = findPort(pkt);
 
     // test if the crossbar should be considered occupied for the current
     // port, and exclude express snoops from the check
@@ -563,7 +563,7 @@ CoherentXBar::recvTimingSnoopReq(PacketPtr pkt, PortID mem_side_port_id)
     // device responsible for the address range something is
     // wrong, hence there is nothing further to do as the packet
     // would be going back to where it came from
-    assert(findPort(pkt->getAddrRange()) == mem_side_port_id);
+    assert(findPort(pkt) == mem_side_port_id);
 }
 
 bool
@@ -799,7 +799,7 @@ CoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
 
     // even if we had a snoop response, we must continue and also
     // perform the actual request at the destination
-    PortID mem_side_port_id = findPort(pkt->getAddrRange());
+    PortID mem_side_port_id = findPort(pkt);
 
     if (sink_packet) {
         DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
@@ -1035,7 +1035,7 @@ CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
             }
         }
 
-        PortID dest_id = findPort(pkt->getAddrRange());
+        PortID dest_id = findPort(pkt);
 
         memSidePorts[dest_id]->sendFunctional(pkt);
     }
diff --git a/src/mem/dram_interface.cc b/src/mem/dram_interface.cc
index 65e06db4d3..99efceb444 100644
--- a/src/mem/dram_interface.cc
+++ b/src/mem/dram_interface.cc
@@ -37,15 +37,19 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
 #include "mem/dram_interface.hh"
 
 #include "base/bitfield.hh"
 #include "base/cprintf.hh"
 #include "base/trace.hh"
 #include "debug/DRAM.hh"
+#include "debug/DRAMAddr.hh"
 #include "debug/DRAMPower.hh"
 #include "debug/DRAMState.hh"
+#include "debug/HDBitflip.hh"
+#include "debug/RhBitflip.hh"
+#include "debug/RhInhibitor.hh"
+#include "debug/RowHammer.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -171,6 +175,461 @@ DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
     return std::make_pair(selected_pkt_it, selected_col_at);
 }
 
+
+void
+DRAMInterface::checkRowHammer(Bank& bank_ref, MemPacket* mem_pkt)
+{
+    // Model rowhammer-induced bitflips around the row that `mem_pkt`
+    // activates in `bank_ref`. Three checks run in order: half-double
+    // flips in row - 2 / row + 2, then ordinary flips in row - 1, then in
+    // row + 1. A flip needs the matching rhTriggers counter to pass its
+    // threshold, a random draw to succeed, and a not-yet-flagged column
+    // listed in device_map for the victim row. The chosen column is marked
+    // in flagged_entries either way. Flips are only reported (DPRINTF and,
+    // if rhStatDump is set, rhStatFile); packet data is not modified here.
+    // NOTE(review): an empty device_map list would make `% size()` divide
+    // by zero -- confirm the map never holds empty lists.
+    // Half-double comes first. Rows relative to the activated row:
+    // | row - 4 |
+    // | row - 3 |
+    // | row - 2 |  <-- bitflips here
+    // | row - 1 |
+    // | row     |
+    // | row + 1 |
+    // | row + 2 |  <-- bitflips here
+    // | row + 3 |
+    // | row + 4 |
+
+    // check for half double only if the current row is 2 or higher, as
+    // row - 2 does not exist otherwise.
+    if (mem_pkt->row >= 2 ) {
+        if (bank_ref.rhTriggers[mem_pkt->row - 1][1] >= 1 &&
+                bank_ref.rhTriggers[mem_pkt->row][1] >= 1000) {
+            // half-double is rare. so we have to adjust the probability by a
+            // very large factor.
+
+            bool bitflip = false;
+            // I cannot flip this bit with a probability of 1. therefore, we
+            // need the second probability factor to cause bitflips
+            // the seed is host wall-clock time in milliseconds, so every draw
+            // made within the same millisecond yields the same rand() value
+            struct timeval time;
+            gettimeofday(&time,NULL);
+
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+            // srand(time(nullptr));
+            uint64_t prob = rand() % halfDoubleProb + 1;
+            if (prob <= 1)
+                bitflip = true;
+
+            // now search for the device_map whether this row is weak or not
+            uint16_t col = 0;
+            if (device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row - 2)] != nullptr) {
+                srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+
+                uint16_t col_idx = rand() % (uint16_t)device_map["0"]
+                        [std::to_string(bank_ref.bank)]
+                        [std::to_string(mem_pkt->row - 2)].size();
+                col = (uint16_t)device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row - 2)][col_idx];
+
+                // TODO:
+                // Now delete this entry from the device map as the same bit
+                // (column in this case) cannot flip twice unless something new
+                // is written in the same column.
+
+                // XXX:
+                // I am using a simple method by keeping track of this column
+                // and not allowing this column to flip until a write happens
+                // on this column.
+
+                if (bank_ref.flagged_entries[mem_pkt->row - 2][col] == 1) {
+                    bitflip = false;
+                }
+                bank_ref.flagged_entries[mem_pkt->row - 2][col] = 1;
+
+            }
+            else {
+                bitflip = false;
+            }
+
+            if (bitflip) {
+                // This is a half-double bitflip. This will only appear if
+                // HDBitflip is enabled.
+                DPRINTF(HDBitflip,
+                        "HD Bitflip at %#x, bank %d, row %d, col %d\n",
+                        mem_pkt->addr + col, bank_ref.bank, mem_pkt->row - 2,
+                        col);
+            }
+        }
+    }
+
+    if (mem_pkt->row <= rowsPerBank - 3) {
+        if (bank_ref.rhTriggers[mem_pkt->row + 1][2] >= 1 &&
+                bank_ref.rhTriggers[mem_pkt->row][2] >= 1000) {
+
+            // half-double is rare. so we have to adjust the probability by a
+            // very large factor.
+            // flip bit here
+            bool bitflip = false;
+
+            // We cannot flip this bit with a probability of 1. therefore, we
+            // need the second probability factor to cause bitflips
+
+            // the seed is host wall-clock time in milliseconds, so every draw
+            // made within the same millisecond yields the same rand() value
+
+            struct timeval time;
+            gettimeofday(&time,NULL);
+
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+            uint64_t prob = rand() % halfDoubleProb + 1;
+            if (prob <= 1)
+                bitflip = true;
+
+            // TODO: We need to flip a bit in the MemPacket for row +- 2
+
+            uint16_t col = 0;
+            if (device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row + 2)] != nullptr) {
+
+                srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+                uint16_t col_idx = rand() % (uint16_t)device_map["0"]
+                        [std::to_string(bank_ref.bank)]
+                        [std::to_string(mem_pkt->row + 2)].size();
+                col = (uint16_t)device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row + 2)][col_idx];
+                // mem_pkt->corruptedAccess = true;
+
+                if (bank_ref.flagged_entries[mem_pkt->row + 2][col] == 1)
+                    bitflip = false;
+
+                bank_ref.flagged_entries[mem_pkt->row + 2][col] = 1;
+            }
+            else
+                bitflip = false;
+
+            if (bitflip)
+                DPRINTF(HDBitflip,
+                        "HD Bitflip at %#x, bank %d, row %d, col %d\n",
+                        mem_pkt->addr + col, bank_ref.bank, mem_pkt->row + 2,
+                        col);
+
+        }
+    }
+
+    // row `mem_pkt->row` was ACTIVATED. we need to check its neighborhood for
+    // bitflips.
+
+    bool single_sided = true, bitflip_status = false;
+
+    if (bank_ref.rhTriggers[mem_pkt->row][1]  >= rowhammerThreshold) {
+        // this is a compound probability factor with a tunable parameter
+        // for double rowhammer attacks
+
+        // check the ndb of the this row:
+        // we dont know that the value of N is in an N-sided attack. so we
+        // only have to see whether (a) this row is a part of an N sided
+        // attack.
+        // we expect that the number of activates of the edge rows is similar.
+        // in order to not let this slip, we keep a difference variable called
+        // delta. the user can set this value.
+        // check this->row is an aggressor row and then check for its neighbors
+        // row - 2 must exist, so this row can only be sandwiched if >= 2.
+        if (mem_pkt->row >= 2) {
+            if (bank_ref.aggressor_rows[mem_pkt->row]>=rowhammerThreshold/2 &&
+                bank_ref.aggressor_rows[mem_pkt->row-2]>=rowhammerThreshold/2){
+                    single_sided = false;
+                    bitflip_status = true;
+
+            }
+        }
+
+        struct timeval time;
+        gettimeofday(&time,NULL);
+
+        if (single_sided) {
+            // tunable probability
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+            uint64_t prob = rand() % singleSidedProb + 1;
+            if (prob <= 1)
+                // flip a bit!
+                bitflip_status = true;
+            // single sided bitflip should cause bitflips on both sides of the
+            // aggressor row.
+        }
+
+        if (!single_sided) {
+            // bitflip_status was set to true above when the double-sided
+            // pattern was detected. it survives only if the draw below comes
+            // out as 1, i.e. roughly once in doubleSidedProb attempts;
+            // every other outcome cancels the flip. unlike the single-sided
+            // path, `time` is re-read here before seeding.
+
+            // we need to flip a bit depending upon some probability
+            // (seeded from the current host millisecond)
+            gettimeofday(&time,NULL);
+
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+
+            uint64_t prob = rand() % doubleSidedProb + 1;
+            if (prob > 1)
+                bitflip_status = false;
+        }
+
+        uint16_t col = 0;
+        if (mem_pkt->row > 0) {
+            if (device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row - 1)] != nullptr) {
+
+                srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+                uint16_t col_idx = rand() % (uint16_t)device_map["0"]
+                    [std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row - 1)].size();
+                col = (uint16_t)device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row - 1)][col_idx];
+                // mem_pkt->corruptedAccess = true;
+                if (bank_ref.flagged_entries[mem_pkt->row - 1][col] == 1)
+                    bitflip_status = false;
+
+                bank_ref.flagged_entries[mem_pkt->row - 1][col] = 1;
+            }
+            else
+                // it does not really matter what the bitflip status is. it has
+                // to be set to false at this point.
+                bitflip_status = false;
+
+
+            if (bitflip_status) {
+                if (rhStatDump) {
+                    std::ofstream outfile;
+                    outfile.open(rhStatFile,
+                            std::ios::out | std::ios::app);
+
+                    outfile << "Bitflip at 0x" << std::hex <<
+                            mem_pkt->addr + col << std::dec << " bank " <<
+                            (int)bank_ref.bank << " row " << mem_pkt->row - 1
+                            << " col " << col << " single-sided " <<
+                            single_sided << std::endl;
+
+                    outfile.close();
+                }
+                DPRINTF(RhBitflip,
+                  "Bitflip at %#x, bank %d, row %d, col %d, single-sided %d\n",
+                  mem_pkt->addr + col, bank_ref.bank, mem_pkt->row - 1,
+                  col, single_sided);
+
+                // Also, need to figure out if the accessed
+                // column is flippable or not, and if it has
+                // previously been flipped
+                // also reset the trigger counter (by looking at weakColumns)
+
+                // If this access is turned out to be corrupted, we will
+                // reset that bit in the weakColumns, so that the future
+                // accesses of the column will not induce a bit flip
+
+                // kg -> ayaz: we need to talk on how to parse the device map
+                // we need exact columns/capacitors to model this part.
+
+                // if (bank_ref.weakColumns[mem_pkt->row - 1].test(0)) {
+                //     mem_pkt->corruptedAccess = true;
+                //     bank_ref.weakColumns[mem_pkt->row - 1].reset(0);
+                // }
+            }
+        }
+        // regardless of this row being a single or a double sided attack, its
+        // rowhammer counter will be set to zero.
+
+        // since now that rhtriggers is a vector, we need to take care of all
+        // the entries.
+
+        // we cannot flip the same bit, but we can flip the same row.
+        // TODO: uncomment these lines if you want to
+
+        // bank_ref.rhTriggers[mem_pkt->row][1] = 0;
+        // bank_ref.rhTriggers[mem_pkt->row - 2][2] = 0;
+        // bank_ref.rhTriggers[mem_pkt->row - 3][3] = 0;
+        // bank_ref.rhTriggers[mem_pkt->row + 1][0] = 0;
+
+    }
+
+    single_sided = true, bitflip_status = false;
+    if (bank_ref.rhTriggers[mem_pkt->row][2]  >= rowhammerThreshold) {
+
+        // this is a compound probability factor with a tunable parameter
+        // for double rowhammer attacks
+
+        // check the ndb of the this row:
+        // we dont know that the value of N is in an N-sided attack. so we
+        // only have to see whether (a) this row is a part of an N sided
+        // attack.
+        // we expect that the number of activates of the edge rows is similar.
+        // in order to not let this slip, we keep a difference variable called
+        // delta. the user can set this value.
+
+        // check this->row is an aggressor row and then check for its neighbors
+        if (mem_pkt->row < rowsPerBank - 3) {
+            if (bank_ref.aggressor_rows[mem_pkt->row] >=
+                    rowhammerThreshold/2 &&
+                    bank_ref.aggressor_rows[mem_pkt->row + 2] >=
+                    rowhammerThreshold/2) {
+                single_sided = false;
+                bitflip_status = true;
+            }
+        }
+
+        struct timeval time;
+        gettimeofday(&time,NULL);
+        if (single_sided) {
+            // NOTE(review): cut-off is 10 here but 1 for row - 1 -- confirm.
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+            uint64_t prob = rand() % singleSidedProb + 1;
+            if (prob <= 10)
+                // flip a bit!
+                bitflip_status = true;
+        }
+
+
+        if (!single_sided) {
+            // we need to flip a bit depending upon some probability
+            // struct timeval time;
+            gettimeofday(&time,NULL);
+
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+            // srand(time(nullptr));
+            uint64_t prob = rand() % doubleSidedProb + 1;
+            if (prob > 1)
+                bitflip_status = false;
+        }
+
+        uint16_t col = 0;
+        if (mem_pkt->row < rowsPerBank - 2) {
+            if (device_map["0"][std::to_string(bank_ref.bank)]
+                [std::to_string(mem_pkt->row + 1)] != nullptr) {
+
+                srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+                uint16_t col_idx = rand() % (uint16_t)device_map["0"]
+                    [std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row + 1)].size();
+                col = (uint16_t)device_map["0"][std::to_string(bank_ref.bank)]
+                    [std::to_string(mem_pkt->row + 1)][col_idx];
+                // mem_pkt->corruptedAccess = true;
+                if (bank_ref.flagged_entries[mem_pkt->row + 1][col] == 1)
+                    bitflip_status = false;
+
+                bank_ref.flagged_entries[mem_pkt->row + 1][col] = 1;
+            }
+            else
+                bitflip_status = false;
+
+            if (bitflip_status) {
+                if (rhStatDump) {
+                    std::ofstream outfile;
+                    outfile.open(rhStatFile,
+                            std::ios::out | std::ios::app);
+
+                    outfile << "Bitflip at 0x" << std::hex <<
+                            mem_pkt->addr + col <<
+                            std::dec << " bank " << (int)bank_ref.bank
+                            << " row "
+                            << mem_pkt->row + 1 << " col " << col
+                            << " single-sided " << single_sided << std::endl;
+
+                    outfile.close();
+                }
+                DPRINTF(RhBitflip,
+                    "Bitflip at %#x, bank %d, row %d, col %d, single-sided "
+                    "%d\n",
+                    mem_pkt->addr + col, bank_ref.bank, mem_pkt->row + 1, col,
+                    single_sided);
+
+                // Also, need to figure out if the accessed
+                // column is flippable or not, and if it has
+                // previously been flipped
+                // also reset the trigger counter (by looking at weakColumns)
+                // If this access is turned out to be corrupted, we will
+                // reset that bit in the weakColumns, so that the future
+                // accesses of the column will not induce a bit flip
+
+                // if (bank_ref.weakColumns[mem_pkt->row + 1].test(0)) {
+                //     // this condition needs to be fixed/verified.
+                //     mem_pkt->corruptedAccess = true;
+                //     bank_ref.weakColumns[mem_pkt->row + 1].reset(0);
+                // }
+
+                // similar to the statement above, we do the same here.
+                // we cannot reset the counters to zero.
+                // the TRR mechanism has to do this. or a refresh event.
+
+                // bank_ref.rhTriggers[mem_pkt->row + 3][0] = 0;
+                // bank_ref.rhTriggers[mem_pkt->row + 2][1] = 0;
+                // bank_ref.rhTriggers[mem_pkt->row][2] = 0;
+                // bank_ref.rhTriggers[mem_pkt->row - 1][3] = 0;
+            }
+        }
+    }
+}
+
+void
+DRAMInterface::updateVictims(Bank& bank_ref, uint32_t row)
+{
+    // Advance the rowhammer trigger counters kept in `bank_ref` for `row`.
+    // Each row has four slots, rhTriggers[row][0..3]; checkRowHammer() tests
+    // slot 1 when judging row - 1 and slot 2 when judging row + 1.
+    // NOTE(review): slots 0 and 3 presumably track the rows two away --
+    // confirm against the Bank definition.
+
+    assert(row != rowsPerBank);
+
+    // the difference between this version and rh-analysis is that instead of
+    // measuring a blast radius of 2, the +2 counters are supposed to be
+    // incremented only once the +1 counters reach 1000.
+    // NOTE(review): only the row == 0 branch applies such a gate (% 1024).
+
+    if ((row <= 1) || (row >= rowsPerBank-2)) {
+        if (row == 0) {
+            if (bank_ref.rhTriggers[row][1]++ % 1024 == 0)
+                bank_ref.rhTriggers[row][0]++;
+        } else if (row == 1) {
+            bank_ref.rhTriggers[row][2]++;
+            bank_ref.rhTriggers[row][1]++;
+            bank_ref.rhTriggers[row][0]++;
+        } else if (row == rowsPerBank - 1) {
+            bank_ref.rhTriggers[row][3]++;
+            bank_ref.rhTriggers[row][2]++;
+        } else if (row == rowsPerBank - 2) {
+            bank_ref.rhTriggers[row][3]++;
+            bank_ref.rhTriggers[row][2]++;
+            bank_ref.rhTriggers[row][1]++;
+        }
+    }
+    else {
+        // rows away from both bank edges: bump slots 1 and 2 first,
+        bank_ref.rhTriggers[row][1]++;
+        bank_ref.rhTriggers[row][2]++;
+
+        // then slots 0 and 3.
+        bank_ref.rhTriggers[row][0]++;
+        bank_ref.rhTriggers[row][3]++;
+    }
+
+    // Counters are never cleared here. The disabled snippet below would
+    // reset a row's counter on activation while it is still under the
+    // threshold, so that the flipping cycle restarts once flipped data has
+    // been returned.
+
+    // if (bank_ref.rhTriggers[row] < rowhammerThreshold) {
+    //     bank_ref.rhTriggers[row] = 0;
+    // }
+
+    // kg: the same needs to be done to the trr tables as well.
+    //     the trr tables are reset (if necessary) in the refresh section,
+    //     where these are triggered.
+}
+
+
+
 void
 DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
                        Tick act_tick, uint32_t row)
@@ -185,12 +644,636 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
     else
         act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow, true);
 
+    if (!first_act) {
+        // first access to memory.
+        first_act = true;
+        DPRINTF(DRAM, "Memory was first ACTed at tick %d\n", act_at);
+
+        if (rhStatDump) {
+            // need to start the stat dumper here
+            DPRINTF(RowHammer, "Dumping RowHammer stats at %s\n", rhStatFile);
+            std::ofstream outfile;
+            outfile.open(
+                rhStatFile, std::ios::out | std::ios::trunc);
+            outfile << "# starting to capture row access for RH analysis" <<
+            std::endl;
+            outfile.close();
+        }
+
+        for (auto &b: rank_ref.banks) {
+            b.trr_table.resize(counterTableLength, std::vector<uint64_t>(4));
+            b.companion_table.resize(
+                companionTableLength, std::vector<uint64_t>(4));
+
+            // initializing flag_map
+            b.flagged_entries.resize(8192, std::vector<bool>(1024));
+        }
+        para_refreshes = 0;
+
+    }
     DPRINTF(DRAM, "Activate at tick %d\n", act_at);
 
+    // we have to keep a track of all the activates in the aggressor_table
+    bank_ref.aggressor_rows[row]++;
+    bool act_flag = false;
+    for (auto&  it: bank_ref.activated_row_list)
+        if (it == row) {
+            act_flag = true;
+            break;
+        }
+    if (!act_flag)
+        bank_ref.activated_row_list.push_back(row);
+
+    // we only model TRR for the three major DRAM vendors only.
+
+    switch (trrVariant) {
+        case 0: {
+            // this is basically no trr. it does absolutely nothing.
+            break;
+        }
+        case 1: {
+            // This corresponds to the table-based TRR from Vendor A.
+            // Vendor A is Samsung.
+            // There are two different TRR-triggered refreshes in this case.
+            // TRR induced refreshes are handled in the refresh section.
+
+            // kg: We use the trr_table here for this bank.
+            // 0 -> rank
+            // 1 -> bank
+            // 2 -> row
+            // 3 -> counter
+
+            bool found_flag = false;
+
+            for (int i = 0; i < std::max(
+                counterTableLength, bank_ref.entries); i++) {
+                // found this addr in the trr table.
+                if (bank_ref.trr_table[i][0] == rank_ref.rank &&
+                        bank_ref.trr_table[i][1] == bank_ref.bank &&
+                        bank_ref.trr_table[i][2] == row) {
+
+                    // TODO: Need to check whether this row is open.
+                    // I guess activateBank does not require this.
+                    found_flag = true;
+
+                    // since this row is accessed, we increment its counter by
+                    // 1. this information is used in the refresh section.
+                    bank_ref.trr_table[i][3]++;
+                    break;
+                }
+            }
+
+            // If the row is not found in the trr table.
+            if (!found_flag) {
+                // We have a row which is not in the TRR table. But we don't
+                // know if we want to put this row in the table or not.
+                // UTRR does not discuss this.
+
+                // We use a small companion counter table, which acts like a
+                // buffer to insert new rows. Rows gets replaced here. This
+                // approach to track rows is similar to the technique proposed
+                // by Prohit (Son et. al., DAC 2017).
+
+                // We use two variables to find and track this row in the
+                // companion table.
+
+                int companion_idx = 0;
+                bool companion_found_flag = false;
+
+                for (int i = 0 ; i < std :: max(companionTableLength,
+                        bank_ref.companion_entries); i++) {
+                    // found this address in the companion table.
+                    if (bank_ref.companion_table[i][0] == rank_ref.rank &&
+                            bank_ref.companion_table[i][1] == bank_ref.bank &&
+                            bank_ref.companion_table[i][2] == row) {
+
+                        companion_found_flag = true;
+
+                        // increment this counter by 1. This value is used to
+                        // promote rows from the companion table to the trr
+                        // table.
+                        bank_ref.companion_table[i][3]++;
+
+                        // companion index is set to i.
+                        companion_idx = i;
+                        break;
+                    }
+                }
+
+                if (!companion_found_flag) {
+                    // If we did not find this row in the companion table, then
+                    // we make a new entry for this row in the companion table.
+
+                    // `idx` is used to find the index in the companion table
+                    // to insert this row.
+                    int idx = 0;
+
+                    // Find if there is space in the companion table for a new
+                    // row.
+
+                    if (bank_ref.companion_entries < companionTableLength) {
+
+                        // There is space left in the companion table.
+
+                        idx = (int)bank_ref.companion_entries;
+
+                        // TODO: This part of the code is not required. Verify
+                        // this claim.
+                        if (bank_ref.companion_entries <
+                                companionTableLength - 1)
+                            bank_ref.companion_entries += 1;
+                    }
+                    else {
+                        // there is no space left in the companion table.
+                        // TODO: Do we insert this row at the end, replacing
+                        // anything there? OR, Do we find the lowest counter
+                        // count for the row to replace?
+
+                        assert(idx == 0);
+
+                        // the number of entries in the companion table cannot
+                        // be more than the total length of the table.
+
+                        assert(bank_ref.companion_entries
+                                == companionTableLength);
+
+                        // using the second approach here, i.e., entry with the
+                        // lowest count will be replaced.
+                        for (int i = 0; i < companionTableLength ; i++) {
+                            if (bank_ref.companion_table[idx][3] >
+                                    bank_ref.companion_table[i][3])
+                                idx = i;
+                        }
+                    }
+
+                    // assert idx is within the counterTableLength range.
+                    assert(bank_ref.companion_entries < companionTableLength);
+
+                    // creating this entry in the companion table.
+
+                    bank_ref.companion_table[idx][0] = rank_ref.rank;
+                    bank_ref.companion_table[idx][1] = bank_ref.bank;
+                    bank_ref.companion_table[idx][2] = row;
+                    bank_ref.companion_table[idx][3] = 1;
+                }
+                else {
+                    // found this row in the companion table. We now have to
+                    // decide whether we promote this row to the trr_table or
+                    // we just continue with our experiments.
+
+                    // This row has more acts than the companion threshold,
+                    // then we promote this row to the trr_table.
+
+                    if (bank_ref.companion_table[companion_idx][3]
+                            > companionThreshold) {
+                        // We insert this row in the trr_table. Is there space?
+                        // kg: Find out if there is space in the TRR table for
+                        // a new row insertion.
+                        int trr_idx = 0;
+
+                        // Check if there is space in the trr table for a new
+                        // row.
+
+                        if (bank_ref.entries < counterTableLength) {
+                            // There is space in the trr table.
+
+                            trr_idx = (int)bank_ref.entries;
+                            // std :: cout << "_x " << trr_idx << " " <<
+                            // bank_ref.entries << std :: endl;
+
+                            // TODO: This part of the code might not be
+                            // required. Double check this.
+
+                            if (bank_ref.entries < counterTableLength - 1)
+                                bank_ref.entries++;
+                        }
+                        else {
+                            // there is no space for a new row.
+                            // TODO: We replace the trr entry with the least
+                            // act count. Verify this with the UTRR paper.
+
+                            // sanity checks.
+                            assert(trr_idx == 0);
+                            assert(bank_ref.entries == counterTableLength);
+
+                            for (int i = 0; i < counterTableLength ; i++) {
+                                if (bank_ref.trr_table[trr_idx][3] >
+                                        bank_ref.trr_table[i][3])
+                                    trr_idx = i;
+                            }
+                        }
+
+                        // sanity checks
+                        assert(trr_idx >= 0 && trr_idx < counterTableLength);
+
+                        bank_ref.trr_table[trr_idx][0] =
+                                bank_ref.companion_table[companion_idx][0];
+                        bank_ref.trr_table[trr_idx][1] =
+                                bank_ref.companion_table[companion_idx][1];
+                        bank_ref.trr_table[trr_idx][2] =
+                                bank_ref.companion_table[companion_idx][2];
+                        bank_ref.trr_table[trr_idx][3] =
+                                bank_ref.companion_table[companion_idx][3];
+
+                        // An entry has been cleared in the companion table. we
+                        // need to adjust that in the companion table. Replace
+                        // the current idx with the last index.
+
+                        // RE: redoing this part in a simpler way.
+                        // sanity check: the companion_entries and the
+                        // companionTableLength has to be the same since i just
+                        // moved a row.
+
+                        // companion_index is empty. the end row will be moved
+                        // to the companion_index
+
+                        if (companion_idx != std::min(companionTableLength,
+                                bank_ref.companion_entries) - 1)
+                            for (int i = 0 ; i < 4 ; i++)
+                                bank_ref.companion_table[companion_idx][i] =
+                                        bank_ref.companion_table[std::min(
+                                        companionTableLength,
+                                        bank_ref.companion_entries) - 1][i];
+
+                        bank_ref.companion_entries--;
+                        assert(
+                            bank_ref.companion_entries < companionTableLength);
+                    }
+
+                }
+            }
+
+            DPRINTF(RowHammer, "Rank %d, Bank %d, Row %d, Entries %d, "
+                    "Companion Entries %d\n", rank_ref.rank, bank_ref.bank,
+                    row, bank_ref.entries, bank_ref.companion_entries);
+
+            break;
+        }
+        case 2: {
+            // This is the one with the random sampler.
+            // We will use a table. Otherwise, we don't know how to track all
+            // the different rows activated.
+
+            // the catch is that it is a single entry table.
+            // this is SK Hynix from U-TRR paper.
+
+            // we also need to decide whether we need to sample this row or not
+            // we use a probability function based on the address' bank, rank
+            // and row bits. This should work as this is consistently observed
+            // on real dimms.
+
+            // We reuse the variable trr_table length. The sampler will
+            // randomly enter these rows into the table. the sampler acts at
+            // ACT time.
+
+            // picking the first 10 bits. xoring them to see if that row needs
+            // to be entered in the table or not.
+
+            // TODO: XXX: Missing feature.
+            // There is no way to know if a particular row's ACT is closing in
+            // on a tREFI request. This TRR activates its sampler close to the
+            // tREFI instruction.
+
+            int select_count = 0;
+            int recreated_address = bank_ref.bank + rank_ref.rank + row;
+            bool selected = false;
+
+            // this rng is really difficult to implement and match it with an
+            // actual SK Hynix DIMM.
+
+            while (recreated_address != 0) {
+                selected = selected ^ (recreated_address % 2);
+                recreated_address /=2;
+                if (++select_count == 10)
+                    break;
+            }
+
+            DPRINTF(RhInhibitor, "Looking into the rng function "
+                " row %d, selected %d, recreated_address %d\n",
+                row, selected, recreated_address);
+
+            if (selected) {
+                // This row is selected to be sampled. Therefore we proceed to
+                // add this row in the counter table.
+
+                // find space in the trr_table. companion_table is not needed
+                // in this case.
+                // There is space in the companion table for a new row.
+                uint8_t trr_idx = 0;
+
+                // before doing this, we need to check whether we have an entry
+                // for this row or not.
+
+                // forcing entry to the companion table when it is full.
+
+                bool found_flag = false;
+                for (int i = 0; i < std::max(
+                        counterTableLength, bank_ref.entries); i++) {
+                    // found this addr
+                    if (bank_ref.trr_table[i][0] == rank_ref.rank &&
+                        bank_ref.trr_table[i][1] == bank_ref.bank &&
+                        bank_ref.trr_table[i][2] == row) {
+                            // TODO: Need to check whether this row is open.
+                            // I guess activateBank does not require this.
+                            found_flag = true;
+                            bank_ref.trr_table[i][3]++;
+                            break;
+                        }
+                }
+
+                if (!found_flag) {
+                    // only if the table entry for that particular row is
+                    // missing we create a new entry in this table.
+
+                    // otherwise, we are done in this step. We don't need to
+                    // cover this part of the program.
+
+                    if (bank_ref.entries < counterTableLength) {
+                        trr_idx = bank_ref.entries;
+                        if (bank_ref.entries < counterTableLength - 1)
+                            bank_ref.entries += 1;
+                    }
+                    else {
+                        for (int i = 0; i < counterTableLength ; i++) {
+                            if (bank_ref.trr_table[trr_idx][3] >
+                                    bank_ref.trr_table[i][3])
+                                trr_idx = i;
+                        }
+                    }
+                    bank_ref.trr_table[trr_idx][0] = rank_ref.rank;
+                    bank_ref.trr_table[trr_idx][1] = bank_ref.bank;
+                    bank_ref.trr_table[trr_idx][2] = row;
+                    bank_ref.trr_table[trr_idx][3] = 1;
+                }
+            }
+            // we are done in the sampler phase of the program. We just need to
+            // take care of the inhibitor phase of the program.
+            DPRINTF(RowHammer, "Rank %d, Bank %d, Row %d, Entries %d\n",
+                    rank_ref.rank, bank_ref.bank, row, bank_ref.entries);
+            break;
+        }
+        case 3: {
+
+            // This case corresponds Vendor C from the U-TRR paper. The major
+            // points in this TRR implementation is the 2k activate count. It
+            // also has a probabilistic sampler, which samples rows. For
+            // simplicity, we will keep a track of the first 2k accesses
+            // deterministically.
+            // XXX: How?
+
+            // The table to store this information is fixed. So, we are limited
+            // by space of the trr table.
+
+            // This TRR is also triggered in a per-bank basis.
+
+            // act_count is reset when it reaches 2k in the inhibitor phase.
+
+            bank_ref.act_count++;
+
+            // We use the same random function to keep a track of these
+            // aggressor rows in the table.
+
+            int select_count = 0;
+            int recreated_address = bank_ref.bank + rank_ref.rank + row;
+            bool selected = false;
+
+            while (recreated_address != 0) {
+                selected = selected ^ (recreated_address % 2);
+                recreated_address /=2;
+                if (++select_count == 10)
+                    break;
+            }
+
+            if (selected) {
+                // similar procedure as Vendor B. We traverse the table to find
+                // this entry in the table. This counter is necessary to issue
+                // refreshes in the inhibitor phase.
+
+                // This row is selected to be sampled. Therefore we proceed to
+                // add this row in the counter table.
+
+                // before doing this, we need to check whether we have an entry
+                // for this row or not.
+
+                bool found_flag = false;
+                for (int i = 0; i < std::max(
+                        counterTableLength, bank_ref.entries); i++) {
+                    // found this addr
+                    if (bank_ref.trr_table[i][0] == rank_ref.rank &&
+                        bank_ref.trr_table[i][1] == bank_ref.bank &&
+                        bank_ref.trr_table[i][2] == row) {
+                            // TODO: Need to check whether this row is open.
+                            // I guess activateBank does not require this.
+                            found_flag = true;
+                            bank_ref.trr_table[i][3]++;
+                            break;
+                        }
+                }
+
+                if (!found_flag) {
+                    // find space in the trr_table. companion_table is not
+                    // needed in this case. There is space in the companion
+                    // table for a new row.
+                    uint8_t trr_idx = 0;
+                    // only if the table entry for that particular row is
+                    // missing we create a new entry in this table. otherwise,
+                    // we are done in this step. We don't need to cover this
+                    // part of the program.
+                    if (bank_ref.entries < counterTableLength) {
+                        trr_idx = bank_ref.entries;
+                        if (bank_ref.entries < counterTableLength - 1)
+                            bank_ref.entries += 1;
+                    }
+                    else {
+                        for (int i = 0; i < counterTableLength ; i++) {
+                            if (bank_ref.trr_table[trr_idx][3] >
+                                    bank_ref.trr_table[i][3])
+                                trr_idx = i;
+                        }
+                    }
+                    bank_ref.trr_table[trr_idx][0] = rank_ref.rank;
+                    bank_ref.trr_table[trr_idx][1] = bank_ref.bank;
+                    bank_ref.trr_table[trr_idx][2] = row;
+                    bank_ref.trr_table[trr_idx][3] = 1;
+                }
+            }
+
+            // we just need to program the inhibitor phase of the program now.
+
+            break;
+        }
+        case 4: {
+            // TRR Vendor A
+
+            // Experimental version without companion table.
+            // Companion table parameters are ignored.
+
+            // try searching in the trr_table first.
+            bool found_flag = false;
+            for (int i = 0; i < std::max(counterTableLength,
+                    bank_ref.entries); i++) {
+                // found this addr in the trr table.
+                if (bank_ref.trr_table[i][0] == rank_ref.rank &&
+                        bank_ref.trr_table[i][1] == bank_ref.bank &&
+                        bank_ref.trr_table[i][2] == row) {
+
+                    // TODO: Need to check whether this row is open.
+                    // I guess activateBank does not require this.
+                    found_flag = true;
+
+                    // since this row is accessed, we increment its counter by
+                    // 1. this information is used in the refresh section.
+                    bank_ref.trr_table[i][3]++;
+                    break;
+                }
+            }
+
+            // this row is not present in the TRR table. Therefore, we create a
+            // new entry for this row in the trr table.
+
+            if (!found_flag) {
+                // check if there is space in the trr table
+                if (bank_ref.entries < counterTableLength) {
+                    // there is space in the table. we just create a new entry
+                    // at the end of this table.
+                    assert(bank_ref.entries < counterTableLength);
+                    bank_ref.trr_table[bank_ref.entries][0] = rank_ref.rank;
+                    bank_ref.trr_table[bank_ref.entries][1] = bank_ref.bank;
+                    bank_ref.trr_table[bank_ref.entries][2] = row;
+                    bank_ref.trr_table[bank_ref.entries][3] = 1;
+                    bank_ref.entries++;
+                }
+                else {
+                    // the table is full: evict the entry with the lowest
+                    // act count (column 3), as trr variants 2 and 3 do.
+                    int min_idx = 0;
+                    assert(bank_ref.entries == counterTableLength);
+                    for (int i = 0 ; i < counterTableLength; i++)
+                        if (bank_ref.trr_table[min_idx][3] >
+                                bank_ref.trr_table[i][3])
+                            min_idx = i;
+
+                    // sanity check
+                    assert(min_idx >= 0 && min_idx < counterTableLength);
+
+                    bank_ref.trr_table[min_idx][0] = rank_ref.rank;
+                    bank_ref.trr_table[min_idx][1] = bank_ref.bank;
+                    bank_ref.trr_table[min_idx][2] = row;
+                    bank_ref.trr_table[min_idx][3] = 1;
+                }
+            }
+            // we are done in the sampler phase of the program. We just need to
+            // take care of the inhibitor phase of the program.
+            DPRINTF(RowHammer, "Rank %d, Bank %d, Row %d, Entries %d\n",
+                    rank_ref.rank, bank_ref.bank, row, bank_ref.entries);
+            break;
+        }
+
+        case 5: {
+            // this corresponds to PARA
+            // PARA does not have a sampler/counting mechanism. it just issues
+            // rowhammer refreshes with a probability of P.
+
+            struct timeval time;
+            gettimeofday(&time,NULL);
+
+            srand((time.tv_sec * 1000) + (time.tv_usec / 1000));
+
+            uint64_t prob = rand() % 10000 + 1;
+
+            // the inhibitor cannot be installed here. however, explicit
+            // refreshing can only be done here.
+
+            // violates timing parameters.
+
+            bool inhibitor_status = false;
+            if (prob <= 100)
+                inhibitor_status = true;
+
+            int num_neighbor_rows = 1;
+
+            // if inhibitor is true, then we just issue refreshes to the
+            // neighboring rows of the currently activated row.
+
+            if (inhibitor_status) {
+
+                for (int i = 0 ; i < num_neighbor_rows; i++) {
+                            DPRINTF(RhInhibitor, "Inhibitor triggered "
+                            "refresh in rank %d, bank %d, row %d, "
+                            "counter value %d, %d, %d, %d, \t"
+                            "Issued PARA refreshes %lld\n",
+                            rank_ref.rank,
+                            bank_ref.bank,
+                            row,
+                            bank_ref.rhTriggers[row - 1][2],
+                            bank_ref.rhTriggers[row - 2][3],
+                            bank_ref.rhTriggers[row + 1][1],
+                            bank_ref.rhTriggers[row + 2][0],
+                            para_refreshes + 2
+                    );
+                    para_refreshes += 2;
+                    int local_count = 2;
+                    if (row > 1 && row < (rowsPerBank - 2)) {
+                        bank_ref.rhTriggers[row - i - 1][2] = 0;
+                        bank_ref.rhTriggers[row - i - 2][3] = 0;
+                        bank_ref.rhTriggers[row - i + 1][1] = 0;
+                        bank_ref.rhTriggers[row - i + 2][0] = 0;
+                    }
+                    else if (row == 1) {
+                        bank_ref.rhTriggers[row - i - 1][2] = 0;
+                        bank_ref.rhTriggers[row - i + 1][1] = 0;
+                        bank_ref.rhTriggers[row - i + 2][0] = 0;
+                    }
+                    else if (row == 0) {
+                        bank_ref.rhTriggers[row - i + 1][1] = 0;
+                        bank_ref.rhTriggers[row - i + 2][0] = 0;
+                        local_count = 1;
+                    }
+                    else if (row == rowsPerBank - 2) {
+                        bank_ref.rhTriggers[row - i - 1][2] = 0;
+                        bank_ref.rhTriggers[row - i - 2][3] = 0;
+                        bank_ref.rhTriggers[row - i + 1][1] = 0;
+                    }
+                    else if (row == rowsPerBank - 1) {
+                        bank_ref.rhTriggers[row - i - 1][2] = 0;
+                        bank_ref.rhTriggers[row - i - 2][3] = 0;
+                        local_count = 1;
+                    }
+                    else {
+                        fatal("Unexpected row condition encountered!");
+                    }
+
+                    para_refreshes += local_count;
+                    DPRINTF(RhInhibitor, "Inhibitor triggered "
+                            "refresh in rank %d, bank %d, row %d, "
+                            "counter value %d, %d, %d, %d, \t"
+                            "Issued PARA refreshes %lld\n",
+                            rank_ref.rank,
+                            bank_ref.bank,
+                            row,
+                            bank_ref.rhTriggers[row - 1][2],
+                            bank_ref.rhTriggers[row - 2][3],
+                            bank_ref.rhTriggers[row + 1][1],
+                            bank_ref.rhTriggers[row + 2][0],
+                            para_refreshes
+                    );
+                }
+            }
+            break;
+        }
+
+        default:
+            fatal("Unknown trr_variant detected!");
+            break;
+    }
+
+    // No TRR code beyond this point.
     // update the open row
     assert(bank_ref.openRow == Bank::NO_ROW);
     bank_ref.openRow = row;
 
+    updateVictims(bank_ref, row);
+
     // start counting anew, this covers both the case when we
     // auto-precharged, and when this access is forced to
     // precharge
@@ -366,6 +1449,25 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
     // get the bank
     Bank& bank_ref = rank_ref.banks[mem_pkt->bank];
 
+    // hammersim
+    if (!mem_pkt->isRead()) {
+        // this is a write operation.
+        for (int i = 0 ; i < 1024; i++) {
+            bank_ref.flagged_entries[mem_pkt->row][i] = false;
+        }
+    }
+
+    if (mem_pkt->row != 0) {
+        // now that rhTriggers is a vector, there are no self rh triggers
+        DPRINTF(DRAM, "rhTriggers [row-1][2] %ld [row][1] %ld [row][0] %ld\n",
+                bank_ref.rhTriggers[mem_pkt->row - 1][2],
+                bank_ref.rhTriggers[mem_pkt->row][1],
+                bank_ref.rhTriggers[mem_pkt->row][0]);
+    }
+    else {
+        DPRINTF(DRAM, "Rhammer triggers  %ld \n",
+                bank_ref.rhTriggers[mem_pkt->row + 1][0]);
+    }
     // for the state we need to track if it is a row hit or not
     bool row_hit = true;
 
@@ -612,6 +1714,43 @@ DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
         stats.perBankWrBursts[mem_pkt->bankId]++;
 
     }
+    // hammersim
+    // kg: now, if we access a row, its rhtrigger counter has to be set to 0.
+    // this is because we accessed the row. this can potentially become the
+    // starting point for context sensitive rowhammer analysis. if this row's
+    // act count is > 1000, this might be a half double attack
+
+    // AYAZ: Before returning, make sure that we update the pkt to indicate
+    // that the row is corrupted or not
+    checkRowHammer(bank_ref, mem_pkt);
+
+    // accessing a row resets its own rowhammer disturbance.
+    // keep a bound check to not have any runtime crashes
+    if (mem_pkt->row == 0) {
+        bank_ref.rhTriggers[mem_pkt->row + 1][1] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 2][0] = 0;
+    }
+    else if (mem_pkt->row == 1) {
+        bank_ref.rhTriggers[mem_pkt->row - 1][2] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 1][1] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 2][0] = 0;
+    }
+    else if (mem_pkt->row == rowsPerBank - 2) {
+        bank_ref.rhTriggers[mem_pkt->row - 2][3] = 0;
+        bank_ref.rhTriggers[mem_pkt->row - 1][2] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 1][1] = 0;
+    }
+    else if (mem_pkt->row == rowsPerBank - 1) {
+        bank_ref.rhTriggers[mem_pkt->row - 2][3] = 0;
+        bank_ref.rhTriggers[mem_pkt->row - 1][2] = 0;
+    }
+    else {
+        bank_ref.rhTriggers[mem_pkt->row - 1][2] = 0;
+        bank_ref.rhTriggers[mem_pkt->row - 2][3] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 1][1] = 0;
+        bank_ref.rhTriggers[mem_pkt->row + 2][0] = 0;
+    }
+
     // Update bus state to reflect when previous command was issued
     return std::make_pair(cmd_at, cmd_at + burst_gap);
 }
@@ -653,6 +1792,18 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
       activationLimit(_p.activation_limit),
       wrToRdDlySameBG(tWL + _p.tBURST_MAX + _p.tWTR_L),
       rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX),
+      rowhammerThreshold(_p.rowhammer_threshold),
+      deviceFile(_p.device_file),
+      counterTableLength(_p.counter_table_length),
+      trrVariant(_p.trr_variant),
+      trrThreshold(_p.trr_threshold),
+      companionTableLength(_p.companion_table_length),
+      companionThreshold(_p.companion_threshold),
+      singleSidedProb(_p.single_sided_prob),
+      halfDoubleProb(_p.half_double_prob),
+      doubleSidedProb(_p.double_sided_prob),
+      rhStatDump(_p.rh_stat_dump),
+      rhStatFile(_p.rh_stat_file),
       pageMgmt(_p.page_policy),
       maxAccessesPerRow(_p.max_accesses_per_row),
       timeStampOffset(0), activeRank(0),
@@ -696,6 +1847,38 @@ DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
 
     rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
 
+    // hammersim
+    for (int r = 0; r < ranksPerChannel; r++) {
+        for (int b = 0; b < ranks[r]->banks.size(); b++)
+            {
+                // AYAZ: Also initialize the rowhammer activates vector
+                // resize to account for the 4 elements per rhTriggers entry.
+                ranks[r]->banks[b].rhTriggers.resize(rowsPerBank);
+                for (int rt = 0; rt < rowsPerBank; rt++) {
+                    // around a victim row.
+                    ranks[r]->banks[b].rhTriggers[rt].resize(4, 0);
+                }
+                ranks[r]->banks[b].aggressor_rows.resize(rowsPerBank, 0);
+                // AYAZ: initializing every column with flip bit set
+                // Need to consult the device map here and set the weak
+                // columns accordingly
+                ranks[r]->banks[b].weakColumns.resize(rowsPerBank, 0x0);
+            }
+    }
+
+
+    // AYAZ: At this point we can get the data from the file and update
+    // the weakColumns structure.
+    // kg: reimplementing this part using json files as device maps.
+    DPRINTF(RowHammer, "Initializing device map.\n");
+
+    std::ifstream f(deviceFile);
+    // fail early with a clear message if the file cannot be opened.
+    if (!f.is_open())
+        fatal("Unable to open device map file '%s'\n", deviceFile);
+    device_map = nlohmann::json::parse(f);
+    DPRINTF(RowHammer, "Initialized device map successfully!\n");
+
     // some basic sanity checks
     if (tREFI <= tRP || tREFI <= tRFC) {
         fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
@@ -1442,6 +2625,417 @@ DRAMInterface::Rank::processRefreshEvent()
 
         assert(!powerEvent.scheduled());
 
+
+        // AYAZ: this is the point where the current
+        // refresh is done, so we should be able to
+        // check how many refreshes are done so far
+        // and if the total refreshes have gone
+        // through an entire cycle (8192 for DDR4),
+        // I think at that point all the trigger
+        // counters can be reset to 0?
+        // we can also implement a simple distributed
+        // refresh scheme as well. But, I think it is ok
+        // to reset things after 8192 refreshes as well.
+
+        // increment the refresh counter
+        dram.refreshCounter++;
+
+        // cannot have a bitflip until this point
+        int num_neighbor_rows = 0;
+
+        // the trr implementation is different than the og version implemented
+        // here in this code.
+
+        // There are only three cases. subversions are interleaved/switched
+        // based on the refreshCounter count.
+
+        switch(dram.trrVariant) {
+            case 0:
+                // This is no TRR Variant. It does absolutely nothing.
+                break;
+            case 1:
+            case 4:
+                // TRR variant A always picks exactly 2 rows with the
+                // highest activation count.
+                num_neighbor_rows = 2;
+                // ensure that the number of rows to be refreshed is not 0
+
+                if (dram.refreshCounter % 9 == 0) {
+                    // We need to traverse all the TRR tables per bank to find
+                    // out which row to refresh.
+                    std::cout << "refresh_counter " << dram.refreshCounter
+                        << std::endl;
+                    // We iterate over all the tables of each bank
+                    for (auto &b: banks) {
+
+                        bool inhibitor_flag = false;
+                        // TODO:
+                        // TRR can refresh all rows which has > th hammer count
+                        int max_idx = 0;
+                        for (int i = 0 ; i < std::min(b.entries,
+                                dram.counterTableLength) ; i++) {
+                            // all refresh
+                            // i's hammer count should be more than the set
+                            // threshold.
+                            // max_idx should have the highest hammers
+                            // if i's hammer count is < max_idx, then we
+                            // swap these two.
+                            std::cout << b.trr_table[i][0] <<
+                                " " << b.trr_table[i][1] <<
+                                " " << i << b.trr_table[i][2] <<
+                                " " << b.trr_table[i][3] << std::endl;
+                            if (b.trr_table[i][3] > dram.trrThreshold) {
+                                if (b.trr_table[max_idx][3] < b.trr_table[i][3]
+                                    ) {
+                                    inhibitor_flag = true;
+                                    max_idx = i;
+                                    }
+                                else {
+                                    // max_idx still has more activates than i
+                                    // we just need to verify whether it has
+                                    // more hammers than the threshold.
+                                    if (b.trr_table[max_idx][3] >
+                                            dram.trrThreshold)
+                                        inhibitor_flag = true;
+                                    // else max_idx still hasn't reached th.
+                                    // do nothing basically
+                                }
+                            }
+                        }
+
+                        if (inhibitor_flag) {
+                            // this is where the refresh is happening.
+                            // currently there is no way of counting the
+                            // extra latency (none) or the power this step
+                            // consumes.
+                            DPRINTF(RhInhibitor, "Inhibitor triggered refresh "
+                                            "in rank %d, bank %d, row %d, "
+                                            "count %d, idx %d Count %d \t "
+                                            "Total TRR refreshes %lld\n",
+                                            b.trr_table[max_idx][0],
+                                            b.trr_table[max_idx][1],
+                                            b.trr_table[max_idx][2],
+                                            b.trr_table[max_idx][3],
+                                            max_idx, dram.trrThreshold,
+                                            dram.num_trr_refreshes + (
+                                                2 * num_neighbor_rows
+                                            )
+                            );
+                            // found an entry with more than threshold number
+                            // of activates. it is important to note that
+                            // entries in the trr table isn't cleared.
+
+                            b.trr_table[max_idx][3] = 0;
+                            dram.num_trr_refreshes += 2 * num_neighbor_rows;
+
+                            // need to reset the rhTriggers too for the victim
+                            // rows.
+                            // this logic should be bypassed when the number of
+                            // aggressor rows will be more than the trr_table's
+                            // size.
+
+                                b.rhTriggers[b.trr_table[max_idx][2] + 1][0] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][1] = 0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 2][2] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 3][3] =
+                                    0;
+
+                                b.rhTriggers[b.trr_table[max_idx][2] - 1][3] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][2] = 0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 2][1] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 3][0] =
+                                    0;
+                            // }
+                        }
+                    }
+                }
+                break;
+
+                // Number of neighboring rows is a little confusing for
+                // this version of the code.
+                // break;
+
+            case 2:
+                // This is Vendor B from the U-TRR paper.
+                num_neighbor_rows = 2;
+
+                if (dram.refreshCounter % 2 == 0 ||
+                        dram.refreshCounter % 4 == 0 ||
+                        dram.refreshCounter % 9 == 0) {
+                    // We need to refresh the row with the maximum number of
+                    // activates across all the tables. Although this row is
+                    // maintained per bank, but I think refreshing the max
+                    // among the max per bank will do the trick.
+
+                    // we need traffic generators for rh > 1 bank to validate
+                    // the above statement.
+
+                    // TODO: use a definite variable for this
+                    // int bank_count = banks.size();
+                    // for (auto &b: banks)
+                    //     bank_count++;
+                    // vector<uint64_t>potential_refresh_table(bank_count);
+
+                    bool inhibitor_flag = false;
+                    int bank_count = 0;
+                    // TODO:
+                    // TRR can refresh all rows which has > th hammer count
+                    int max_bank_idx = 0, max_idx = 0, max_val;
+                    // We iterate over all the tables of each bank
+                    for (auto &b: banks) {
+                        if (bank_count == 0)
+                            max_val = b.trr_table[max_idx][3];
+
+                        // this index is the highest
+                        if (max_val > dram.trrThreshold)
+                            inhibitor_flag = true;
+
+                        for (int i = 0 ; i < std::min(b.entries,
+                                dram.counterTableLength) ; i++) {
+                            // all refresh
+                            // i's hammer count should be more than the set
+                            // threshold.
+                            // max_idx should have the highest hammers
+                            // if i's hammer count is < max_idx, then we
+                            // swap these two.
+                            if (b.trr_table[i][3] > dram.trrThreshold) {
+                                if (max_val < b.trr_table[i][3]
+                                    ) {
+                                    max_idx = i;
+                                    max_bank_idx = bank_count;
+                                    // there is some row to refresh
+                                    inhibitor_flag = true;
+                                }
+                                // else {
+                                //     // max_idx still has more activates than
+                                //     // i. we just need to verify whether it
+                                //     // has more hammers than the threshold.
+                                //     if (b.trr_table[max_idx][3] >
+                                //             dram.trrThreshold)
+                                //         inhibitor_flag = true;
+                                //     // else max_idx still hasn't reached th.
+                                //     // do nothing basically
+                                // }
+                            }
+                        }
+                        bank_count++;
+                        // std :: cout << b.trr_table[max_idx][0] << " " <<
+                        //         b.trr_table[max_idx][1] << " " <<
+                        //         b.trr_table[max_idx][2] << " " <<
+                        //         b.trr_table[max_idx][3] << " " <<
+                        //         max_bank_idx << " " << max_idx << " " <<
+                        //         inhibitor_flag << std :: endl;
+                    }
+
+                    // it can refresh at most one row among all banks.
+
+                    if (inhibitor_flag) {
+                        // this is where the refresh is happening.
+                        // currently there is no way of counting the
+                        // extra latency (none) or the power this step
+                        // consumes.
+                        bank_count = 0;
+                        for (auto &b: banks) {
+                            if (bank_count == max_bank_idx) {
+                                DPRINTF(RhInhibitor, "Inhibitor triggered "
+                                        "refresh in rank %d, bank %d, row %d, "
+                                        "count %d, idx %d Count %d \t "
+                                        "Total TRR refreshes %lld\n",
+                                        b.trr_table[max_idx][0],
+                                        b.trr_table[max_idx][1],
+                                        b.trr_table[max_idx][2],
+                                        b.trr_table[max_idx][3],
+                                        max_idx, dram.trrThreshold,
+                                        dram.num_trr_refreshes + (
+                                            2 * num_neighbor_rows
+                                        )
+                                );
+                                // found an entry with more than threshold
+                                // number of activates. it is important to note
+                                // that entries in the trr table isn't cleared.
+
+                                b.trr_table[max_idx][3] = 0;
+                                dram.num_trr_refreshes +=
+                                    2 * num_neighbor_rows;
+
+                                // need to reset the rhTriggers too for the
+                                // victim rows.
+                                b.rhTriggers[b.trr_table[max_idx][2] + 1][0] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][1] = 0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 2][2] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 3][3] =
+                                    0;
+
+                                b.rhTriggers[b.trr_table[max_idx][2] - 1][3] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][2] = 0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 2][1] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 3][0] =
+                                    0;
+
+                                // so. sk hynix dimms cannot have half-doubles
+                                // impressive
+                                b.rhTriggers[b.trr_table[max_idx][2] - 4][3] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 3][2] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] - 1][1] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][0] = 0;
+
+                                b.rhTriggers[b.trr_table[max_idx][2] + 4][0] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 3][1] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2] + 1][2] =
+                                    0;
+                                b.rhTriggers[b.trr_table[max_idx][2]][3] = 0;
+
+                                // for (int j = 0 ; j < num_neighbor_rows; j++)
+                                // {
+                                //     b.rhTriggers[b.trr_table[
+                                //          max_idx][2] - j - 1] = 0;
+                                //     b.rhTriggers[b.trr_table[
+                                //          max_idx][2] + j + 1] = 0;
+                                // this logic should be bypassed when the
+                                // number of aggressor rows will be more than
+                                // the trr_table's size.
+                                // }
+                                // cannot refresh > 1 row.
+                                // break;
+                            }
+                            bank_count++;
+                        }
+                    }
+
+
+
+
+                    // for (auto)
+                }
+                break;
+            case 3:
+                // micron
+                break;
+            case 5: {
+                // this corresponds to PARA.
+
+                // we use a rng to issue inhibitor refreshes.
+                // since this mitigation mechanism issues refreshes on the fly,
+                // its inhibitor is within the act part of the code.
+                break;
+            }
+            default:
+                fatal("Unknown trr variant!");
+        }
+
+        // No TRR code beyond this point.
+
+        // TODO
+        // kg: This part has to be fixed. We need to implement a RH table,
+        // as opposed to a TRR table, which keeps track of all the RH attacks
+        // and is also responsible for flipping bits.
+
+        if (dram.refreshCounter % 4096 == 0
+                || dram.refreshCounter % 8192 == 0) {
+
+            // reset the threshold counters. this depends on the trr variant
+            // that we use.
+
+            switch(dram.trrVariant) {
+                case 0:
+                    if (dram.rhStatDump) {
+                        if (dram.refreshCounter % 8192 == 0) {
+                            std::ofstream outfile;
+                            outfile.open(dram.rhStatFile,
+                                    std::ios::out | std::ios::app );
+                            outfile << "# dumping counters before refresh!" <<
+                                    std::endl;
+                            int bank_count = 0;
+                            for (auto &b: banks) {
+                                outfile << "bank: " << bank_count << std::endl;
+                                for (auto& it: b.activated_row_list) {
+                                    outfile << "\t" << it << "\t";
+                                    for (int i = 0; i < 4; i++)
+                                        outfile << b.rhTriggers[it][i] << " ";
+                                    outfile << std::endl;
+                                }
+                                bank_count++;
+                            }
+
+                            outfile.close();
+                        }
+                    }
+                    // now reset the counters
+                    if (dram.refreshCounter % 8192 == 0) {
+                        for (auto &b: banks) {
+                            for (int row_index = 0;
+                                row_index < dram.rowsPerBank; row_index++) {
+                                for (int j = 0 ; j < 4; j++) {
+                                    b.rhTriggers[row_index][j] = 0;
+                                }
+                            }
+                        }
+                    }
+                    break;
+                case 1:
+                case 4:
+                    // there must be no cross variable initializations.
+                    if (dram.refreshCounter % 4096 == 0) {
+                        std :: cout << "Refershed" << std :: endl;
+
+                        for (auto &b : banks) {
+                            for (int i = 0 ; i < dram.counterTableLength;
+                                    i++) {
+                                b.trr_table[i][3] = 0;
+                            }
+                            for (int i = 0 ; i < dram.companionTableLength;
+                                    i++) {
+                                b.companion_table[i][3] = 0;
+                            }
+                            for (int row_index = 0;
+                                    row_index < dram.rowsPerBank;row_index++) {
+                                for (int j = 0 ; j < 4; j++) {
+                                    b.rhTriggers[row_index][j] = 0;
+                                }
+                                b.aggressor_rows[row_index] = 0;
+                            }
+                        }
+                    }
+                    break;
+                case 2:
+                case 5:
+                    // there must be no cross variable initializations.
+                    if (dram.refreshCounter % 8192 == 0) {
+                        std :: cout << "Refershed" << std :: endl;
+
+                        for (auto &b : banks) {
+                            for (int i = 0 ; i < dram.counterTableLength; i++)
+                                b.trr_table[i][3] = 0;
+                            for (int row_index = 0;
+                                    row_index < dram.rowsPerBank;row_index++) {
+                                for (int j = 0 ; j < 4; j++) {
+                                    b.rhTriggers[row_index][j] = 0;
+                                }
+                                b.aggressor_rows[row_index] = 0;
+                            }
+                        }
+                    }
+                    break;
+                case 3:
+                    break;
+                default:
+                    fatal("Unknown TRR Variant detected!");
+            }
+        }
+
         if ((dram.ctrl->drainState() == DrainState::Draining) ||
             (dram.ctrl->drainState() == DrainState::Drained)) {
             // if draining, do not re-enter low-power mode.
diff --git a/src/mem/dram_interface.hh b/src/mem/dram_interface.hh
index e20e33faf9..91b9bb2137 100644
--- a/src/mem/dram_interface.hh
+++ b/src/mem/dram_interface.hh
@@ -527,6 +527,35 @@ class DRAMInterface : public MemInterface
     const Tick wrToRdDlySameBG;
     const Tick rdToWrDlySameBG;
 
+    // hammersim: rowhammer modelling state and parameters
+
+    //AYAZ: Rowhammer activation threshold
+    const uint32_t rowhammerThreshold;
+
+    //AYAZ: the path to the device file with
+    // the information on weak columns
+    std::string deviceFile;
+    nlohmann::json device_map;
+
+    //AYAZ: Rowhammer refresh counter
+    int refreshCounter = 0;
+
+    // kg: TRR parameters. trrVariant selects the mitigation (0 to 5).
+    const uint32_t counterTableLength;   // entries in Bank::trr_table
+    const uint32_t trrVariant;
+    const uint32_t trrThreshold;         // limit for a trr_table count
+    const uint32_t companionTableLength; // entries in companion_table
+    const uint32_t companionThreshold;
+
+    const uint64_t singleSidedProb;
+    const uint64_t halfDoubleProb;
+    const uint64_t doubleSidedProb;
+
+    uint64_t num_trr_refreshes = 0; // running total of TRR refreshes
+    bool first_act = false;
+    uint64_t para_refreshes; // NOTE(review): no default initializer here
+    const bool rhStatDump;   // dump rhTriggers to rhStatFile when set
+    std::string rhStatFile;
 
     enums::PageManage pageMgmt;
     /**
@@ -561,6 +590,24 @@ class DRAMInterface : public MemInterface
     void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick,
                       uint32_t row);
 
+    /**
+     * Keep track of possible corruption in neighbouring rows
+     *
+     * @param bank_ref Reference to the bank
+     * @param row Index of the row
+     */
+    void updateVictims(Bank& bank_ref, uint32_t row);
+
+    /**
+     * Check if the current access is going to lead to corrupted data
+     *
+     * @param bank_ref Reference to the bank
+     * @param mem_pkt Memory packet of the access being checked
+     *                (NOTE(review): presumably supplies the row/column)
+     */
+    void checkRowHammer(Bank& bank_ref, MemPacket* mem_pkt);
+
+
     /**
      * Precharge a given bank and also update when the precharge is
      * done. This will also deal with any stats related to the
diff --git a/src/mem/dramsys.cc b/src/mem/dramsys.cc
new file mode 100644
index 0000000000..68fe983d30
--- /dev/null
+++ b/src/mem/dramsys.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2023 Fraunhofer IESE
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dramsys.hh"
+
+namespace gem5
+{
+
+namespace memory
+{
+
+DRAMSys::DRAMSys(Params const& params)
+    : AbstractMemory(params),
+      tlmWrapper(dramSysWrapper.tSocket, params.name + ".tlm",
+                 InvalidPortID),
+      config(::DRAMSys::Config::from_path(
+          params.configuration, params.resource_directory)),
+      dramSysWrapper(params.name.c_str(), config, params.recordable,
+                     params.range)
+{
+    // Signal drain completion whenever DRAMSys calls back while idle.
+    auto onIdle = [this]
+    {
+        if (dramSysWrapper.dramsys->idle())
+            signalDrainDone();
+    };
+    dramSysWrapper.dramsys->registerIdleCallback(onIdle);
+}
+
+// Return the TLM wrapper for "tlm"; defer other names to AbstractMemory.
+gem5::Port& DRAMSys::getPort(const std::string& if_name, PortID idx)
+{
+    if (if_name == "tlm")
+    {
+        return tlmWrapper;
+    }
+    return AbstractMemory::getPort(if_name, idx);
+}
+
+DrainState DRAMSys::drain() // Drained iff DRAMSys is idle, else Draining
+{
+    if (dramSysWrapper.dramsys->idle()) return DrainState::Drained;
+    return DrainState::Draining;
+}
+
+void DRAMSys::serialize(CheckpointOut& cp) const
+{
+    // Each SystemC object that implements ::DRAMSys::Serialize dumps
+    // itself to "<object name>.pmem" under the checkpoint directory.
+    std::filesystem::path const cptDir = CheckpointIn::dir();
+    auto const roots = sc_core::sc_get_top_level_objects();
+
+    // Depth-first walk over an object and all of its children.
+    std::function<void(sc_core::sc_object const*)> dumpTree;
+    dumpTree = [&dumpTree, &cptDir](sc_core::sc_object const* node)
+    {
+        auto const* target =
+            dynamic_cast<::DRAMSys::Serialize const*>(node);
+        if (target != nullptr)
+        {
+            std::string fileName(node->name());
+            fileName += ".pmem";
+            std::ofstream out(cptDir / fileName, std::ios::binary);
+            target->serialize(out);
+        }
+
+        for (auto const* child : node->get_child_objects())
+        {
+            dumpTree(child);
+        }
+    };
+
+    for (auto const* root : roots)
+    {
+        dumpTree(root);
+    }
+}
+
+void DRAMSys::unserialize(CheckpointIn& cp)
+{
+    // Each SystemC object that implements ::DRAMSys::Deserialize reads
+    // itself from "<object name>.pmem" under the checkpoint directory.
+    std::filesystem::path const cptDir = CheckpointIn::dir();
+    auto const roots = sc_core::sc_get_top_level_objects();
+
+    // Depth-first walk over an object and all of its children.
+    std::function<void(sc_core::sc_object*)> loadTree;
+    loadTree = [&loadTree, &cptDir](sc_core::sc_object* node)
+    {
+        auto* target = dynamic_cast<::DRAMSys::Deserialize*>(node);
+
+        if (target != nullptr)
+        {
+            std::string fileName(node->name());
+            fileName += ".pmem";
+            std::ifstream in(cptDir / fileName, std::ios::binary);
+            target->deserialize(in);
+        }
+
+        for (auto* child : node->get_child_objects())
+        {
+            loadTree(child);
+        }
+    };
+
+    for (auto* root : roots)
+    {
+        loadTree(root);
+    }
+}
+
+} // namespace memory
+} // namespace gem5
diff --git a/src/mem/dramsys.hh b/src/mem/dramsys.hh
index d4d9ab8859..8530f2c563 100644
--- a/src/mem/dramsys.hh
+++ b/src/mem/dramsys.hh
@@ -29,7 +29,7 @@
 #ifndef __MEM_DRAMSYS_H__
 #define __MEM_DRAMSYS_H__
 
-#include "DRAMSysConfiguration.h"
+#include "DRAMSys/config/DRAMSysConfiguration.h"
 #include "mem/abstract_mem.hh"
 #include "mem/dramsys_wrapper.hh"
 #include "params/DRAMSys.hh"
@@ -43,36 +43,20 @@ namespace memory
 class DRAMSys : public AbstractMemory
 {
     PARAMS(DRAMSys);
-    sc_gem5::TlmTargetWrapper<32> tlmWrapper;
+    sc_gem5::TlmTargetWrapper<> tlmWrapper;
 
   public:
-    DRAMSys(Params const &params)
-        : AbstractMemory(params),
-          tlmWrapper(dramSysWrapper.tSocket,
-              params.name + ".tlm",
-              InvalidPortID),
-          config(DRAMSysConfiguration::from_path(
-              params.configuration,
-              params.resource_directory)),
-          dramSysWrapper(params.name.c_str(),
-            config,
-            params.recordable,
-            params.range)
-    {
-    }
+    DRAMSys(Params const& params);
 
-    gem5::Port &getPort(const std::string &if_name, PortID idx) override
-    {
-        if (if_name != "tlm")
-        {
-            return AbstractMemory::getPort(if_name, idx);
-        }
+    gem5::Port& getPort(const std::string& if_name, PortID idx) override;
 
-        return tlmWrapper;
-    }
+    DrainState drain() override;
+
+    void serialize(CheckpointOut& cp) const override;
+    void unserialize(CheckpointIn& cp) override;
 
   private:
-    DRAMSysConfiguration::Configuration config;
+    ::DRAMSys::Config::Configuration config;
     DRAMSysWrapper dramSysWrapper;
 };
 
diff --git a/src/mem/dramsys_wrapper.cc b/src/mem/dramsys_wrapper.cc
index afa67f3bf2..2decec42a2 100644
--- a/src/mem/dramsys_wrapper.cc
+++ b/src/mem/dramsys_wrapper.cc
@@ -36,7 +36,7 @@ namespace memory
 
 DRAMSysWrapper::DRAMSysWrapper(
     sc_core::sc_module_name name,
-    DRAMSysConfiguration::Configuration const &config,
+    ::DRAMSys::Config::Configuration const &config,
     bool recordable,
     AddrRange range) :
     sc_core::sc_module(name),
@@ -44,28 +44,41 @@ DRAMSysWrapper::DRAMSysWrapper(
     range(range)
 {
     tSocket.register_nb_transport_fw(this, &DRAMSysWrapper::nb_transport_fw);
-    tSocket.register_transport_dbg(this, &DRAMSysWrapper::transport_dbg);
     iSocket.register_nb_transport_bw(this, &DRAMSysWrapper::nb_transport_bw);
+
+    tSocket.register_b_transport(this, &DRAMSysWrapper::b_transport);
+
+    tSocket.register_transport_dbg(this, &DRAMSysWrapper::transport_dbg);
     iSocket.bind(dramsys->tSocket);
 
     // Register a callback to compensate for the destructor not
     // being called.
     registerExitCallback(
-        [this]()
+        []()
         {
             // Workaround for BUG GEM5-1233
             sc_gem5::Kernel::stop();
         });
 }
 
-std::shared_ptr<::DRAMSys>
+std::shared_ptr<::DRAMSys::DRAMSys>
 DRAMSysWrapper::instantiateDRAMSys(
     bool recordable,
-    DRAMSysConfiguration::Configuration const &config)
+    ::DRAMSys::Config::Configuration const &config)
 {
     return recordable
-        ? std::make_shared<::DRAMSysRecordable>("DRAMSys", config)
-        : std::make_shared<::DRAMSys>("DRAMSys", config);
+        ? std::make_shared<::DRAMSys::DRAMSysRecordable>("DRAMSys", config)
+        : std::make_shared<::DRAMSys::DRAMSys>("DRAMSys", config);
+}
+
+void
+DRAMSysWrapper::b_transport(tlm::tlm_generic_payload &payload,
+                            sc_core::sc_time &delay)
+{
+    // Rebase the address onto the start of our range, then forward.
+    auto const localAddr = payload.get_address() - range.start();
+    payload.set_address(localAddr);
+    iSocket->b_transport(payload, delay);
 }
 
 tlm::tlm_sync_enum DRAMSysWrapper::nb_transport_fw(
diff --git a/src/mem/dramsys_wrapper.hh b/src/mem/dramsys_wrapper.hh
index f1437cb761..26d552fd2f 100644
--- a/src/mem/dramsys_wrapper.hh
+++ b/src/mem/dramsys_wrapper.hh
@@ -32,13 +32,14 @@
 #include <iostream>
 #include <memory>
 
-#include "DRAMSysConfiguration.h"
+#include "DRAMSys/config/DRAMSysConfiguration.h"
+#include "DRAMSys/simulation/DRAMSysRecordable.h"
 #include "mem/abstract_mem.hh"
 #include "params/DRAMSys.hh"
 #include "sim/core.hh"
-#include "simulation/DRAMSysRecordable.h"
 #include "systemc/core/kernel.hh"
 #include "systemc/ext/core/sc_module_name.hh"
+
 #include "systemc/ext/systemc"
 #include "systemc/ext/tlm"
 #include "systemc/ext/tlm_utils/simple_target_socket.h"
@@ -57,14 +58,14 @@ class DRAMSysWrapper : public sc_core::sc_module
   public:
     SC_HAS_PROCESS(DRAMSysWrapper);
     DRAMSysWrapper(sc_core::sc_module_name name,
-                   DRAMSysConfiguration::Configuration const &config,
+                   ::DRAMSys::Config::Configuration const &config,
                    bool recordable,
                    AddrRange range);
 
   private:
-    static std::shared_ptr<::DRAMSys>
+    static std::shared_ptr<::DRAMSys::DRAMSys>
     instantiateDRAMSys(bool recordable,
-        DRAMSysConfiguration::Configuration const &config);
+        ::DRAMSys::Config::Configuration const &config);
 
     tlm::tlm_sync_enum nb_transport_fw(tlm::tlm_generic_payload &payload,
                                        tlm::tlm_phase &phase,
@@ -74,12 +75,15 @@ class DRAMSysWrapper : public sc_core::sc_module
                                        tlm::tlm_phase &phase,
                                        sc_core::sc_time &bwDelay);
 
+    void b_transport(tlm::tlm_generic_payload &payload,
+                     sc_core::sc_time &delay);
+
     unsigned int transport_dbg(tlm::tlm_generic_payload &trans);
 
     tlm_utils::simple_initiator_socket<DRAMSysWrapper> iSocket;
     tlm_utils::simple_target_socket<DRAMSysWrapper> tSocket;
 
-    std::shared_ptr<::DRAMSys> dramsys;
+    std::shared_ptr<::DRAMSys::DRAMSys> dramsys;
 
     AddrRange range;
 };
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
index 9a3600f331..da39c816db 100644
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
@@ -39,7 +39,6 @@
  */
 
 #include "mem/mem_ctrl.hh"
-
 #include "base/trace.hh"
 #include "debug/DRAM.hh"
 #include "debug/Drain.hh"
@@ -288,7 +287,7 @@ MemCtrl::addToReadQueue(PacketPtr pkt,
 
     // If all packets are serviced by write queue, we send the repsonse back
     if (pktsServicedByWrQ == pkt_count) {
-        accessAndRespond(pkt, frontendLatency, mem_intr);
+        accessAndRespond(pkt, frontendLatency, mem_intr, false);
         return true;
     }
 
@@ -375,7 +374,7 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count,
     // snoop the write queue for any upcoming reads
     // @todo, if a pkt size is larger than burst size, we might need a
     // different front end latency
-    accessAndRespond(pkt, frontendLatency, mem_intr);
+    accessAndRespond(pkt, frontendLatency, mem_intr, false);
 }
 
 void
@@ -510,14 +509,14 @@ MemCtrl::processRespondEvent(MemInterface* mem_intr,
             // @todo we probably want to have a different front end and back
             // end latency for split packets
             accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency,
-                             mem_intr);
+                             mem_intr, mem_pkt->corruptedAccess);
             delete mem_pkt->burstHelper;
             mem_pkt->burstHelper = NULL;
         }
     } else {
         // it is not a split packet
         accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency,
-                         mem_intr);
+                         mem_intr, mem_pkt->corruptedAccess);
     }
 
     queue.pop_front();
@@ -620,7 +619,7 @@ MemCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay,
 
 void
 MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency,
-                                                MemInterface* mem_intr)
+                                MemInterface* mem_intr, bool corruptedAccess)
 {
     DPRINTF(MemCtrl, "Responding to Address %#x.. \n", pkt->getAddr());
 
@@ -629,6 +628,14 @@ MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency,
     // response
     panic_if(!mem_intr->getAddrRange().contains(pkt->getAddr()),
              "Can't handle address range for packet %s\n", pkt->print());
+
+    // hammersim: do the access exactly once, and let the interface corrupt
+    // the packet data when this burst was flagged as a corrupted access.
+    // This path assumes that the interface is DRAM.
+    // TODO: assert mem_intr -> DRAM
-    mem_intr->access(pkt);
+    mem_intr->access(pkt, corruptedAccess);
+    if (corruptedAccess) {
+        assert(pkt->hasData());
+    }
 
     // turn packet around to go back to requestor if response expected
@@ -1496,6 +1507,11 @@ MemCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
         // calls recvAtomic() and throws away the latency; we can save a
         // little here by just not calculating the latency.
         ctrl.recvFunctional(pkt);
+    } else {
+        // The packet's request is satisfied by the queue, but queue
+        // does not call makeResponse.
+        // Here, change the packet to the corresponding response
+        pkt->makeResponse();
     }
 
     pkt->popLabel();
diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh
index 917798ffa7..06fb074c23 100644
--- a/src/mem/mem_ctrl.hh
+++ b/src/mem/mem_ctrl.hh
@@ -98,6 +98,11 @@ class BurstHelper
 class MemPacket
 {
   public:
+    // hammersim: AYAZ: For rowhammer stuff
+    // this is to indicate that the mem_pkt is accessing a column in a row
+    // which has flipped bits. which column in that row should actually be
+    // flipped? we can randomly pick that column!
+    bool corruptedAccess = false;
 
     /** When did request enter the controller */
     const Tick entryTime;
@@ -382,7 +387,7 @@ class MemCtrl : public qos::MemCtrl
      * @param mem_intr the memory interface to access
      */
     virtual void accessAndRespond(PacketPtr pkt, Tick static_latency,
-                                                MemInterface* mem_intr);
+                                MemInterface* mem_intr, bool corruptedAccess);
 
     /**
      * Determine if there is a packet that can issue.
diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh
index b0f762fc80..3dbef08fe2 100644
--- a/src/mem/mem_interface.hh
+++ b/src/mem/mem_interface.hh
@@ -46,6 +46,8 @@
 #ifndef __MEM_INTERFACE_HH__
 #define __MEM_INTERFACE_HH__
 
+#include <sys/time.h>
+
 #include <deque>
 #include <string>
 #include <unordered_set>
@@ -61,6 +63,8 @@
 #include "params/MemInterface.hh"
 #include "sim/eventq.hh"
 
+#include "../../ext/json/json/include/nlohmann/json.hpp"
+
 namespace gem5
 {
 
@@ -100,12 +104,62 @@ class MemInterface : public AbstractMemory
 
         uint32_t rowAccesses;
         uint32_t bytesAccessed;
+        // hammersim: these variables are added to the mem_interface to
+        // implement a tracking mechanism. more variables are added to
+        // implement TRR-based mitigations.
+
+        // std::vector< std::vector<uint64_t> > agg_table;
+
+        // trr_table maintains a list of all tracked rows by a mitigation
+        // mechanism. in the ideal case, a deterministic rowhammer mitigation
+        std::vector< std::vector<uint64_t> > trr_table;
+
+        // a companion table is an implementation artifact which determines the
+        // policy of adding a new row to the trr_table. this is only useful
+        // when the DRAM vendor uses completely table-based mitigation.
+        std::vector< std::vector<uint64_t> > companion_table;
+
+        uint32_t act_count = 0;
+
+        uint32_t entries;
+        uint32_t companion_entries;
+
+        // we need aggressor rows to determine whether a given attack is a
+        // single sided or a double sided rowhammer
+        std::vector<long int> aggressor_rows;
+
+        // this only changes when the row numbers will be more than 2^16
+        // maybe ddr5
+        std::vector<uint16_t> activated_row_list;
+
+        // this branch now has updated rhTriggers
+        std::vector<std::vector<long int>> rhTriggers;
+
+        // i am reimplementing this part.
+        // these branches cannot be merged any longer
+        // std::vector<std::vector<uint16_t> > weakColumns;
+        std::vector<std::bitset<1024>> weakColumns;
+
+        nlohmann::json bank_device_map;
+
+        // TODO: This needs to be changed in the future.
+        // currently it only supports one bank.
 
+        std::vector<std::vector<bool>> flagged_entries;
         Bank() :
             openRow(NO_ROW), bank(0), bankgr(0),
             rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
-            rowAccesses(0), bytesAccessed(0)
-        { }
+            rowAccesses(0), bytesAccessed(0), entries(0), companion_entries(0),
+            aggressor_rows(0), rhTriggers(0), weakColumns(0)
+        {
+            // hammersim: moving companion and trr table stuff
+            // weakColumns.resize(32768, std::vector<uint16_t>(1024));
+            trr_table.resize(0, std::vector<uint64_t>(4));
+            companion_table.resize(0, std::vector<uint64_t>(4));
+
+            // initializing flag_map
+            flagged_entries.resize(8192, std::vector<bool>(1024));
+        }
     };
 
     /**
diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc
index 0a378e2c63..12b62974b7 100644
--- a/src/mem/noncoherent_xbar.cc
+++ b/src/mem/noncoherent_xbar.cc
@@ -107,8 +107,8 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
     // we should never see express snoops on a non-coherent crossbar
     assert(!pkt->isExpressSnoop());
 
-    // determine the destination based on the address
-    PortID mem_side_port_id = findPort(pkt->getAddrRange());
+    // determine the destination port
+    PortID mem_side_port_id = findPort(pkt);
 
     // test if the layer should be considered occupied for the current
     // port
@@ -255,7 +255,7 @@ NoncoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
     unsigned int pkt_cmd = pkt->cmdToIndex();
 
     // determine the destination port
-    PortID mem_side_port_id = findPort(pkt->getAddrRange());
+    PortID mem_side_port_id = findPort(pkt);
 
     // stats updates for the request
     pktCount[cpu_side_port_id][mem_side_port_id]++;
@@ -316,7 +316,7 @@ NoncoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
     }
 
     // determine the destination port
-    PortID dest_id = findPort(pkt->getAddrRange());
+    PortID dest_id = findPort(pkt);
 
     // forward the request to the appropriate destination
     memSidePorts[dest_id]->sendFunctional(pkt);
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index ed7a94f4fb..d08504a7a9 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -1156,7 +1156,7 @@ class Packet : public Printable, public Extensible<Packet>
   public:
     /**
      * @{
-     * @name Data accessor mehtods
+     * @name Data accessor methods
      */
 
     /**
@@ -1303,7 +1303,34 @@ class Packet : public Printable, public Extensible<Packet>
             std::memcpy(getPtr<uint8_t>(), p, getSize());
         }
     }
+    /**
+     * Fill the provided buffer with 0xF0 bytes and copy it into the packet.
+     */
+    void
+    setCorruptedData(uint8_t *p)
+    {
+        // we should never be copying data onto itself, which means we
+        // must identify packets with static data, as they carry the
+        // same pointer from source to destination and back
+        assert(p != getPtr<uint8_t>() || flags.isSet(STATIC_DATA));
+
+        if (p != getPtr<uint8_t>()) {
+            // for packet with allocated dynamic data, we copy data from
+            // one to the other, e.g. a forwarded response to a response
+
+            //long long *buffer = new long long;
 
+            //std::memcpy(buffer, p, getSize());
+
+            std::memset(p, 0x00F0, getSize());
+
+            //*buffer = *buffer + 1;
+
+            //std::memcpy(p, buffer, getSize());
+
+            std::memcpy(getPtr<uint8_t>(), p, getSize());
+        }
+    }
     /**
      * Copy data into the packet from the provided block pointer,
      * which is aligned to the given block size.
@@ -1437,6 +1464,15 @@ class Packet : public Printable, public Extensible<Packet>
         return cmd == MemCmd::CleanEvict || cmd == MemCmd::WritebackClean;
     }
 
+    /**
+     * Is this packet a clean invalidate request, e.g., clflush/clflushopt?
+     */
+    bool
+    isCleanInvalidateRequest() const
+    {
+        return cmd == MemCmd::CleanInvalidReq;
+    }
+
     bool
     isMaskedWrite() const
     {
diff --git a/src/mem/port.cc b/src/mem/port.cc
index 2a253b91a3..e597255dfb 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -45,6 +45,7 @@
 #include "mem/port.hh"
 
 #include "base/trace.hh"
+#include "debug/PortTrace.hh"
 #include "debug/ResponsePort.hh"
 #include "sim/sim_object.hh"
 
@@ -186,6 +187,29 @@ RequestPort::printAddr(Addr a)
     sendFunctional(&pkt);
 }
 
+void
+RequestPort::addTrace(PacketPtr pkt) const
+{
+    if (!gem5::debug::PortTrace || !pkt)
+        return;
+    auto ext = pkt->getExtension<TracingExtension>();
+    if (!ext) {
+        ext = std::make_shared<TracingExtension>();
+        pkt->setExtension(ext);
+    }
+    ext->add(name(), _responsePort->name(), pkt->getAddr());
+}
+
+void
+RequestPort::removeTrace(PacketPtr pkt) const
+{
+    if (!gem5::debug::PortTrace || !pkt)
+        return;
+    auto ext = pkt->getExtension<TracingExtension>();
+    panic_if(!ext, "There is no TracingExtension in the packet.");
+    ext->remove();
+}
+
 /**
  * Response port
  */
diff --git a/src/mem/port.hh b/src/mem/port.hh
index a3acffc427..5f977aaab8 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -46,6 +46,11 @@
 #ifndef __MEM_PORT_HH__
 #define __MEM_PORT_HH__
 
+#include <memory>
+#include <sstream>
+#include <stack>
+#include <string>
+
 #include "base/addr_range.hh"
 #include "mem/packet.hh"
 #include "mem/protocol/atomic.hh"
@@ -64,6 +69,58 @@ class SlavePort;
 
 class ResponsePort;
 
+/**
+ * TracingExtension is an Extension of the Packet for recording the trace
+ * of the Packet. The stack in the TracingExtension holds the names of the
+ * ports that the Packet has passed through.
+ */
+class TracingExtension : public gem5::Extension<Packet, TracingExtension>
+{
+ public:
+   TracingExtension() = default;
+   TracingExtension(const std::stack<std::string>& q) { trace_ = q; }
+
+   std::unique_ptr<ExtensionBase> clone() const override
+   {
+       return std::make_unique<TracingExtension>(trace_);
+   }
+
+   void
+   add(std::string request_port, std::string response_port, gem5::Addr addr)
+   {
+       trace_.push(request_port + csprintf(" addr=%#llx", addr));
+       trace_.push(response_port);
+   }
+
+   void
+   remove()
+   {
+       trace_.pop();  // Remove the response port name.
+       trace_.pop();  // Remove the request port name.
+   }
+
+   bool empty() { return trace_.empty(); }
+   std::stack<std::string>& getTrace() { return trace_; }
+   std::string getTraceInString()
+   {
+       std::stringstream port_trace;
+       std::stack<std::string> copy_stack = trace_;
+       port_trace << "Port trace of the Packet (" << std::endl
+                  << "[Destination] ";
+       while (!copy_stack.empty()) {
+           if (copy_stack.size() == 1)
+               port_trace << "[Source] ";
+           port_trace << copy_stack.top() << std::endl;
+           copy_stack.pop();
+       }
+       port_trace << ")";
+       return port_trace.str();
+   }
+
+  private:
+   std::stack<std::string> trace_;
+};
+
 /**
  * A RequestPort is a specialisation of a Port, which
  * implements the default protocol for the three different level of
@@ -266,6 +323,10 @@ class RequestPort: public Port, public AtomicRequestProtocol,
     {
         panic("%s was not expecting a snoop retry.\n", name());
     }
+
+  private:
+    void addTrace(PacketPtr pkt) const;
+    void removeTrace(PacketPtr pkt) const;
 };
 
 class [[deprecated]] MasterPort : public RequestPort
@@ -393,7 +454,11 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
     sendTimingResp(PacketPtr pkt)
     {
         try {
-            return TimingResponseProtocol::sendResp(_requestPort, pkt);
+            _requestPort->removeTrace(pkt);
+            bool succ = TimingResponseProtocol::sendResp(_requestPort, pkt);
+            if (!succ)
+                _requestPort->addTrace(pkt);
+            return succ;
         } catch (UnboundPortException) {
             reportUnbound();
         }
@@ -487,7 +552,10 @@ inline Tick
 RequestPort::sendAtomic(PacketPtr pkt)
 {
     try {
-        return AtomicRequestProtocol::send(_responsePort, pkt);
+        addTrace(pkt);
+        Tick tick = AtomicRequestProtocol::send(_responsePort, pkt);
+        removeTrace(pkt);
+        return tick;
     } catch (UnboundPortException) {
         reportUnbound();
     }
@@ -497,8 +565,11 @@ inline Tick
 RequestPort::sendAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor)
 {
     try {
-        return AtomicRequestProtocol::sendBackdoor(_responsePort,
-                                                    pkt, backdoor);
+        addTrace(pkt);
+        Tick tick = AtomicRequestProtocol::sendBackdoor(_responsePort,
+                                                        pkt, backdoor);
+        removeTrace(pkt);
+        return tick;
     } catch (UnboundPortException) {
         reportUnbound();
     }
@@ -508,7 +579,9 @@ inline void
 RequestPort::sendFunctional(PacketPtr pkt) const
 {
     try {
-        return FunctionalRequestProtocol::send(_responsePort, pkt);
+        addTrace(pkt);
+        FunctionalRequestProtocol::send(_responsePort, pkt);
+        removeTrace(pkt);
     } catch (UnboundPortException) {
         reportUnbound();
     }
@@ -530,7 +603,11 @@ inline bool
 RequestPort::sendTimingReq(PacketPtr pkt)
 {
     try {
-        return TimingRequestProtocol::sendReq(_responsePort, pkt);
+        addTrace(pkt);
+        bool succ = TimingRequestProtocol::sendReq(_responsePort, pkt);
+        if (!succ)
+            removeTrace(pkt);
+        return succ;
     } catch (UnboundPortException) {
         reportUnbound();
     }
diff --git a/src/mem/port_proxy.cc b/src/mem/port_proxy.cc
index 19e1a53e84..a3c82452e2 100644
--- a/src/mem/port_proxy.cc
+++ b/src/mem/port_proxy.cc
@@ -44,19 +44,19 @@
 namespace gem5
 {
 
-PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) :
+PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) :
     PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); },
         cache_line_size)
 {}
 
-PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) :
+PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) :
     PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); },
         cache_line_size)
 {}
 
 void
 PortProxy::readBlobPhys(Addr addr, Request::Flags flags,
-                        void *p, int size) const
+                        void *p, uint64_t size) const
 {
     for (ChunkGenerator gen(addr, size, _cacheLineSize); !gen.done();
          gen.next()) {
@@ -73,7 +73,7 @@ PortProxy::readBlobPhys(Addr addr, Request::Flags flags,
 
 void
 PortProxy::writeBlobPhys(Addr addr, Request::Flags flags,
-                         const void *p, int size) const
+                         const void *p, uint64_t size) const
 {
     for (ChunkGenerator gen(addr, size, _cacheLineSize); !gen.done();
          gen.next()) {
@@ -90,7 +90,7 @@ PortProxy::writeBlobPhys(Addr addr, Request::Flags flags,
 
 void
 PortProxy::memsetBlobPhys(Addr addr, Request::Flags flags,
-                          uint8_t v, int size) const
+                          uint8_t v, uint64_t size) const
 {
     // quick and dirty...
     uint8_t *buf = new uint8_t[size];
diff --git a/src/mem/port_proxy.hh b/src/mem/port_proxy.hh
index 29f6ba60a4..49c6d6f811 100644
--- a/src/mem/port_proxy.hh
+++ b/src/mem/port_proxy.hh
@@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol
     SendFunctionalFunc sendFunctional;
 
     /** Granularity of any transactions issued through this proxy. */
-    const unsigned int _cacheLineSize;
+    const Addr _cacheLineSize;
 
     void
     recvFunctionalSnoop(PacketPtr pkt) override
@@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol
     }
 
   public:
-    PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) :
+    PortProxy(SendFunctionalFunc func, Addr cache_line_size) :
         sendFunctional(func), _cacheLineSize(cache_line_size)
     {}
 
     // Helpers which create typical SendFunctionalFunc-s from other objects.
-    PortProxy(ThreadContext *tc, unsigned int cache_line_size);
-    PortProxy(const RequestPort &port, unsigned int cache_line_size);
+    PortProxy(ThreadContext *tc, Addr cache_line_size);
+    PortProxy(const RequestPort &port, Addr cache_line_size);
 
     virtual ~PortProxy() {}
 
@@ -120,19 +120,19 @@ class PortProxy : FunctionalRequestProtocol
      * Read size bytes memory at physical address and store in p.
      */
     void readBlobPhys(Addr addr, Request::Flags flags,
-                      void *p, int size) const;
+                      void *p, uint64_t size) const;
 
     /**
      * Write size bytes from p to physical address.
      */
     void writeBlobPhys(Addr addr, Request::Flags flags,
-                       const void *p, int size) const;
+                       const void *p, uint64_t size) const;
 
     /**
      * Fill size bytes starting at physical addr with byte value val.
      */
     void memsetBlobPhys(Addr addr, Request::Flags flags,
-                        uint8_t v, int size) const;
+                        uint8_t v, uint64_t size) const;
 
 
 
@@ -143,7 +143,7 @@ class PortProxy : FunctionalRequestProtocol
      * Returns true on success and false on failure.
      */
     virtual bool
-    tryReadBlob(Addr addr, void *p, int size) const
+    tryReadBlob(Addr addr, void *p, uint64_t size) const
     {
         readBlobPhys(addr, 0, p, size);
         return true;
@@ -154,7 +154,7 @@ class PortProxy : FunctionalRequestProtocol
      * Returns true on success and false on failure.
      */
     virtual bool
-    tryWriteBlob(Addr addr, const void *p, int size) const
+    tryWriteBlob(Addr addr, const void *p, uint64_t size) const
     {
         writeBlobPhys(addr, 0, p, size);
         return true;
@@ -165,7 +165,7 @@ class PortProxy : FunctionalRequestProtocol
      * Returns true on success and false on failure.
      */
     virtual bool
-    tryMemsetBlob(Addr addr, uint8_t val, int size) const
+    tryMemsetBlob(Addr addr, uint8_t val, uint64_t size) const
     {
         memsetBlobPhys(addr, 0, val, size);
         return true;
@@ -179,7 +179,7 @@ class PortProxy : FunctionalRequestProtocol
      * Same as tryReadBlob, but insists on success.
      */
     void
-    readBlob(Addr addr, void *p, int size) const
+    readBlob(Addr addr, void *p, uint64_t size) const
     {
         if (!tryReadBlob(addr, p, size))
             fatal("readBlob(%#x, ...) failed", addr);
@@ -189,7 +189,7 @@ class PortProxy : FunctionalRequestProtocol
      * Same as tryWriteBlob, but insists on success.
      */
     void
-    writeBlob(Addr addr, const void *p, int size) const
+    writeBlob(Addr addr, const void *p, uint64_t size) const
     {
         if (!tryWriteBlob(addr, p, size))
             fatal("writeBlob(%#x, ...) failed", addr);
@@ -199,7 +199,7 @@ class PortProxy : FunctionalRequestProtocol
      * Same as tryMemsetBlob, but insists on success.
      */
     void
-    memsetBlob(Addr addr, uint8_t v, int size) const
+    memsetBlob(Addr addr, uint8_t v, uint64_t size) const
     {
         if (!tryMemsetBlob(addr, v, size))
             fatal("memsetBlob(%#x, ...) failed", addr);
diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py
index 3028b439d5..86ed03196f 100644
--- a/src/mem/qos/QoSMemCtrl.py
+++ b/src/mem/qos/QoSMemCtrl.py
@@ -38,6 +38,7 @@
 from m5.objects.ClockedObject import ClockedObject
 from m5.objects.QoSTurnaround import *
 
+
 # QoS Queue Selection policy used to select packets among same-QoS queues
 class QoSQPolicy(Enum):
     vals = ["fifo", "lifo", "lrg"]
diff --git a/src/mem/qos/QoSPolicy.py b/src/mem/qos/QoSPolicy.py
index 2dfc974f43..ef44121567 100644
--- a/src/mem/qos/QoSPolicy.py
+++ b/src/mem/qos/QoSPolicy.py
@@ -36,6 +36,7 @@
 from m5.SimObject import *
 from m5.params import *
 
+
 # QoS scheduler policy used to serve incoming transaction
 class QoSPolicy(SimObject):
     type = "QoSPolicy"
diff --git a/src/mem/qos/QoSTurnaround.py b/src/mem/qos/QoSTurnaround.py
index f356635a57..26794c3f49 100644
--- a/src/mem/qos/QoSTurnaround.py
+++ b/src/mem/qos/QoSTurnaround.py
@@ -35,6 +35,7 @@
 
 from m5.SimObject import SimObject
 
+
 # QoS Turnaround policy used to select bus state - READ or WRITE
 class QoSTurnaroundPolicy(SimObject):
     type = "QoSTurnaroundPolicy"
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 491aad0241..df249ac249 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -757,6 +757,13 @@ class Request : public Extensible<Request>
         return atomicOpFunctor.get();
     }
 
+    void
+    setAtomicOpFunctor(AtomicOpFunctorPtr amo_op)
+    {
+        atomicOpFunctor = std::move(amo_op);
+    }
+
+
     /**
      * Accessor for hardware transactional memory abort cause.
      */
diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc
index f70aa79fd4..8f47d0026b 100644
--- a/src/mem/ruby/common/DataBlock.cc
+++ b/src/mem/ruby/common/DataBlock.cc
@@ -51,9 +51,19 @@ namespace ruby
 
 DataBlock::DataBlock(const DataBlock &cp)
 {
-    m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
-    memcpy(m_data, cp.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    m_data = new uint8_t[block_bytes];
+    memcpy(m_data, cp.m_data, block_bytes);
     m_alloc = true;
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, cp.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
 }
 
 void
@@ -73,7 +83,20 @@ DataBlock::clear()
 bool
 DataBlock::equal(const DataBlock& obj) const
 {
-    return !memcmp(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Check that the block contents match
+    if (memcmp(m_data, obj.m_data, block_bytes)) {
+        return false;
+    }
+    if (m_atomicLog.size() != obj.m_atomicLog.size()) {
+        return false;
+    }
+    for (size_t i = 0; i < m_atomicLog.size(); i++) {
+        if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 void
@@ -87,12 +110,13 @@ DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask)
 }
 
 void
-DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
+DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
+        bool isAtomicNoReturn)
 {
     for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
         m_data[i] = dblk.m_data[i];
     }
-    mask.performAtomic(m_data);
+    mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn);
 }
 
 void
@@ -107,6 +131,28 @@ DataBlock::print(std::ostream& out) const
     out << std::dec << "]" << std::flush;
 }
 
+int
+DataBlock::numAtomicLogEntries() const
+{
+    return m_atomicLog.size();
+}
+uint8_t*
+DataBlock::popAtomicLogEntryFront()
+{
+    assert(m_atomicLog.size() > 0);
+    auto ret = m_atomicLog.front();
+    m_atomicLog.pop_front();
+    return ret;
+}
+void
+DataBlock::clearAtomicLogEntries()
+{
+    for (auto log : m_atomicLog) {
+        delete [] log;
+    }
+    m_atomicLog.clear();
+}
+
 const uint8_t*
 DataBlock::getData(int offset, int len) const
 {
@@ -137,7 +183,18 @@ DataBlock::setData(PacketPtr pkt)
 DataBlock &
 DataBlock::operator=(const DataBlock & obj)
 {
-    memcpy(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Copy entire block contents from obj to current block
+    memcpy(m_data, obj.m_data, block_bytes);
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, obj.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
     return *this;
 }
 
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index e147d701c5..7456a25f3f 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -44,6 +44,7 @@
 #include <inttypes.h>
 
 #include <cassert>
+#include <deque>
 #include <iomanip>
 #include <iostream>
 
@@ -71,6 +72,12 @@ class DataBlock
     {
         if (m_alloc)
             delete [] m_data;
+
+        // If data block involved in atomic
+        // operations, free all meta data
+        for (auto log : m_atomicLog) {
+            delete [] log;
+        }
     }
 
     DataBlock& operator=(const DataBlock& obj);
@@ -80,13 +87,17 @@ class DataBlock
     void clear();
     uint8_t getByte(int whichByte) const;
     const uint8_t *getData(int offset, int len) const;
+    uint8_t* popAtomicLogEntryFront();
+    int numAtomicLogEntries() const;
+    void clearAtomicLogEntries();
     uint8_t *getDataMod(int offset);
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
     void setData(PacketPtr pkt);
     void copyPartial(const DataBlock &dblk, int offset, int len);
     void copyPartial(const DataBlock &dblk, const WriteMask &mask);
-    void atomicPartial(const DataBlock & dblk, const WriteMask & mask);
+    void atomicPartial(const DataBlock & dblk, const WriteMask & mask,
+            bool isAtomicNoReturn=true);
     bool equal(const DataBlock& obj) const;
     void print(std::ostream& out) const;
 
@@ -94,6 +105,9 @@ class DataBlock
     void alloc();
     uint8_t *m_data;
     bool m_alloc;
+
+    // Tracks block changes when atomic ops are applied
+    std::deque<uint8_t*> m_atomicLog;
 };
 
 inline void
diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc
index 4c24a64706..1fa03c951e 100644
--- a/src/mem/ruby/common/WriteMask.cc
+++ b/src/mem/ruby/common/WriteMask.cc
@@ -55,5 +55,29 @@ WriteMask::print(std::ostream& out) const
         << std::flush;
 }
 
+void
+WriteMask::performAtomic(uint8_t * p,
+        std::deque<uint8_t*>& log, bool isAtomicNoReturn) const
+{
+    int offset;
+    uint8_t *block_update;
+    // Here, operations occur in FIFO order from the mAtomicOp
+    // vector. This is done to match the ordering of packets
+    // that was seen when the initial coalesced request was created.
+    for (int i = 0; i < mAtomicOp.size(); i++) {
+        if (!isAtomicNoReturn) {
+            // Save the old value of the data block in case a
+            // return value is needed
+            block_update = new uint8_t[mSize];
+            std::memcpy(block_update, p, mSize);
+            log.push_back(block_update);
+        }
+        // Perform the atomic operation
+        offset = mAtomicOp[i].first;
+        AtomicOpFunctor *fnctr = mAtomicOp[i].second;
+        (*fnctr)(&p[offset]);
+    }
+}
+
 } // namespace ruby
 } // namespace gem5
diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh
index 2de21da79b..8c6b8ce976 100644
--- a/src/mem/ruby/common/WriteMask.hh
+++ b/src/mem/ruby/common/WriteMask.hh
@@ -222,26 +222,16 @@ class WriteMask
 
     void print(std::ostream& out) const;
 
-    void
-    performAtomic(uint8_t * p) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(&p[offset]);
-        }
-    }
-
-    void
-    performAtomic(DataBlock & blk) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            uint8_t *p = blk.getDataMod(offset);
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(p);
-        }
-    }
+    /*
+     * Performs atomic operations on the data block pointed to by p. The
+     * atomic operations to perform are in the vector mAtomicOp. Unless
+     * isAtomicNoReturn is set, a copy of the block as it was before each
+     * operation is pushed to atomicChangeLog so that each individual atomic
+     * requestor can be given the value its own operation observed.
+     */
+    void performAtomic(uint8_t * p,
+            std::deque<uint8_t*>& atomicChangeLog,
+            bool isAtomicNoReturn=true) const;
 
     const AtomicOpVector&
     getAtomicOps() const
diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc
index 9a6500978e..9a4439a538 100644
--- a/src/mem/ruby/network/MessageBuffer.cc
+++ b/src/mem/ruby/network/MessageBuffer.cc
@@ -62,7 +62,8 @@ MessageBuffer::MessageBuffer(const Params &p)
     m_max_dequeue_rate(p.max_dequeue_rate), m_dequeues_this_cy(0),
     m_time_last_time_size_checked(0),
     m_time_last_time_enqueue(0), m_time_last_time_pop(0),
-    m_last_arrival_time(0), m_strict_fifo(p.ordered),
+    m_last_arrival_time(0), m_last_message_strict_fifo_bypassed(false),
+    m_strict_fifo(p.ordered),
     m_randomization(p.randomization),
     m_allow_zero_latency(p.allow_zero_latency),
     m_routing_priority(p.routing_priority),
@@ -214,7 +215,8 @@ random_time()
 }
 
 void
-MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
+MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta,
+                       bool bypassStrictFIFO)
 {
     // record current time incase we have a pop that also adjusts my size
     if (m_time_last_time_enqueue < current_time) {
@@ -252,7 +254,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
 
     // Check the arrival time
     assert(arrival_time >= current_time);
-    if (m_strict_fifo) {
+    if (m_strict_fifo &&
+        !(bypassStrictFIFO || m_last_message_strict_fifo_bypassed)) {
         if (arrival_time < m_last_arrival_time) {
             panic("FIFO ordering violated: %s name: %s current time: %d "
                   "delta: %d arrival_time: %d last arrival_time: %d\n",
@@ -266,6 +269,8 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta)
         m_last_arrival_time = arrival_time;
     }
 
+    m_last_message_strict_fifo_bypassed = bypassStrictFIFO;
+
     // compute the delay cycles and set enqueue time
     Message* msg_ptr = message.get();
     assert(msg_ptr != NULL);
diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh
index 279599340a..03a0454433 100644
--- a/src/mem/ruby/network/MessageBuffer.hh
+++ b/src/mem/ruby/network/MessageBuffer.hh
@@ -123,7 +123,8 @@ class MessageBuffer : public SimObject
 
     const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); }
 
-    void enqueue(MsgPtr message, Tick curTime, Tick delta);
+    void enqueue(MsgPtr message, Tick curTime, Tick delta,
+                bool bypassStrictFIFO = false);
 
     // Defer enqueueing a message to a later cycle by putting it aside and not
     // enqueueing it in this cycle
@@ -271,6 +272,9 @@ class MessageBuffer : public SimObject
 
     uint64_t m_msg_counter;
     int m_priority_rank;
+
+    bool m_last_message_strict_fifo_bypassed;
+
     const bool m_strict_fifo;
     const MessageRandomization m_randomization;
     const bool m_allow_zero_latency;
diff --git a/src/mem/ruby/network/MessageBuffer.py b/src/mem/ruby/network/MessageBuffer.py
index bd20239cea..270dbb1ba1 100644
--- a/src/mem/ruby/network/MessageBuffer.py
+++ b/src/mem/ruby/network/MessageBuffer.py
@@ -40,6 +40,7 @@
 from m5.proxy import *
 from m5.SimObject import SimObject
 
+
 # A MessageBuffer inserts random delays to enqueued messages when the
 # randomization param is set to 'enabled' or when globally enabled for the
 # RubySystem and the param is set to 'ruby_system' (default). 'disabled'
diff --git a/src/mem/ruby/network/garnet/GarnetNetwork.py b/src/mem/ruby/network/garnet/GarnetNetwork.py
index 128118309c..bf5a0bad33 100644
--- a/src/mem/ruby/network/garnet/GarnetNetwork.py
+++ b/src/mem/ruby/network/garnet/GarnetNetwork.py
@@ -28,6 +28,7 @@
 # Author: Tushar Krishna
 #
 
+from m5.citations import add_citation
 from m5.params import *
 from m5.proxy import *
 from m5.objects.Network import RubyNetwork
@@ -83,3 +84,40 @@ class GarnetRouter(BasicRouter):
     width = Param.UInt32(
         Parent.ni_flit_size, "bit width supported by the router"
     )
+
+
+add_citation(
+    GarnetNetwork,
+    """@inproceedings{Bharadwaj:2020:kite,
+  author       = {Srikant Bharadwaj and
+                  Jieming Yin and
+                  Bradford M. Beckmann and
+                  Tushar Krishna},
+  title        = {Kite: {A} Family of Heterogeneous Interposer Topologies Enabled via
+                  Accurate Interconnect Modeling},
+  booktitle    = {57th {ACM/IEEE} Design Automation Conference, {DAC} 2020, San Francisco,
+                  CA, USA, July 20-24, 2020},
+  pages        = {1--6},
+  publisher    = {{IEEE}},
+  year         = {2020},
+  url          = {https://doi.org/10.1109/DAC18072.2020.9218539},
+  doi          = {10.1109/DAC18072.2020.9218539}
+}
+@inproceedings{Agarwal:2009:garnet,
+  author       = {Niket Agarwal and
+                  Tushar Krishna and
+                  Li{-}Shiuan Peh and
+                  Niraj K. Jha},
+  title        = {{GARNET:} {A} detailed on-chip network model inside a full-system
+                  simulator},
+  booktitle    = {{IEEE} International Symposium on Performance Analysis of Systems
+                  and Software, {ISPASS} 2009, April 26-28, 2009, Boston, Massachusetts,
+                  USA, Proceedings},
+  pages        = {33--42},
+  publisher    = {{IEEE} Computer Society},
+  year         = {2009},
+  url          = {https://doi.org/10.1109/ISPASS.2009.4919636},
+  doi          = {10.1109/ISPASS.2009.4919636}
+}
+""",
+)
diff --git a/src/mem/ruby/network/garnet/flit.cc b/src/mem/ruby/network/garnet/flit.cc
index d31d826c93..21e6dcdc33 100644
--- a/src/mem/ruby/network/garnet/flit.cc
+++ b/src/mem/ruby/network/garnet/flit.cc
@@ -51,7 +51,7 @@ flit::flit(int packet_id, int id, int  vc, int vnet, RouteInfo route, int size,
     m_enqueue_time = curTime;
     m_dequeue_time = curTime;
     m_time = curTime;
-    m_packet_id = id;
+    m_packet_id = packet_id;
     m_id = id;
     m_vnet = vnet;
     m_vc = vc;
diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm
index 28bddf5ba4..bdc5d73f20 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
+ * Copyright (c) 2023 Matthew D. Sinclair
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,6 +48,9 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
 {
   state_declaration(State, desc="SQC Cache States", default="SQC_State_I") {
     I, AccessPermission:Invalid, desc="Invalid";
+    // Note: currently IV in the TCP is only for pending loads to a given cache
+    // line.  Since the SQC is read only, there are no stores.
+    IV, AccessPermission:Invalid, desc="Going from I to V, waiting on TCC data";
     V, AccessPermission:Read_Only, desc="Valid";
   }
 
@@ -74,7 +78,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
   }
 
   structure(TBE, desc="...") {
-    State TBEState,             desc="Transient state";
+    State TBEState,          desc="Transient state";
     DataBlock DataBlk,       desc="data for the block, required for concurrent writebacks";
     bool Dirty,              desc="Is the data dirty (different than memory)?";
     int NumPendingMsgs,      desc="Number of acks/data messages that this processor is waiting for";
@@ -97,6 +101,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
   void unset_tbe();
   void wakeUpAllBuffers();
   void wakeUpBuffers(Addr a);
+  void wakeUpAllBuffers(Addr a);
   Cycles curCycle();
 
   // Internal functions
@@ -269,6 +274,21 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
     }
   }
 
+  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
+    check_allocate(TBEs);
+    TBEs.allocate(address);
+    set_tbe(TBEs.lookup(address));
+  }
+
+  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
+    TBEs.deallocate(address);
+    unset_tbe();
+  }
+
+  action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+    stall_and_wait(mandatoryQueue_in, address);
+  }
+
   action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
     mandatoryQueue_in.dequeue(clockEdge());
   }
@@ -277,6 +297,10 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
     responseToSQC_in.dequeue(clockEdge());
   }
 
+  action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") {
+    wakeUpAllBuffers(address);
+  }
+
   action(l_loadDoneHit, "ldh", desc="local load done (hits in SQC)") {
     assert(is_valid(cache_entry));
     sequencer.readCallback(address, cache_entry.DataBlk, true, MachineType:L1Cache);
@@ -297,6 +321,10 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
     }
   }
 
+  action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
+    L1cache.setMRU(address);
+  }
+
   // added for profiling
   action(uu_profileDataMiss, "\udm", desc="Profile SQC demand miss"){
     L1cache.profileDemandMiss();
@@ -308,22 +336,52 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
 
   // Transitions
 
+  // if another request arrives for the same cache line that has a pending
+  // load, put it on the wakeup buffer.  This reduces resource contention since
+  // they won't try again every cycle and will instead only try again once woken
+  // up
+  transition(IV, {Fetch}) {
+      st_stallAndWaitRequest;
+  }
+
   // transitions from base
-  transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
+  transition({I, IV, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
     // since we're evicting something, don't bother classifying as hit/miss
     ic_invCache;
   }
 
-  transition(I, Data, V) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+  // if we got a response for a load where the line is in I, then
+  // another request must have come in that replaced the line in question in
+  // the cache.  Thus, complete this request without allocating the line, but
+  // still deallocate TBE and wakeup any dependent addresses.
+  transition(I, Data) {TagArrayRead, TagArrayWrite, DataArrayRead} {
+    // don't profile this as a hit/miss since it's a response from L2,
+    // so we already counted it
+    l_loadDoneMiss;
+    wada_wakeUpAllDependentsAddr;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  // if line is currently in IV, then Data is returning the data for a
+  // pending load, so transition to V, deallocate TBE, and wakeup any dependent
+  // requests so they will be replayed now that this request has returned.
+  transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayRead} {
     a_allocate;
     // don't profile this as a hit/miss since it's a reponse from L2,
     // so we already counted it
     w_writeCache;
     l_loadDoneMiss;
+    wada_wakeUpAllDependentsAddr;
+    d_deallocateTBE;
     pr_popResponseQueue;
   }
 
-  transition(I, Fetch) {TagArrayRead, TagArrayWrite} {
+  // if we have a load that misses, allocate TBE entry and transition to IV
+  // to prevent subsequent requests to same cache line from also going to TCC
+  // while this request is pending
+  transition(I, Fetch, IV) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
     nS_issueRdBlkS;
     uu_profileDataMiss; // since line wasn't in SQC, we missed
     p_popMandatoryQueue;
@@ -332,6 +390,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
   // simple hit transitions
   transition(V, Fetch) {TagArrayRead, DataArrayRead} {
     l_loadDoneHit;
+    mru_updateMRU;
     uu_profileDataHit; // line was in SQC, so we hit
     p_popMandatoryQueue;
   }
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index a59589870d..14c9c8c1cc 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -36,6 +36,7 @@ machine(MachineType:TCC, "TCC Cache")
    bool WB; /*is this cache Writeback?*/
    Cycles l2_request_latency := 50;
    Cycles l2_response_latency := 20;
+   Cycles glc_atomic_latency := 0;
 
   // From the TCPs or SQCs
   MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
@@ -60,10 +61,13 @@ machine(MachineType:TCC, "TCC Cache")
     WrVicBlk,               desc="L1 Write Through";
     WrVicBlkBack,           desc="L1 Write Through(dirty cache)";
     WrVicBlkEvict,          desc="L1 Write Through(dirty cache) and evict";
+    AtomicWait,             desc="Atomic Op that must wait for pending loads";
     Atomic,                 desc="Atomic Op";
+    AtomicPassOn,           desc="Atomic Op Passed on to Directory";
     AtomicDone,             desc="AtomicOps Complete";
     AtomicNotDone,          desc="AtomicOps not Complete";
-    Data,                   desc="data messgae";
+    Data,                   desc="Data message";
+    Flush,                  desc="Flush cache entry";
     // Coming from this TCC
     L2_Repl,                desc="L2 Replacement";
     // Probes
@@ -81,6 +85,7 @@ machine(MachineType:TCC, "TCC Cache")
     I, AccessPermission:Invalid,    desc="Invalid";
     IV, AccessPermission:Busy,      desc="Waiting for Data";
     WI, AccessPermission:Busy,      desc="Waiting on Writethrough Ack";
+    WIB, AccessPermission:Busy,     desc="Waiting on Writethrough Ack; Will be Bypassed";
     A, AccessPermission:Busy,       desc="Invalid waiting on atomici Data";
   }
 
@@ -89,6 +94,7 @@ machine(MachineType:TCC, "TCC Cache")
     DataArrayWrite,   desc="Write the data array";
     TagArrayRead,     desc="Read the data array";
     TagArrayWrite,    desc="Write the data array";
+    AtomicALUOperation,  desc="Atomic ALU operation";
   }
 
 
@@ -102,16 +108,18 @@ machine(MachineType:TCC, "TCC Cache")
   }
 
   structure(TBE, desc="...") {
-    State TBEState,     desc="Transient state";
-    DataBlock DataBlk,  desc="data for the block";
-    bool Dirty,         desc="Is the data dirty?";
-    bool Shared,        desc="Victim hit by shared probe";
-    MachineID From,     desc="Waiting for writeback from...";
-    NetDest Destination, desc="Data destination";
-    int numAtomics,     desc="number remaining atomics";
-    int atomicDoneCnt,  desc="number AtomicDones triggered";
-    bool isGLCSet,      desc="Bypass L1 Cache";
-    bool isSLCSet,      desc="Bypass L1 and L2 Cache";
+    State TBEState,                  desc="Transient state";
+    DataBlock DataBlk,               desc="data for the block";
+    bool Dirty,                      desc="Is the data dirty?";
+    bool Shared,                     desc="Victim hit by shared probe";
+    MachineID From,                  desc="Waiting for writeback from...";
+    NetDest Destination,             desc="Data destination";
+    int numPending,                  desc="num pending requests";
+    int numPendingDirectoryAtomics,  desc="number of pending atomics to be performed in directory";
+    int atomicDoneCnt,               desc="number AtomicDones triggered";
+    bool isGLCSet,                   desc="Bypass L1 Cache";
+    bool isSLCSet,                   desc="Bypass L1 and L2 Cache";
+    WriteMask atomicWriteMask,       desc="Atomic write mask";
   }
 
   structure(TBETable, external="yes") {
@@ -218,6 +226,8 @@ machine(MachineType:TCC, "TCC Cache")
         L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
     } else if (request_type == RequestType:TagArrayWrite) {
         L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
+    } else if (request_type == RequestType:AtomicALUOperation) {
+        L2cache.recordRequestType(CacheRequestType:AtomicALUOperation, addr);
     }
   }
 
@@ -230,6 +240,8 @@ machine(MachineType:TCC, "TCC Cache")
       return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
     } else if (request_type == RequestType:TagArrayWrite) {
       return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
+    } else if (request_type == RequestType:AtomicALUOperation) {
+      return L2cache.checkResourceAvailable(CacheResourceType:AtomicALUArray, addr);
     } else {
       error("Invalid RequestType type in checkResourceAvailable");
       return true;
@@ -253,21 +265,22 @@ machine(MachineType:TCC, "TCC Cache")
   // request queue going to NB
   //
 
-
-// ** IN_PORTS **
+  // ** IN_PORTS **
   in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
     if (triggerQueue_in.isReady(clockEdge())) {
       peek(triggerQueue_in, TriggerMsg) {
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
 
+        // The trigger queue applies only to atomics performed in the directory.
+
         // There is a possible race where multiple AtomicDone triggers can be
         // sent if another Atomic to the same address is issued after the
         // AtomicDone is triggered but before the message arrives here. For
         // that case we count the number of AtomicDones in flight for this
         // address and only call AtomicDone to deallocate the TBE when it is
         // the last in flight message.
-        if (tbe.numAtomics == 0 && tbe.atomicDoneCnt == 1) {
+        if (tbe.numPendingDirectoryAtomics == 0 && tbe.atomicDoneCnt == 1) {
             trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
         } else {
             trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
@@ -276,32 +289,45 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
-
-
+  // handle responses from directory here
   in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
     if (responseFromNB_in.isReady(clockEdge())) {
       peek(responseFromNB_in, ResponseMsg, block_on="addr") {
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
-        bool is_slc_set := false;
-
-        if (!is_invalid(tbe)) {
-            is_slc_set := tbe.isSLCSet;
-        }
-
-        if (is_slc_set) {
-            // If the SLC bit is set, the response needs to bypass the cache
-            // and should not be allocated an entry.
+        /*
+          MOESI_AMD_Base-dir acts as the directory, and it always passes
+          SLC information back to L2 because of races at L2 with requests
+          from different CUs sending requests to same cache line in parallel.
+          If these requests have different GLC/SLC settings, the L2 TBE may
+          not have the correct GLC/SLC information for a given request.
+         */
+        bool is_slc_set := in_msg.isSLCSet;
+
+        // Whether the SLC bit is set or not, WB acks should invoke the
+        // WBAck event. For cases where a read response will follow a
+        // WBAck (A read bypass evict on a dirty line), the line's TBE
+        // will not be deallocated on WBAck, and the SLC bit will be
+        // checked when the read response is received.
+        if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
+          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
+        } else if(in_msg.Type == CoherenceResponseType:NBSysResp) {
+          // If the SLC bit is set or the cache is write-through and
+          // we're receiving modified data (such as from an atomic),
+          // the response needs to bypass the cache and should not be
+          // allocated an entry.
+          if(is_slc_set || (!WB && in_msg.State == CoherenceState:Modified)) {
             trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
-        } else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
-          if(presentOrAvail(in_msg.addr)) {
-            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
           } else {
-            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
-            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            if(presentOrAvail(in_msg.addr)) {
+              // Responses with atomic data will only reach here if the
+              // SLC bit isn't set and the cache is WB
+              trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+            } else {
+              Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+              trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            }
           }
-        } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
-          trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
         } else {
           error("Unexpected Response Message to Core");
         }
@@ -348,12 +374,33 @@ machine(MachineType:TCC, "TCC Cache")
             } else {
                 trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
             }
-        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
-          // Currently the Atomic requests do not have GLC/SLC bit handing
-          // support. The assert ensures that the requests do not have
-          // these set, and therefore do not expect to bypass the cache
-          assert(!in_msg.isSLCSet);
-          trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                   in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                   in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
+	  /*
+	    If there are pending requests for this line already and those
+	    requests are not atomics, because we can't easily differentiate
+	    between different request types on return and because decrementing
+	    the atomic count assumes all returned requests in the A state are
+	    atomics, we will need to put this atomic to sleep and wake it up
+	    when the loads return.
+	   */
+	  if (is_valid(tbe) && (tbe.numPending > 0) &&
+	        (tbe.numPendingDirectoryAtomics == 0)) {
+            trigger(Event:AtomicWait, in_msg.addr, cache_entry, tbe);
+          } else {
+            // If the request is system-level, if the address isn't in the cache,
+            // or if this cache is write-through, then send the request to the
+            // directory. Since non-SLC atomics won't be performed by the directory,
+            // TCC will perform the atomic on the return path on Event:Data.
+            // The action will invalidate the cache line if SLC is set and the address is
+            // in the cache.
+            if(in_msg.isSLCSet || !WB) {
+              trigger(Event:AtomicPassOn, in_msg.addr, cache_entry, tbe);
+            } else {
+              trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
+            }
+          }
         } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
           if (in_msg.isSLCSet) {
             // If SLC bit is set, the request needs to go directly to memory.
@@ -362,6 +409,8 @@ machine(MachineType:TCC, "TCC Cache")
           } else {
             trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
           }
+        } else if (in_msg.Type == CoherenceRequestType:WriteFlush) {
+            trigger(Event:Flush, in_msg.addr, cache_entry, tbe);
         } else {
           DPRINTF(RubySlicc, "%s\n", in_msg);
           error("Unexpected Response Message to Core");
@@ -402,24 +451,35 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.addr := address;
       out_msg.Type := CoherenceResponseType:TDSysResp;
       out_msg.Sender := machineID;
-      out_msg.Destination := tbe.Destination;
-      out_msg.DataBlk := cache_entry.DataBlk;
       out_msg.MessageSize := MessageSizeType:Response_Data;
       out_msg.Dirty := false;
       out_msg.State := CoherenceState:Shared;
-      DPRINTF(RubySlicc, "%s\n", out_msg);
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
+        // if line state is Invalid, then we must be doing the transition(I, Data)
+        // so use the DataBlk from the incoming message
+        if ((getAccessPermission(address) == AccessPermission:NotPresent) ||
+	      (getAccessPermission(address) == AccessPermission:Invalid)) {
+          out_msg.DataBlk := in_msg.DataBlk;
+        } else {
+          out_msg.DataBlk := cache_entry.DataBlk;
+        }
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+        // reuse CURequestor field to allow multiple concurrent loads and
+        // track where they should go back to (since TBE can't distinguish
+        // destinations)
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.CURequestor);
       }
+      DPRINTF(RubySlicc, "%s\n", out_msg);
     }
     enqueue(unblockToNB_out, UnblockMsg, 1) {
       out_msg.addr := address;
       out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
@@ -431,13 +491,17 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.addr := address;
           out_msg.Type := CoherenceResponseType:TDSysResp;
           out_msg.Sender := machineID;
-          out_msg.Destination := tbe.Destination;
+          // reuse CURequestor field to allow multiple concurrent loads and
+          // track where they should go back to (since TBE can't distinguish
+          // destinations)
+          out_msg.Destination.clear();
+          out_msg.Destination.add(in_msg.CURequestor);
           out_msg.DataBlk := in_msg.DataBlk;
           out_msg.MessageSize := MessageSizeType:Response_Data;
           out_msg.Dirty := false;
           out_msg.State := CoherenceState:Shared;
-          out_msg.isGLCSet := tbe.isGLCSet;
-          out_msg.isSLCSet := tbe.isSLCSet;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
         enqueue(unblockToNB_out, UnblockMsg, 1) {
@@ -450,19 +514,25 @@ machine(MachineType:TCC, "TCC Cache")
   }
 
   action(rd_requestData, "r", desc="Miss in L2, pass on") {
-    if(tbe.Destination.count()==1){
-      peek(coreRequestNetwork_in, CPURequestMsg) {
-        enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
-          out_msg.addr := address;
-          out_msg.Type := in_msg.Type;
-          out_msg.Requestor := machineID;
-          out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
-          out_msg.Shared := false; // unneeded for this request
-          out_msg.MessageSize := in_msg.MessageSize;
-          out_msg.isGLCSet := tbe.isGLCSet;
-          out_msg.isSLCSet := tbe.isSLCSet;
-          DPRINTF(RubySlicc, "%s\n", out_msg);
-        }
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      DPRINTF(RubySlicc, "in_msg: %s\n", in_msg);
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Type := in_msg.Type;
+        out_msg.Requestor := machineID;
+        /*
+          To allow multiple concurrent requests from different CUs, we pass
+          the origin information along to the directory, which stores it in its
+          TBE as appropriate before passing it back to the TCC on the return
+          path.
+         */
+        out_msg.CURequestor := in_msg.Requestor;
+        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
+        out_msg.Shared := false; // unneeded for this request
+        out_msg.MessageSize := in_msg.MessageSize;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+        DPRINTF(RubySlicc, "out_msg: %s\n", out_msg);
       }
     }
   }
@@ -473,7 +543,7 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:TDSysWBAck;
         out_msg.Destination.clear();
-        out_msg.Destination.add(in_msg.WTRequestor);
+        out_msg.Destination.add(in_msg.CURequestor);
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
@@ -495,19 +565,66 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
+  action(fw_sendFlushResponse, "fw", desc="send Flush Response") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:TDSysWBAck;
+        out_msg.Destination.clear();
+        out_msg.Destination.add(in_msg.Requestor);
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.instSeqNum := in_msg.instSeqNum;
+      }
+    }
+  }
+
   action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency + glc_atomic_latency, true) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysResp;
+          out_msg.Destination.clear();
+          out_msg.Destination.add(in_msg.Requestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := MessageSizeType:Response_Data;
+          out_msg.DataBlk := cache_entry.DataBlk;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+        }
+    }
+    cache_entry.DataBlk.clearAtomicLogEntries();
+  }
+
+  action(baplr_sendBypassedAtomicPerformedLocallyResponse, "barplr", desc="send locally-performed bypassed Atomic Ack") {
     peek(responseFromNB_in, ResponseMsg) {
         enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
           out_msg.addr := address;
           out_msg.Type := CoherenceResponseType:TDSysResp;
-          out_msg.Destination.add(in_msg.WTRequestor);
+          out_msg.Destination.add(in_msg.CURequestor);
           out_msg.Sender := machineID;
           out_msg.MessageSize := in_msg.MessageSize;
-          out_msg.DataBlk := in_msg.DataBlk;
+          out_msg.DataBlk := cache_entry.DataBlk;
           out_msg.isGLCSet := tbe.isGLCSet;
           out_msg.isSLCSet := tbe.isSLCSet;
         }
     }
+    cache_entry.DataBlk.clearAtomicLogEntries();
+  }
+
+  action(bapdr_sendBypassedAtomicPerformedInDirectoryResponse, "bapdr", desc="send bypassed Atomic Ack") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysResp;
+          out_msg.Destination.add(in_msg.CURequestor);
+          out_msg.Sender := machineID;
+          out_msg.MessageSize := in_msg.MessageSize;
+          out_msg.DataBlk := in_msg.DataBlk;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+        }
+    }
   }
 
   action(a_allocateBlock, "a", desc="allocate TCC block") {
@@ -531,24 +648,51 @@ machine(MachineType:TCC, "TCC Cache")
       TBEs.allocate(address);
       set_tbe(TBEs.lookup(address));
       tbe.Destination.clear();
-      tbe.numAtomics := 0;
+      tbe.numPendingDirectoryAtomics := 0;
       tbe.atomicDoneCnt := 0;
+      tbe.numPending := 0;
     }
+    // each pending request increments this count by 1
+    tbe.numPending := tbe.numPending + 1;
     if (coreRequestNetwork_in.isReady(clockEdge())) {
       peek(coreRequestNetwork_in, CPURequestMsg) {
-        if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
+        if(in_msg.Type == CoherenceRequestType:RdBlk ||
+           in_msg.Type == CoherenceRequestType:Atomic ||
+           in_msg.Type == CoherenceRequestType:AtomicReturn ||
+           in_msg.Type == CoherenceRequestType:AtomicNoReturn){
           tbe.Destination.add(in_msg.Requestor);
         }
+        /*
+          If there are multiple concurrent requests to the same cache line, each
+          one will overwrite the previous one's GLC/SLC information here.
+          If these requests have different GLC/SLC information, this causes
+          a segfault.  Hence, currently the support relies on the directory to
+          pass back the GLC/SLC information instead of relying on L2 TBE to be
+          correct.
+
+          This message is left here as an FYI for future developers.
+         */
         tbe.isGLCSet := in_msg.isGLCSet;
         tbe.isSLCSet := in_msg.isSLCSet;
+        if(in_msg.Type == CoherenceRequestType:Atomic ||
+           in_msg.Type == CoherenceRequestType:AtomicReturn ||
+           in_msg.Type == CoherenceRequestType:AtomicNoReturn){
+          tbe.atomicWriteMask.clear();
+          tbe.atomicWriteMask.orMask(in_msg.writeMask);
+        }
       }
     }
   }
 
   action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
-    tbe.Destination.clear();
-    TBEs.deallocate(address);
-    unset_tbe();
+    // since we may have multiple destinations, can't deallocate if we aren't
+    // last one
+    tbe.numPending := tbe.numPending - 1;
+    if (tbe.numPending == 0) {
+      tbe.Destination.clear();
+      TBEs.deallocate(address);
+      unset_tbe();
+    }
   }
 
   action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
@@ -566,12 +710,26 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
+  action(wardb_writeAtomicResponseDirtyBytes, "wardb", desc="write data to TCC") {
+    peek(responseFromNB_in, ResponseMsg) {
+      cache_entry.DataBlk := in_msg.DataBlk;
+      cache_entry.writeMask.orMask(tbe.atomicWriteMask);
+      DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
+    }
+  }
+
+  action(owm_orWriteMask, "owm", desc="or TCCs write mask") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      cache_entry.writeMask.orMask(in_msg.writeMask);
+    }
+  }
+
   action(wt_writeThrough, "wt", desc="write back data") {
     peek(coreRequestNetwork_in, CPURequestMsg) {
       enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
         out_msg.addr := address;
         out_msg.Requestor := machineID;
-        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.CURequestor := in_msg.Requestor;
         out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
         out_msg.MessageSize := MessageSizeType:Data;
         out_msg.Type := CoherenceRequestType:WriteThrough;
@@ -579,6 +737,8 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.DataBlk := in_msg.DataBlk;
         out_msg.writeMask.orMask(in_msg.writeMask);
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -587,7 +747,7 @@ machine(MachineType:TCC, "TCC Cache")
     enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
       out_msg.addr := address;
       out_msg.Requestor := machineID;
-      out_msg.WTRequestor := machineID;
+      out_msg.CURequestor := machineID;
       out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Data;
       out_msg.Type := CoherenceRequestType:WriteThrough;
@@ -597,17 +757,37 @@ machine(MachineType:TCC, "TCC Cache")
     }
   }
 
+  action(f_flush, "f", desc="write back data") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
+        out_msg.addr := address;
+        out_msg.Requestor := machineID;
+        out_msg.CURequestor := in_msg.Requestor;
+        out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
+        out_msg.MessageSize := MessageSizeType:Data;
+        out_msg.Type := CoherenceRequestType:WriteFlush;
+        out_msg.Dirty := true;
+        out_msg.DataBlk := cache_entry.DataBlk;
+        out_msg.writeMask.orMask(cache_entry.writeMask);
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
+    }
+  }
+
   action(at_atomicThrough, "at", desc="write back data") {
     peek(coreRequestNetwork_in, CPURequestMsg) {
       enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
         out_msg.addr := address;
         out_msg.Requestor := machineID;
-        out_msg.WTRequestor := in_msg.Requestor;
+        out_msg.CURequestor := in_msg.Requestor;
         out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
         out_msg.MessageSize := MessageSizeType:Data;
-        out_msg.Type := CoherenceRequestType:Atomic;
+        out_msg.Type := in_msg.Type;
         out_msg.Dirty := true;
         out_msg.writeMask.orMask(in_msg.writeMask);
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -649,26 +829,40 @@ machine(MachineType:TCC, "TCC Cache")
     wakeUpAllBuffers(address);
   }
 
+  /*
+    Currently z_stall is unused because it can lead to Protocol Stalls that
+    eventually lead to deadlock.  Instead, it is recommended to use
+    st_stallAndWaitRequest in combination with a wakeupBuffer call (e.g.,
+    wada_wakeUpAllDependentsAddr) to put the pending requests to sleep instead of
+    them causing head of line blocking -- wada_wakeUpAllDependentsAddr should wake
+    the request up once the request preventing it from completing is done.
   action(z_stall, "z", desc="stall") {
       // built-in
   }
+  */
 
 
-  action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
-    tbe.numAtomics := tbe.numAtomics + 1;
+  action(inpa_incrementNumPendingDirectoryAtomics, "inpa", desc="inc num atomics") {
+    // Only increment number of atomics if they will actually be performed in directory
+    // That is, if the SLC bit is set or if the cache is write through
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      if (in_msg.isSLCSet || !WB) {
+        tbe.numPendingDirectoryAtomics := tbe.numPendingDirectoryAtomics + 1;
+      }
+    }
   }
 
 
-  action(dna_decrementNumAtomics, "dna", desc="inc num atomics") {
-    tbe.numAtomics := tbe.numAtomics - 1;
-    if (tbe.numAtomics==0) {
+  action(dnpa_decrementNumPendingDirectoryAtomics, "dnpa", desc="dec num atomics") {
+    tbe.numPendingDirectoryAtomics := tbe.numPendingDirectoryAtomics - 1;
+    if (tbe.numPendingDirectoryAtomics==0) {
       enqueue(triggerQueue_out, TriggerMsg, 1) {
         tbe.atomicDoneCnt := tbe.atomicDoneCnt + 1;
         out_msg.addr := address;
         out_msg.Type := TriggerType:AtomicDone;
         peek(responseFromNB_in, ResponseMsg) {
-          out_msg.isGLCSet := tbe.isGLCSet;
-          out_msg.isSLCSet := tbe.isSLCSet;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -682,6 +876,19 @@ machine(MachineType:TCC, "TCC Cache")
     triggerQueue_in.dequeue(clockEdge());
   }
 
+  action(pa_performAtomic, "pa", desc="Perform atomic") {
+    peek(coreRequestNetwork_in, CPURequestMsg) {
+      if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
+        cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, false);
+      } else {
+        // Set the isAtomicNoReturn flag to ensure that logs are not
+        // generated erroneously
+        assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+        cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, true);
+      }
+    }
+  }
+
   // END ACTIONS
 
   // BEGIN TRANSITIONS
@@ -693,37 +900,71 @@ machine(MachineType:TCC, "TCC Cache")
   // Stalling transitions do NOT check the tag array...and if they do,
   // they can cause a resource stall deadlock!
 
-  transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
-      // by putting the stalled requests in a buffer, we reduce resource contention
-      // since they won't try again every cycle and will instead only try again once
-      // woken up
+  transition(WI, {RdBlk, WrVicBlk, Atomic, AtomicPassOn, WrVicBlkBack}) { //TagArrayRead} {
+      // don't profile as hit or miss since it will be tried again
+      /*
+        By putting the stalled requests in a buffer, we reduce resource contention
+        since they won't try again every cycle and will instead only try again once
+        woken up.
+       */
+      st_stallAndWaitRequest;
+  }
+  transition(WIB, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
+      // don't profile as hit or miss since it will be tried again
+      /*
+        By putting the stalled requests in a buffer, we reduce resource contention
+        since they won't try again every cycle and will instead only try again once
+        woken up.
+       */
       st_stallAndWaitRequest;
   }
   transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} {
-      // by putting the stalled requests in a buffer, we reduce resource contention
-      // since they won't try again every cycle and will instead only try again once
-      // woken up
+      // don't profile as hit or miss since it will be tried again
+      /*
+        By putting the stalled requests in a buffer, we reduce resource contention
+        since they won't try again every cycle and will instead only try again once
+        woken up.
+       */
       st_stallAndWaitRequest;
   }
-  transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
-      // by putting the stalled requests in a buffer, we reduce resource contention
-      // since they won't try again every cycle and will instead only try again once
-      // woken up
+
+  transition(IV, {WrVicBlk, Atomic, AtomicPassOn, WrVicBlkBack}) { //TagArrayRead} {
+      // don't profile as hit or miss since it will be tried again
+      /*
+        By putting the stalled requests in a buffer, we reduce resource contention
+        since they won't try again every cycle and will instead only try again once
+        woken up.
+       */
       st_stallAndWaitRequest;
   }
+
+  transition({I, IV, V}, AtomicWait) {
+    // don't profile as hit or miss since it will be tried again
+    /*
+      By putting the stalled requests in a buffer, we reduce resource contention
+      since they won't try again every cycle and will instead only try again once
+      woken up.
+     */
+    st_stallAndWaitRequest;
+  }
+
   transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
     p_profileHit;
     sd_sendData;
     ut_updateTag;
     p_popRequestQueue;
   }
+
   transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
+    // don't profile as hit or miss since it will be tried again
     t_allocateTBE;
     wb_writeBack;
-    // need to try this request again after writing back the current entry -- to
-    // do so, put it with other stalled requests in a buffer to reduce resource
-    // contention since they won't try again every cycle and will instead only
-    // try again once woken up
+    /*
+      Need to try this request again after writing back the current entry -- to
+      do so, put it with other stalled requests in a buffer to reduce resource
+      contention since they won't try again every cycle and will instead only
+      try again once woken up.
+     */
     st_stallAndWaitRequest;
   }
 
@@ -748,10 +989,10 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
 
-// Transition to be called when a read request with SLC flag set arrives at
-// entry in state W. It evicts and invalidates the cache entry before
-// forwarding the request to global memory
-  transition(W, RdBypassEvict, I) {TagArrayRead} {
+  // Transition to be called when a read request with SLC flag set arrives at
+  // entry in state W. It evicts and invalidates the cache entry before
+  // forwarding the request to global memory
+  transition(W, RdBypassEvict, WIB) {TagArrayRead} {
     p_profileMiss;
     t_allocateTBE;
     wb_writeBack;
@@ -760,10 +1001,10 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
 
-// Transition to be called when a read request with SLC flag set arrives at
-// entry in state M. It evicts and invalidates the cache entry before
-// forwarding the request to global memory to main memory
-  transition(M, RdBypassEvict, I) {TagArrayRead} {
+  // Transition to be called when a read request with SLC flag set arrives at
+  // entry in state M. It evicts and invalidates the cache entry before
+  // forwarding the request to global memory
+  transition(M, RdBypassEvict, WIB) {TagArrayRead} {
     p_profileMiss;
     t_allocateTBE;
     wb_writeBack;
@@ -772,9 +1013,9 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
 
-// Transition to be called when a read request with SLC flag set arrives at
-// entry in state V. It invalidates the cache entry before forwarding the
-// request to global memory.
+  // Transition to be called when a read request with SLC flag set arrives at
+  // entry in state V. It invalidates the cache entry before forwarding the
+  // request to global memory.
   transition(V, RdBypassEvict, I) {TagArrayRead} {
     p_profileMiss;
     t_allocateTBE;
@@ -783,39 +1024,78 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
 
-// Transition to be called when a read request with SLC flag arrives at entry
-// in transient state. The request stalls until the pending transition is complete.
-  transition({WI, IV}, RdBypassEvict)  {
+  // Transition to be called when a read request with SLC flag arrives at entry
+  // in transient state. The request stalls until the pending transition is complete.
+  transition({WI, WIB, IV}, RdBypassEvict)  {
+    // don't profile as hit or miss since it will be tried again
     st_stallAndWaitRequest;
   }
 
-  transition(V, Atomic, A) {TagArrayRead} {
+  transition(V, Atomic, M) {TagArrayRead, TagArrayWrite, DataArrayWrite, AtomicALUOperation} {
+    p_profileHit;
+    ut_updateTag;
+    owm_orWriteMask;
+    pa_performAtomic;
+    ar_sendAtomicResponse;
+    p_popRequestQueue;
+  }
+
+  transition(A, {Atomic, AtomicWait}) {
+    // don't profile as hit or miss since it will be tried again
+    // by putting the stalled requests in a buffer, we reduce resource contention
+    // since they won't try again every cycle and will instead only try again once
+    // woken up
+    st_stallAndWaitRequest;
+  }
+
+  transition(W, Atomic, WI) {
+    t_allocateTBE;
+    wb_writeBack;
+    // need to try this request again after writing back the current entry -- to
+    // do so, put it with other stalled requests in a buffer to reduce resource
+    // contention since they won't try again every cycle and will instead only
+    // try again once woken up
+    st_stallAndWaitRequest;
+  }
+
+  transition(M, Atomic) {TagArrayRead, DataArrayWrite, AtomicALUOperation} {
+    p_profileHit;
+    owm_orWriteMask;
+    pa_performAtomic;
+    ar_sendAtomicResponse;
+    p_popRequestQueue;
+  }
+
+  // The following AtomicPassOn transitions send the request to the directory.
+  // They are triggered when an atomic request is received for a line that is
+  // not in the TCC, and/or when the SLC bit is set.
+  transition(V, AtomicPassOn, A) {TagArrayRead} {
     p_profileHit;
     i_invL2;
     t_allocateTBE;
     at_atomicThrough;
-    ina_incrementNumAtomics;
+    inpa_incrementNumPendingDirectoryAtomics;
     p_popRequestQueue;
   }
 
-transition(I, Atomic, A) {TagArrayRead} {
+  transition(I, {Atomic, AtomicPassOn}, A) {TagArrayRead} {
     p_profileMiss;
     i_invL2;
     t_allocateTBE;
     at_atomicThrough;
-    ina_incrementNumAtomics;
+    inpa_incrementNumPendingDirectoryAtomics;
     p_popRequestQueue;
   }
 
-  transition(A, Atomic) {
-    p_profileMiss;
+  transition(A, AtomicPassOn) {
+    // don't profile as hit or miss since it will be tried again
     // by putting the stalled requests in a buffer, we reduce resource contention
     // since they won't try again every cycle and will instead only try again once
     // woken up
     st_stallAndWaitRequest;
   }
 
-  transition({M, W}, Atomic, WI) {TagArrayRead} {
+  transition({M, W}, AtomicPassOn, WI) {TagArrayRead, DataArrayRead} {
     t_allocateTBE;
     wb_writeBack;
     // after writing back the current line, we need to wait for it to be done
@@ -865,9 +1145,9 @@ transition(I, Atomic, A) {TagArrayRead} {
     p_popRequestQueue;
   }
 
-// Transition to be called when a write request with SLC bit set arrives at an
-// entry with state V. The entry has to be evicted and invalidated before the
-// request is forwarded to global memory
+  // Transition to be called when a write request with SLC bit set arrives at an
+  // entry with state V. The entry has to be evicted and invalidated before the
+  // request is forwarded to global memory
   transition(V, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
     p_profileMiss;
     ut_updateTag;
@@ -877,9 +1157,9 @@ transition(I, Atomic, A) {TagArrayRead} {
     p_popRequestQueue;
   }
 
-// Transition to be called when a write request with SLC bit set arrives at an
-// entry with state W. The entry has to be evicted and invalidated before the
-// request is forwarded to global memory.
+  // Transition to be called when a write request with SLC bit set arrives at an
+  // entry with state W. The entry has to be evicted and invalidated before the
+  // request is forwarded to global memory.
   transition(W, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
     p_profileMiss;
     ut_updateTag;
@@ -900,7 +1180,7 @@ transition(I, Atomic, A) {TagArrayRead} {
     i_invL2;
   }
 
-  transition({A, IV, WI}, L2_Repl) {
+  transition({A, IV, WI, WIB}, L2_Repl) {
     i_invL2;
   }
 
@@ -919,14 +1199,14 @@ transition(I, Atomic, A) {TagArrayRead} {
     pp_popProbeQueue;
   }
 
-  transition({A, IV, WI}, PrbInv) {
+  transition({A, IV, WI, WIB}, PrbInv) {
     pi_sendProbeResponseInv;
     pp_popProbeQueue;
   }
 
-// Transition to be called when the response for a request with SLC bit set
-// arrives. The request has to be forwarded to the core that needs it while
-// making sure no entry is allocated.
+  // Transition to be called when the response for a request with SLC bit set
+  // arrives. The request has to be forwarded to the core that needs it while
+  // making sure no entry is allocated.
   transition(I, Bypass, I) {
     rb_bypassDone;
     pr_popResponseQueue;
@@ -934,20 +1214,65 @@ transition(I, Atomic, A) {TagArrayRead} {
     dt_deallocateTBE;
   }
 
+  transition(A, Bypass) {TagArrayRead, TagArrayWrite} {
+    bapdr_sendBypassedAtomicPerformedInDirectoryResponse;
+    dnpa_decrementNumPendingDirectoryAtomics;
+    pr_popResponseQueue;
+  }
+
+  transition(WI, Bypass, I) {
+    pr_popResponseQueue;
+    wada_wakeUpAllDependentsAddr;
+    dt_deallocateTBE;
+  }
+
   transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
     a_allocateBlock;
     ut_updateTag;
     wcb_writeCacheBlock;
     sdr_sendDataResponse;
+    wada_wakeUpAllDependentsAddr;
+    dt_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  /*
+    Since the L2 now allows multiple loads from different CUs to proceed in
+    parallel to the directory, we may get Event:Data back when the line is
+    already in V.  In this case, send the response to the appropriate TCP
+    and update MRU/data in TCC, but don't need to allocate line.
+   */
+  transition(V, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    ut_updateTag;
+    wcb_writeCacheBlock;
+    sdr_sendDataResponse;
+    wada_wakeUpAllDependentsAddr;
+    // tracks # pending requests, so need to decrement here too
+    dt_deallocateTBE;
     pr_popResponseQueue;
+  }
+
+  /*
+    Since the L2 now allows multiple loads from different CUs to proceed in
+    parallel to the directory, we may get Event:Data back when the line is
+    now in I because it has been evicted by an intervening request to the same
+    set index.  In this case, send the response to the appropriate TCP without
+    affecting the TCC (essentially, treat it similar to a bypass request except
+    we also send the unblock back to the directory).
+   */
+  transition(I, Data) {
+    sdr_sendDataResponse;
     wada_wakeUpAllDependentsAddr;
+    // tracks # pending requests, so need to decrement here too
     dt_deallocateTBE;
+    pr_popResponseQueue;
   }
 
-  transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+  transition(A, Data, M) {TagArrayRead, TagArrayWrite, DataArrayWrite, AtomicALUOperation} {
     a_allocateBlock;
-    ar_sendAtomicResponse;
-    dna_decrementNumAtomics;
+    wardb_writeAtomicResponseDirtyBytes;
+    pa_performAtomic;
+    baplr_sendBypassedAtomicPerformedLocallyResponse;
     pr_popResponseQueue;
   }
 
@@ -974,4 +1299,25 @@ transition(I, Atomic, A) {TagArrayRead} {
     wada_wakeUpAllDependentsAddr;
     pr_popResponseQueue;
   }
+
+  transition(WIB, WBAck,I) {
+    pr_popResponseQueue;
+  }
+
+  transition({A, IV, WI, WIB}, Flush) {
+    st_stallAndWaitRequest;
+  }
+
+  transition(I, Flush) {
+    fw_sendFlushResponse;
+    p_popRequestQueue;
+  }
+
+  transition({V, W}, Flush, I) {TagArrayRead, TagArrayWrite} {
+    t_allocateTBE;
+    ut_updateTag;
+    f_flush;
+    i_invL2;
+    p_popRequestQueue;
+   }
 }
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 7e0ad4ed96..97997a12b5 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * Copyright (c) 2023 Matthew D. Sinclair
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -52,28 +53,34 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
 
 {
   state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
-    I, AccessPermission:Invalid, desc="Invalid";
+    I, AccessPermission:Invalid,   desc="Invalid";
+    // Note: currently IV in the TCP is only for pending loads to a given cache
+    // line. Since the TCP is write through, stores should be allowed to pass
+    // through without requiring them to wait.
+    IV, AccessPermission:Invalid,  desc="Going from I to V, waiting on TCC data";
     V, AccessPermission:Read_Only, desc="Valid";
-    A, AccessPermission:Invalid, desc="Waiting on Atomic";
+    A, AccessPermission:Invalid,   desc="Waiting on Atomic";
+
+    F, AccessPermission:Invalid,   desc="Flushing; Waiting for Ack";
   }
 
   enumeration(Event, desc="TCP Events") {
     // Core initiated
-    Load,           desc="Load";
+    Load,            desc="Load";
     LoadBypassEvict, desc="Bypass L1 on a load. Evict if cache block already allocated";
-    Store,          desc="Store to L1 (L1 is dirty)";
-    StoreThrough,   desc="Store directly to L2(L1 is clean)";
-    Atomic,         desc="Atomic";
-    Flush,          desc="Flush if dirty(wbL1 for Store Release)";
-    Evict,          desc="Evict if clean(invL1 for Load Acquire)";
+    Store,           desc="Store to L1 (L1 is dirty)";
+    StoreThrough,    desc="Store directly to L2(L1 is clean)";
+    Atomic,          desc="Atomic";
+    Flush,           desc="Flush if dirty(wbL1 for Store Release)";
+    Evict,           desc="Evict if clean(invL1 for Load Acquire)";
     // Mem sys initiated
-    Repl,           desc="Replacing block from cache";
+    Repl,            desc="Replacing block from cache";
 
     // TCC initiated
-    TCC_Ack,        desc="TCC Ack to Core Request";
-    TCC_AckWB,      desc="TCC Ack for WB";
+    TCC_Ack,         desc="TCC Ack to Core Request";
+    TCC_AckWB,       desc="TCC Ack for WB";
     // Disable L1 cache
-    Bypass,         desc="Bypass the entire L1 cache";
+    Bypass,          desc="Bypass the entire L1 cache";
  }
 
   enumeration(RequestType,
@@ -100,6 +107,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     bool Dirty,        desc="Is the data dirty (different than memory)?";
     int NumPendingMsgs,desc="Number of acks/data messages that this processor is waiting for";
     bool Shared,       desc="Victim hit by shared probe";
+    bool isGLCSet,     desc="Bypass L1 Cache";
+    bool isSLCSet,     desc="Bypass L1 and L2 Cache";
    }
 
   structure(TBETable, external="yes") {
@@ -121,6 +130,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   void unset_tbe();
   void wakeUpAllBuffers();
   void wakeUpBuffers(Addr a);
+  void wakeUpAllBuffers(Addr a);
   Cycles curCycle();
 
   // Internal functions
@@ -256,6 +266,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
       peek(responseToTCP_in, ResponseMsg, block_on="addr") {
         Entry cache_entry := getCacheEntry(in_msg.addr);
         TBE tbe := TBEs.lookup(in_msg.addr);
+        DPRINTF(RubySlicc, "In responseToTCP_in with %s\n", in_msg);
+
         if (in_msg.Type == CoherenceResponseType:TDSysResp) {
           if (disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) {
               // If L1 is disabled or requests have GLC or SLC flag set,
@@ -273,6 +285,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck ||
                      in_msg.Type == CoherenceResponseType:NBSysWBAck) {
             trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
+            DPRINTF(RubySlicc, "Issuing TCC_AckWB\n");
           } else {
             error("Unexpected Response Message to Core");
           }
@@ -287,10 +300,13 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         TBE tbe := TBEs.lookup(in_msg.LineAddress);
         DPRINTF(RubySlicc, "%s\n", in_msg);
         if (in_msg.Type == RubyRequestType:LD) {
-          if ((in_msg.isGLCSet || in_msg.isSLCSet) && is_valid(cache_entry)) {
-            // Read requests with GLC or SLC bit set should not cache in the L1.
-            // They need to bypass the L1 and go to the L2. If an entry exists
-            // in the L1, it needs to be evicted
+          // Read requests with GLC or SLC bit set should not cache in the L1.
+          // They need to bypass the L1 and go to the L2.  If an entry exists in
+          // the L1, it needs to be evicted, and if no entry or invalid entry in
+          // the L1, still need to bypass.  The LoadBypassEvict Event handles
+          // both cases in its transitions below, so call LoadBypassEvict for
+          // both.
+          if ((in_msg.isGLCSet || in_msg.isSLCSet)) {
             trigger(Event:LoadBypassEvict, in_msg.LineAddress, cache_entry, tbe);
           }
           else {
@@ -436,10 +452,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                                 TCC_select_low_bit, TCC_select_num_bits));
         out_msg.MessageSize := MessageSizeType:Data;
-        out_msg.Type := CoherenceRequestType:Atomic;
         out_msg.InitialRequestTime := curCycle();
         out_msg.Shared := false;
         peek(mandatoryQueue_in, RubyRequest) {
+          if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) {
+            out_msg.Type := CoherenceRequestType:AtomicReturn;
+          } else {
+            assert(in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN);
+            out_msg.Type := CoherenceRequestType:AtomicNoReturn;
+          }
           out_msg.instSeqNum := in_msg.instSeqNum;
           out_msg.isGLCSet := in_msg.isGLCSet;
           out_msg.isSLCSet := in_msg.isSLCSet;
@@ -459,6 +480,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     check_allocate(TBEs);
     TBEs.allocate(address);
     set_tbe(TBEs.lookup(address));
+
+    // pass GLC/SLC information along
+    if (mandatoryQueue_in.isReady(clockEdge())) {
+      peek(mandatoryQueue_in, RubyRequest) {
+        DPRINTF(RubySlicc, "Address: %p\n", address);
+        tbe.isGLCSet := in_msg.isGLCSet;
+        tbe.isSLCSet := in_msg.isSLCSet;
+      }
+    }
   }
 
   action(d_deallocateTBE, "d", desc="Deallocate TBE") {
@@ -469,6 +499,24 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   action(sf_setFlush, "sf", desc="set flush") {
     inFlush := true;
     APPEND_TRANSITION_COMMENT(" inFlush is true");
+    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
+      out_msg.addr := address;
+      out_msg.Requestor := machineID;
+      assert(is_valid(cache_entry));
+      out_msg.DataBlk := cache_entry.DataBlk;
+      out_msg.writeMask.clear();
+      out_msg.writeMask.orMask(cache_entry.writeMask);
+      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
+                              TCC_select_low_bit, TCC_select_num_bits));
+      out_msg.MessageSize := MessageSizeType:Data;
+      out_msg.Type := CoherenceRequestType:WriteFlush;
+      out_msg.InitialRequestTime := curCycle();
+      out_msg.Shared := false;
+      out_msg.isSLCSet := false;
+      peek(mandatoryQueue_in, RubyRequest) {
+        out_msg.instSeqNum := in_msg.instSeqNum;
+      }
+    }
   }
 
   action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
@@ -479,6 +527,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     responseToTCP_in.dequeue(clockEdge());
   }
 
+  action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
+    stall_and_wait(mandatoryQueue_in, address);
+  }
+
   action(l_loadDoneHit, "ldh", desc="local load done (hits in TCP)") {
     assert(is_valid(cache_entry));
     if (use_seq_not_coal) {
@@ -497,6 +549,20 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     }
   }
 
+  action(ldmi_loadDoneMissInv, "ldmi",
+         desc="local load done (misses in TCP and line was evicted)") {
+    // since line was evicted, can't rely on data from cache entry, so use from
+    // the response message
+    peek(responseToTCP_in, ResponseMsg) {
+      DataBlock tmp:= in_msg.DataBlk;
+      if (use_seq_not_coal) {
+        sequencer.readCallback(address, tmp, false, MachineType:L1Cache);
+      } else {
+        coalescer.readCallback(address, MachineType:L1Cache, tmp);
+      }
+    }
+  }
+
   action(ad_atomicDone, "ad", desc="atomic done") {
     assert(is_valid(cache_entry));
     coalescer.atomicCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
@@ -524,6 +590,16 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     cache_entry.Dirty := true;
   }
 
+  action(f_flushDone, "f", desc="flush done") {
+    assert(is_valid(cache_entry));
+
+    if (use_seq_not_coal) {
+        sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
+    } else {
+        coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
+    }
+  }
+
   action(inv_invDone, "inv", desc="local inv done") {
     if (use_seq_not_coal) {
         DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n");
@@ -563,6 +639,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     L1cache.setMRU(address);
   }
 
+  action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") {
+    wakeUpAllBuffers(address);
+  }
+
 //  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
 //    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
 //  }
@@ -580,7 +660,6 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     L1cache.profileDemandHit();
   }
 
-
   // Transitions
   // ArrayRead/Write assumptions:
   // All requests read Tag Array
@@ -592,11 +671,19 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   // Stalling transitions do NOT check the tag array...and if they do,
   // they can cause a resource stall deadlock!
 
-  transition({A}, {Load, Atomic, StoreThrough}) { //TagArrayRead} {
-      z_stall;
+  // if another request arrives for the same cache line that has a pending
+  // atomic or load, put it on the wakeup buffer instead of z_stall'ing it.  By
+  // doing so we reduce resource contention since they won't try again every cycle
+  // and will instead only try again once woken up
+  transition({A, IV}, {Load, LoadBypassEvict, Atomic, Store, StoreThrough, Flush}) {
+      st_stallAndWaitRequest;
   }
 
-  transition(I, Load) {TagArrayRead} {
+  // if we have a load that misses, allocate TBE entry and transition to IV
+  // to prevent subsequent requests to same cache line from also going to TCC
+  // while this request is pending
+  transition(I, Load, IV) {TagArrayRead} {
+    t_allocateTBE;
     n_issueRdBlk;
     uu_profileDataMiss;
     p_popMandatoryQueue;
@@ -654,14 +741,38 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     p_popMandatoryQueue;
   }
 
-  transition(I, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
+  // if we got a response for a load where the line is in I, then
+  // another request must have come in that replaced the line in question in
+  // the cache.  Thus, complete this request without allocating the line, but
+  // still deallocate TBE and wakeup any dependent addresses.
+  // (Note: this assumes TCC_AckWB is what stores use)
+  transition(I, TCC_Ack) {TagArrayRead, TagArrayWrite} {
+    wada_wakeUpAllDependentsAddr;
+    // NOTE: Because we invalidated the cache line, the assert in l_loadDoneMiss
+    // will fail. Unlike atomics, which automatically go to I when the line
+    // returns, loads do not automatically go to I. Resolve this by passing the
+    // data from the response message instead.
+    ldmi_loadDoneMissInv;
+    d_deallocateTBE;
+    pr_popResponseQueue;
+  }
+
+  // if line is currently in IV, then TCC_Ack is returning the data for a
+  // pending load, so transition to V, deallocate TBE, and wakeup any dependent
+  // requests so they will be replayed now that this request has returned.
+  transition(IV, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
     a_allocate;
     w_writeCache;
+    wada_wakeUpAllDependentsAddr;
     l_loadDoneMiss;
+    d_deallocateTBE;
     pr_popResponseQueue;
   }
 
-  transition(I, Bypass, I) {
+  // if a bypass request arrives back at the TCP, regardless of whether the line
+  // is in I (from the bypass request) or IV (from a subsequent non-bypassing
+  // load), retain the current state and complete the bypassing request.
+  transition({I, IV}, Bypass) {
     rb_bypassDone;
     pr_popResponseQueue;
   }
@@ -673,12 +784,13 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   }
 
   transition(A, TCC_Ack, I) {TagArrayRead, DataArrayRead, DataArrayWrite} {
-    d_deallocateTBE;
     a_allocate;
     w_writeCache;
     ad_atomicDone;
-    pr_popResponseQueue;
     ic_invCache;
+    wada_wakeUpAllDependentsAddr;
+    d_deallocateTBE;
+    pr_popResponseQueue;
   }
 
   transition(V, TCC_Ack, V) {TagArrayRead, DataArrayRead, DataArrayWrite} {
@@ -695,15 +807,22 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     ic_invCache;
   }
 
-  transition({V, I, A},Flush) {TagArrayFlash} {
+  // if a line with a pending load gets evicted, transition the line to I and
+  // invalidate it.
+  transition(IV, Repl, I) {TagArrayRead, TagArrayWrite} {
+    ic_invCache;
+  }
+
+  transition({V,I}, Flush, F) {TagArrayFlash} {
+    a_allocate;
     sf_setFlush;
     p_popMandatoryQueue;
   }
 
   transition({I, V}, Evict, I) {TagArrayFlash} {
     inv_invDone;
-    p_popMandatoryQueue;
     ic_invCache;
+    p_popMandatoryQueue;
   }
 
   transition(A, Evict) {TagArrayFlash} {
@@ -711,9 +830,18 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     p_popMandatoryQueue;
   }
 
+  // if a line is in IV and a TCC_AckWB comes back, we must have had a WT
+  // store followed by a load. Thus, complete the store without affecting
+  // TBE or line state.
   // TCC_AckWB only snoops TBE
-  transition({V, I, A}, TCC_AckWB) {
+  transition({V, I, IV, A}, TCC_AckWB) {
     wd_wtDone;
     pr_popResponseQueue;
   }
+
+  transition(F, TCC_AckWB, I) {
+    f_flushDone;
+    pr_popResponseQueue;
+    ic_invCache;
+  }
 }
diff --git a/src/mem/ruby/protocol/MESI_Two_Level-dir.sm b/src/mem/ruby/protocol/MESI_Two_Level-dir.sm
index 9d6975570c..84ec578788 100644
--- a/src/mem/ruby/protocol/MESI_Two_Level-dir.sm
+++ b/src/mem/ruby/protocol/MESI_Two_Level-dir.sm
@@ -299,7 +299,7 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
   }
 
   action(l_popMemQueue, "q", desc="Pop off-chip request queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
@@ -365,7 +365,7 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
          desc="Queue off-chip writeback request") {
     peek(requestNetwork_in, RequestMsg) {
       enqueue(memQueue_out, MemoryMsg, to_mem_ctrl_latency) {
-        out_msg.addr := address;
+        out_msg.addr := in_msg.addr;
         out_msg.Type := MemoryRequestType:MEMORY_WB;
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
diff --git a/src/mem/ruby/protocol/MI_example-dir.sm b/src/mem/ruby/protocol/MI_example-dir.sm
index 11d2862b91..bbaa7d0789 100644
--- a/src/mem/ruby/protocol/MI_example-dir.sm
+++ b/src/mem/ruby/protocol/MI_example-dir.sm
@@ -523,7 +523,7 @@ machine(MachineType:Directory, "Directory protocol")
   }
 
   action(l_popMemQueue, "q", desc="Pop off-chip request queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   // TRANSITIONS
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm
index f16c2576a2..2f5103f846 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm
@@ -180,6 +180,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
     bool MemData,       desc="Got MemData?",default="false";
     bool wtData,       desc="Got write through data?",default="false";
     bool atomicData,   desc="Got Atomic op?",default="false";
+    // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
+    bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
+    bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
     Cycles InitialRequestTime, desc="...";
     Cycles ForwardRequestTime, desc="...";
     Cycles ProbeRequestStartTime, desc="...";
@@ -436,7 +439,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
           trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
           trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
-        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+        } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                   in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                   in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
           trigger(Event:Atomic, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
           trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
@@ -474,7 +479,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
             trigger(Event:RdBlkSP, in_msg.addr, entry, tbe);
           } else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
             trigger(Event:RdBlkMP, in_msg.addr, entry, tbe);
-          } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                     in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                     in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
             trigger(Event:AtomicP, in_msg.addr, entry, tbe);
           } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
             trigger(Event:WriteThroughP, in_msg.addr, entry, tbe);
@@ -670,7 +677,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
           out_msg.DemandRequest := false;
         }
       } else {
-        assert(in_msg.Type == CoherenceRequestType:Atomic);
+        assert(in_msg.Type == CoherenceRequestType:Atomic ||
+               in_msg.Type == CoherenceRequestType:AtomicReturn ||
+               in_msg.Type == CoherenceRequestType:AtomicNoReturn);
         enqueue(responseNetwork_out, ResponseMsg, response_latency) {
           out_msg.addr := address;
           out_msg.Type := CoherenceResponseType:NBSysResp;
@@ -977,10 +986,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
-      if (in_msg.Type == CoherenceRequestType:Atomic) {
+      if (in_msg.Type == CoherenceRequestType:Atomic ||
+          in_msg.Type == CoherenceRequestType:AtomicReturn ||
+          in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
         tbe.writeMask.clear();
         tbe.writeMask.orMask(in_msg.writeMask);
         tbe.atomicData := true;
+        if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
+          tbe.atomicDataReturn := true;
+        } else {
+          assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+          tbe.atomicDataNoReturn := true;
+        }
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
@@ -1012,10 +1029,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
-      if (in_msg.Type == CoherenceRequestType:Atomic) {
+      if (in_msg.Type == CoherenceRequestType:Atomic ||
+          in_msg.Type == CoherenceRequestType:AtomicReturn ||
+          in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
         tbe.writeMask.clear();
         tbe.writeMask.orMask(in_msg.writeMask);
         tbe.atomicData := true;
+        if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
+          tbe.atomicDataReturn := true;
+        } else {
+          assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+          tbe.atomicDataNoReturn := true;
+        }
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
@@ -1062,8 +1087,15 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
         tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask);
         getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
       } else{
-        assert(in_msg.Type == CoherenceRequestType:Atomic);
-        tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask);
+        assert(in_msg.Type == CoherenceRequestType:Atomic ||
+               in_msg.Type == CoherenceRequestType:AtomicReturn ||
+               in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+        if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
+          tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, false);
+        } else {
+          assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+          tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, true);
+        }
         getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
       }
     }
@@ -1076,7 +1108,12 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
       tbe.DataBlk := tmp;
       getDirectoryEntry(address).DataBlk := tbe.DataBlk;
     } else if (tbe.atomicData) {
-      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
+      if (tbe.atomicDataReturn) {
+        tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, false);
+      } else {
+        assert(tbe.atomicDataNoReturn);
+        tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, true);
+      }
       getDirectoryEntry(address).DataBlk := tbe.DataBlk;
     } else if (tbe.Dirty == true) {
       APPEND_TRANSITION_COMMENT(" Wrote data back ");
@@ -1137,6 +1174,7 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
           tbe.DataBlk := tmp;
         } else if (tbe.Dirty) {
           if(tbe.atomicData == false && tbe.wtData == false) {
+            assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
             DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
             assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
           }
@@ -1397,7 +1435,7 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
   }
 
   action(pm_popMemQueue, "pm", desc="pop mem queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm
index 5987d7cf76..5d85ad2fc6 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm
@@ -458,7 +458,9 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
             trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
           } else if (in_msg.Type == CoherenceRequestType:WriteThrough ) {
             trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
-          } else if (in_msg.Type == CoherenceRequestType:Atomic ) {
+          } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                     in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                     in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
             trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
           } else {
               if (in_msg.Type == CoherenceRequestType:VicDirty ||
@@ -523,9 +525,11 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
           assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS);
         }
         APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
-        if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic ||
-            in_msg.Type == CoherenceRequestType:WriteThrough )
-        {
+        if (in_msg.Type == CoherenceRequestType:RdBlkM ||
+            in_msg.Type == CoherenceRequestType:Atomic ||
+            in_msg.Type == CoherenceRequestType:AtomicReturn ||
+            in_msg.Type == CoherenceRequestType:AtomicNoReturn ||
+            in_msg.Type == CoherenceRequestType:WriteThrough) {
           cache_entry.dirty := true;
         }
         if (in_msg.Type == CoherenceRequestType:VicDirty ||
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 3b38e3b1ff..c36fc9ec93 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -39,6 +39,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   bool GPUonly := "False";
   int TCC_select_num_bits;
   bool useL3OnWT := "False";
+  bool L2isWB;
   Cycles to_memory_controller_latency := 1;
 
   // DMA
@@ -83,6 +84,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     BM_Pm, AccessPermission:Backing_Store,      desc="blocked waiting for probes, already got memory";
     B_Pm, AccessPermission:Backing_Store,       desc="blocked waiting for probes, already got memory";
     B, AccessPermission:Backing_Store,          desc="sent response, Blocked til ack";
+
+    F, AccessPermission:Busy, desc="sent Flush, blocked till ack";
   }
 
   // Events
@@ -120,6 +123,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     // DMA
     DmaRead,            desc="DMA read";
     DmaWrite,           desc="DMA write";
+
+    // Flush
+    Flush,              desc="Flush entry";
   }
 
   enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
@@ -148,11 +154,14 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     bool Dirty,         desc="Is the data dirty?";
     int NumPendingAcks,        desc="num acks expected";
     MachineID OriginalRequestor,        desc="Original Requestor";
-    MachineID WTRequestor,        desc="WT Requestor";
+    MachineID CURequestor,        desc="CU that initiated the request";
     bool Cached,        desc="data hit in Cache";
     bool MemData,       desc="Got MemData?",default="false";
     bool wtData,       desc="Got write through data?",default="false";
     bool atomicData,   desc="Got Atomic op?",default="false";
+    // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
+    bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
+    bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
     Cycles InitialRequestTime, desc="...";
     Cycles ForwardRequestTime, desc="...";
     Cycles ProbeRequestStartTime, desc="...";
@@ -161,6 +170,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     uint64_t probe_id,        desc="probe id for lifetime profiling";
     WriteMask writeMask,    desc="outstanding write through mask";
     int Len,            desc="Length of memory request for DMA";
+    // GLC is passed along because it is needed in the return path
+    bool isGLCSet,      desc="Bypass GPU L1 Cache";
+    bool isSLCSet,      desc="Bypass GPU L1 and L2 Cache";
   }
 
   structure(TBETable, external="yes") {
@@ -392,7 +404,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
           trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
-        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+        } else if (in_msg.Type == CoherenceRequestType:Atomic  ||
+                   in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                   in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
           trigger(Event:Atomic, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
           if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
@@ -410,6 +424,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr);
             trigger(Event:VicClean, in_msg.addr, entry, tbe);
           }
+        } else if (in_msg.Type == CoherenceRequestType:WriteFlush) {
+            DPRINTF(RubySlicc, "Got Flush from %s on %s\n", in_msg.Requestor, in_msg.addr);
+            trigger(Event:Flush, in_msg.addr, entry, tbe);
         } else {
           error("Bad request message type");
         }
@@ -455,6 +472,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
       out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
       out_msg.OriginalResponder := tbe.LastSender;
+      out_msg.CURequestor := tbe.CURequestor;
       out_msg.L3Hit := tbe.L3Hit;
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
@@ -483,6 +501,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
       out_msg.OriginalResponder := tbe.LastSender;
       out_msg.L3Hit := tbe.L3Hit;
+      out_msg.isGLCSet := tbe.isGLCSet;
+      out_msg.isSLCSet := tbe.isSLCSet;
+      out_msg.CURequestor := tbe.CURequestor;
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
   }
@@ -512,9 +533,11 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
         out_msg.OriginalResponder := tbe.LastSender;
-        if(tbe.atomicData){
-          out_msg.WTRequestor := tbe.WTRequestor;
-        }
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
+        if(tbe.atomicData){
+          out_msg.CURequestor := tbe.CURequestor;
+        }
         out_msg.L3Hit := tbe.L3Hit;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
@@ -540,6 +563,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.InitialRequestTime := tbe.InitialRequestTime;
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
   }
@@ -550,13 +575,32 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.addr := address;
         out_msg.Type := CoherenceResponseType:NBSysWBAck;
         out_msg.Destination.add(in_msg.Requestor);
-        out_msg.WTRequestor := in_msg.WTRequestor;
+        out_msg.CURequestor := in_msg.CURequestor;
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.InitialRequestTime := in_msg.InitialRequestTime;
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := curCycle();
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
+    }
+  }
+
+  action(rf_sendResponseFlush, "rf", desc="send Flush Ack") {
+    peek(memQueue_in, MemoryMsg) {
+      enqueue(responseNetwork_out, ResponseMsg, 1) {
+        out_msg.addr := address;
+        out_msg.Type := CoherenceResponseType:NBSysWBAck;
+        out_msg.Destination.add(tbe.OriginalRequestor);  // routed from TBE state, not from in_msg
+        out_msg.CURequestor := tbe.CURequestor;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Control;
+        out_msg.InitialRequestTime := tbe.InitialRequestTime;
+        out_msg.ForwardRequestTime := curCycle();
+        out_msg.ProbeRequestStartTime := curCycle();
+        // NOTE(review): instSeqNum is not forwarded; confirm whether the flush ack needs it.
       }
     }
   }
@@ -716,7 +760,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
         // CPU + GPU or GPU only system
         if ((in_msg.Type != CoherenceRequestType:WriteThrough &&
-             in_msg.Type != CoherenceRequestType:Atomic) ||
+             in_msg.Type != CoherenceRequestType:Atomic &&
+             in_msg.Type != CoherenceRequestType:AtomicReturn &&
+             in_msg.Type != CoherenceRequestType:AtomicNoReturn) ||
              !in_msg.NoWriteConflict) {
           if (noTCCdir) {
             probe_dests.add(mapAddressToRange(address, MachineType:TCC,
@@ -739,6 +785,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
           tbe.NumPendingAcks := out_msg.Destination.count();
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
           APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -843,6 +891,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
           tbe.NumPendingAcks := out_msg.Destination.count();
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", (out_msg));
           APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -897,6 +947,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.ReturnData := false;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -932,6 +984,23 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     }
   }
 
+  action(f_writeFlushDataToMemory, "f", desc="Write flush data to memory") {
+    peek(requestNetwork_in, CPURequestMsg) {
+      enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
+        out_msg.addr := address;
+        out_msg.Type := MemoryRequestType:MEMORY_WB;
+        out_msg.Sender := machineID;
+        out_msg.MessageSize := MessageSizeType:Writeback_Data;
+        out_msg.DataBlk := in_msg.DataBlk;  // flushed block taken straight from the request
+      }
+      if (tbe.Dirty == false) {
+        // Mirror the flushed data into the TBE as well, because of how
+        // this directory deals with functional writes.
+        tbe.DataBlk := in_msg.DataBlk;
+      }
+    }
+  }
+
   action(atd_allocateTBEforDMA, "atd", desc="allocate TBE Entry for DMA") {
     check_allocate(TBEs);
     peek(dmaRequestQueue_in, DMARequestMsg) {
@@ -966,15 +1035,28 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         tbe.writeMask.clear();
         tbe.writeMask.orMask(in_msg.writeMask);
         tbe.wtData := true;
-        tbe.WTRequestor := in_msg.WTRequestor;
+        tbe.CURequestor := in_msg.CURequestor;
         tbe.LastSender := in_msg.Requestor;
       }
-      if (in_msg.Type == CoherenceRequestType:Atomic) {
+      if (in_msg.Type == CoherenceRequestType:Atomic ||
+          in_msg.Type == CoherenceRequestType:AtomicReturn ||
+          in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
         tbe.writeMask.clear();
         tbe.writeMask.orMask(in_msg.writeMask);
         tbe.atomicData := true;
-        tbe.WTRequestor := in_msg.WTRequestor;
+        if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
+          tbe.atomicDataReturn := true;
+        } else {
+          assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+          tbe.atomicDataNoReturn := true;
+        }
+        tbe.CURequestor := in_msg.CURequestor;
         tbe.LastSender := in_msg.Requestor;
+        tbe.isSLCSet := in_msg.isSLCSet;
+      }
+      // GPU read requests also need to track where the requestor came from
+      if (in_msg.Type == CoherenceRequestType:RdBlk) {
+        tbe.CURequestor := in_msg.CURequestor;
       }
       tbe.Dirty := false;
       if (in_msg.Type == CoherenceRequestType:WriteThrough) {
@@ -985,6 +1067,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       tbe.NumPendingAcks := 0;
       tbe.Cached := in_msg.ForceShared;
       tbe.InitialRequestTime := in_msg.InitialRequestTime;
+      tbe.isGLCSet := in_msg.isGLCSet;
+      tbe.isSLCSet := in_msg.isSLCSet;
+      DPRINTF(RubySlicc, "t_allocateTBE in_msg: %s, tbe: %s\n", in_msg, tbe.CURequestor);
     }
   }
 
@@ -995,8 +1080,15 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   action(wd_writeBackData, "wd", desc="Write back data if needed") {
     if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
-      if (tbe.atomicData) {
-        tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
+      // Only perform atomics in the directory if the SLC bit is set, or
+      // if the L2 is WT
+      if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) {
+        if (tbe.atomicDataReturn) {
+          tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, false);
+        } else {
+          assert(tbe.atomicDataNoReturn);
+          tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, true);
+        }
       }
       enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
         out_msg.addr := address;
@@ -1036,6 +1128,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           tbe.writeMask.fillMask();
         } else if (tbe.Dirty) {
           if(tbe.atomicData == false && tbe.wtData == false) {
+            assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
             DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
             assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
           }
@@ -1169,7 +1262,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   }
 
   action(pm_popMemQueue, "pm", desc="pop mem queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
@@ -1209,11 +1302,20 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   }
 
   action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") {
+    DPRINTF(RubySlicc, "wada wakeup: 0x%x\n", address);
     wakeUpAllBuffers(address);
   }
 
+  /*
+    Currently z_stall is unused because it can lead to Protocol Stalls that
+    eventually lead to deadlock.  Instead, it is recommended to use
+    st_stallAndWaitRequest in combination with a wakeupBuffer call (e.g.,
+    wada_wakeUpAllDependentsAddr) to put the pending requests to sleep instead of
+    them causing head of line blocking -- wada_wakeUpAllDependentsAddr should wake
+    the request up once the request preventing it from completing is done.
   action(z_stall, "z", desc="...") {
   }
+  */
 
   // TRANSITIONS
   transition({BL, BDR_M, BDW_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_PM, BS_PM, BM_PM, B_PM, BDR_Pm, BDW_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
@@ -1315,19 +1417,19 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     d_writeDataToMemory;
     al_allocateL3Block;
     pr_profileL3HitMiss; //Must come after al_allocateL3Block and before dt_deallocateTBE
-    wad_wakeUpDependents;
+    wada_wakeUpAllDependentsAddr;
     dt_deallocateTBE;
     pr_popResponseQueue;
   }
 
   transition(BL, StaleWB, U) {L3TagArrayWrite} {
     dt_deallocateTBE;
-    wa_wakeUpAllDependents;
+    wada_wakeUpAllDependentsAddr;
     pr_popResponseQueue;
   }
 
   transition({B, BDR_M, BDW_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_PM, BS_PM, BM_PM, B_PM, BDR_Pm, BDW_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
-    z_stall;
+    st_stallAndWaitRequest;
   }
 
   transition({U, BL, BDR_M, BDW_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_PM, BS_PM, BM_PM, B_PM, BDR_Pm, BDW_Pm, BS_Pm, BM_Pm, B_Pm, B}, WBAck) {
@@ -1405,8 +1507,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BDW_M, MemData, U) {
     mt_writeMemDataToTBE;
-    da_sendResponseDmaAck;
     wd_writeBackData;
+    da_sendResponseDmaAck;
     wada_wakeUpAllDependentsAddr;
     dt_deallocateTBE;
     pm_popMemQueue;
@@ -1414,8 +1516,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
-    s_sendResponseS;
     wd_writeBackData;
+    s_sendResponseS;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pm_popMemQueue;
@@ -1423,8 +1525,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
-    m_sendResponseM;
     wd_writeBackData;
+    m_sendResponseM;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pm_popMemQueue;
@@ -1432,32 +1534,32 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
     mt_writeMemDataToTBE;
-    es_sendResponseES;
     wd_writeBackData;
+    es_sendResponseES;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pm_popMemQueue;
   }
 
   transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
-    s_sendResponseS;
     wd_writeBackData;
+    s_sendResponseS;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     ptl_popTriggerQueue;
   }
 
   transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
-    m_sendResponseM;
     wd_writeBackData;
+    m_sendResponseM;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     ptl_popTriggerQueue;
   }
 
   transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
-    es_sendResponseES;
     wd_writeBackData;
+    es_sendResponseES;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     ptl_popTriggerQueue;
@@ -1503,8 +1605,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   }
 
   transition(BDW_Pm, ProbeAcksComplete, U) {
-    da_sendResponseDmaAck;
     wd_writeBackData;
+    da_sendResponseDmaAck;
     // Check for pending requests from the core we put to sleep while waiting
     // for a response
     wada_wakeUpAllDependentsAddr;
@@ -1514,8 +1616,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
-    s_sendResponseS;
     wd_writeBackData;
+    s_sendResponseS;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pt_popTriggerQueue;
@@ -1523,8 +1625,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
-    m_sendResponseM;
     wd_writeBackData;
+    m_sendResponseM;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pt_popTriggerQueue;
@@ -1532,8 +1634,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
-    es_sendResponseES;
     wd_writeBackData;
+    es_sendResponseES;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pt_popTriggerQueue;
@@ -1541,10 +1643,23 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
     sf_setForwardReqTime;
-    c_sendResponseCtoD;
     wd_writeBackData;
+    c_sendResponseCtoD;
     alwt_allocateL3BlockOnWT;
     dt_deallocateTBE;
     pt_popTriggerQueue;
   }
+
+  transition(U, Flush, F) {L3TagArrayRead, L3TagArrayWrite} {
+    t_allocateTBE;
+    f_writeFlushDataToMemory;
+    w_sendResponseWBAck;
+    p_popRequestQueue;
+  }
+
+  transition(F, WBAck, U) {
+    pm_popMemQueue;
+    dt_deallocateTBE;
+  }
+
 }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
index bb3a013325..b860ff1681 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
@@ -39,6 +39,8 @@ enumeration(CoherenceRequestType, desc="Coherence Request Types") {
   VicClean,     desc="L2 clean eviction";
   VicDirty,     desc="L2 dirty eviction";
   Atomic,       desc="Upper level atomic";
+  AtomicReturn, desc="Upper level atomic";
+  AtomicNoReturn, desc="Upper level atomic";
   AtomicWriteBack, desc="Upper level atomic";
   WriteThrough, desc="Ordered WriteThrough w/Data";
   WriteThroughFifo, desc="WriteThrough with no data";
@@ -132,14 +134,14 @@ structure(CPURequestMsg, desc="...", interface="Message") {
   int Acks, default="0", desc="Acks that the dir (mem ctrl) should expect to receive";
   CoherenceRequestType OriginalType, default="CoherenceRequestType_NA",  desc="Type of request from core fwded through region buffer";
   WriteMask writeMask, desc="Write Through Data";
-  MachineID WTRequestor,            desc="Node who initiated the write through";
+  MachineID CURequestor,            desc="Node who initiated the request";
   int wfid,                         default="0", desc="wavefront id";
   uint64_t instSeqNum,              desc="instruction sequence number";
   bool NoWriteConflict,             default="true", desc="write collided with CAB entry";
   int ProgramCounter,               desc="PC that accesses to this block";
 
-  bool isGLCSet, default="false", desc="GLC flag value in the request";
-  bool isSLCSet, default="false", desc="SLC flag value in the request";
+  bool isGLCSet, default="false",   desc="GLC flag value in the request";
+  bool isSLCSet, default="false",   desc="SLC flag value in the request";
 
   bool functionalRead(Packet *pkt) {
     // Only PUTX messages contains the data block
@@ -168,6 +170,8 @@ structure(NBProbeRequestMsg, desc="...", interface="Message") {
   MachineID Requestor,          desc="Requestor id for 3-hop requests";
   bool NoAckNeeded, default="false", desc="For short circuting acks";
   int ProgramCounter,           desc="PC that accesses to this block";
+  bool isGLCSet, default="false", desc="GLC flag value in the request";
+  bool isSLCSet, default="false", desc="SLC flag value in the request";
 
   bool functionalRead(Packet *pkt) {
     return false;
@@ -238,7 +242,7 @@ structure(ResponseMsg, desc="...", interface="Message") {
 
   bool L3Hit, default="false", desc="Did memory or L3 supply the data?";
   MachineID OriginalResponder, desc="Mach which wrote the data to the L3";
-  MachineID WTRequestor,             desc="Node who started the writethrough";
+  MachineID CURequestor,             desc="Node who started the access";
 
   bool NotCached, default="false", desc="True when the Region buffer has already evicted the line";
 
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm
index 8608608590..4e9e9597aa 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm
@@ -170,6 +170,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     bool MemData,       desc="Got MemData?",default="false";
     bool wtData,       desc="Got write through data?",default="false";
     bool atomicData,   desc="Got Atomic op?",default="false";
+    // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
+    bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
+    bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
     Cycles InitialRequestTime, desc="...";
     Cycles ForwardRequestTime, desc="...";
     Cycles ProbeRequestStartTime, desc="...";
@@ -451,7 +454,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
           trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
-        } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+        } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                   in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                   in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
           trigger(Event:Atomic, in_msg.addr, entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:VicDirty) {
           if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
@@ -656,7 +661,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         // add relevant TCC node to list. This replaces all TCPs and SQCs
         if(isGPUSharer(address)) {
           if ((in_msg.Type == CoherenceRequestType:WriteThrough ||
-               in_msg.Type == CoherenceRequestType:Atomic) &&
+               in_msg.Type == CoherenceRequestType:Atomic ||
+               in_msg.Type == CoherenceRequestType:AtomicReturn ||
+               in_msg.Type == CoherenceRequestType:AtomicNoReturn) &&
                in_msg.NoWriteConflict) {
           // Don't Include TCCs unless there was write-CAB conflict in the TCC
           } else if(noTCCdir) {
@@ -814,6 +821,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       tbe.writeMask.clear();
       tbe.wtData := false;
       tbe.atomicData := false;
+      tbe.atomicDataReturn := false;
+      tbe.atomicDataNoReturn := false;
       tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
       tbe.Dirty := false;
       tbe.NumPendingAcks := 0;
@@ -831,10 +840,18 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
-      if (in_msg.Type == CoherenceRequestType:Atomic) {
+      if (in_msg.Type == CoherenceRequestType:Atomic ||
+          in_msg.Type == CoherenceRequestType:AtomicReturn ||
+          in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
         tbe.writeMask.clear();
         tbe.writeMask.orMask(in_msg.writeMask);
         tbe.atomicData := true;
+        if (in_msg.Type == CoherenceRequestType:AtomicReturn) { // record which atomic flavour this TBE carries
+          tbe.atomicDataReturn := true;
+        } else {
+          assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
+          tbe.atomicDataNoReturn := true;
+        }
         tbe.WTRequestor := in_msg.WTRequestor;
         tbe.LastSender := in_msg.Requestor;
       }
@@ -866,8 +883,14 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       tbe.DataBlk := tmp;
       getDirectoryEntry(address).DataBlk := tbe.DataBlk;
     } else if (tbe.atomicData) {
-      tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
-                                tbe.writeMask);
+      if (tbe.atomicDataReturn) { // choose the atomicPartial call by atomic flavour
+        tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
+                                tbe.writeMask, false); // third argument is false for AtomicReturn
+      } else {
+          assert(tbe.atomicDataNoReturn);
+        tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
+                                tbe.writeMask, true); // third argument is true for AtomicNoReturn
+      }
       getDirectoryEntry(address).DataBlk := tbe.DataBlk;
     } else if (tbe.Dirty == false) {
       getDirectoryEntry(address).DataBlk := tbe.DataBlk;
@@ -896,6 +919,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           tbe.DataBlk := tmp;
         } else if (tbe.Dirty) {
           if(tbe.atomicData == false && tbe.wtData == false) {
+            assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
             DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
             assert(tbe.DataBlk == in_msg.DataBlk);  // in case of double data
           }
@@ -1050,7 +1074,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         entry.pfState := ProbeFilterState:T;
         entry.isOnCPU := false;
         entry.isOnGPU := false;
-      } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+      } else if (in_msg.Type == CoherenceRequestType:Atomic ||
+                 in_msg.Type == CoherenceRequestType:AtomicReturn ||
+                 in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
         entry.pfState := ProbeFilterState:T;
         entry.isOnCPU := false;
         entry.isOnGPU := false;
@@ -1103,7 +1129,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   }
 
   action(pm_popMemQueue, "pm", desc="pop mem queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(pt_popTriggerQueue, "pt", desc="pop trigger queue") {
diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
index 3b4a8012c5..4a513d6d3f 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm
@@ -598,7 +598,7 @@ machine(MachineType:Directory, "Directory protocol")
   }
 
   action(q_popMemQueue, "q", desc="Pop off-chip request queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
@@ -1014,6 +1014,31 @@ machine(MachineType:Directory, "Directory protocol")
     i_popIncomingRequestQueue;
   }
 
+  // This happens when there is a race between a FwdGetX
+  // and a PUTX at the owner. In this case the owner hands
+  // off ownership to the GetX requestor while its PUTX still
+  // goes through. Since the owner has changed, the state should
+  // go back to M and the PUTX is effectively discarded.
+  transition(MI, Unblock, M) {
+    w_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(MIS, Unblock, M) {
+    w_deallocateTBE;
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(OS, Unblock, O) {
+    // Unlike the MI/MIS Unblock transitions, w_deallocateTBE is not run here:
+    // per the original author there is no TBE in the OS state (reason unclear).
+    j_popIncomingUnblockQueue;
+  }
+
+  transition(OSS, Unblock, O) {
+    j_popIncomingUnblockQueue;
+  }
+
   transition(WBI, Memory_Ack, I) {
     clearWBAck;
     w_deallocateTBE;
diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm
index 97ea292eb7..7f2bdf94e0 100644
--- a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm
@@ -821,7 +821,7 @@ machine(MachineType:Directory, "Token protocol")
   }
 
   action(l_popMemQueue, "q", desc="Pop off-chip request queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(r_bounceResponse, "r", desc="Bounce response to starving processor") {
diff --git a/src/mem/ruby/protocol/MOESI_hammer-dir.sm b/src/mem/ruby/protocol/MOESI_hammer-dir.sm
index 8fd447fdf4..833ccd3b18 100644
--- a/src/mem/ruby/protocol/MOESI_hammer-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_hammer-dir.sm
@@ -1141,7 +1141,7 @@ machine(MachineType:Directory, "AMD Hammer-like protocol")
   }
 
   action(l_popMemQueue, "q", desc="Pop off-chip request queue") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   action(g_popTriggerQueue, "g", desc="Pop trigger queue") {
diff --git a/src/mem/ruby/protocol/RubySlicc_Defines.sm b/src/mem/ruby/protocol/RubySlicc_Defines.sm
index 6ae3a6cb12..590a134ef9 100644
--- a/src/mem/ruby/protocol/RubySlicc_Defines.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Defines.sm
@@ -37,3 +37,5 @@ Cycles recycle_latency;
 // memory controllers.
 void functionalMemoryRead(Packet *pkt);
 bool functionalMemoryWrite(Packet *pkt);
+
+void dequeueMemRespQueue();
diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm
index a32983ada4..0d2bc742f9 100644
--- a/src/mem/ruby/protocol/RubySlicc_Exports.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -77,6 +77,9 @@ structure(DataBlock, external = "yes", desc="..."){
   void copyPartial(DataBlock, int, int);
   void copyPartial(DataBlock, WriteMask);
   void atomicPartial(DataBlock, WriteMask);
+  void atomicPartial(DataBlock, WriteMask, bool);
+  int numAtomicLogEntries();
+  void clearAtomicLogEntries();
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
@@ -228,11 +231,13 @@ enumeration(CacheRequestType, desc="...", default="CacheRequestType_NULL") {
   DataArrayWrite,   desc="Write access to the cache's data array";
   TagArrayRead,     desc="Read access to the cache's tag array";
   TagArrayWrite,    desc="Write access to the cache's tag array";
+  AtomicALUOperation,  desc="Atomic ALU operation";
 }
 
 enumeration(CacheResourceType, desc="...", default="CacheResourceType_NULL") {
   DataArray,    desc="Access to the cache's data array";
   TagArray,     desc="Access to the cache's tag array";
+  AtomicALUArray, desc="Access to the cache's atomic ALU array";
 }
 
 enumeration(DirectoryRequestType, desc="...", default="DirectoryRequestType_NULL") {
diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm
index 8ba9d935ff..293c731c37 100644
--- a/src/mem/ruby/protocol/RubySlicc_Types.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Types.sm
@@ -139,6 +139,13 @@ structure (Sequencer, external = "yes") {
                      Cycles, Cycles, Cycles);
   void writeUniqueCallback(Addr, DataBlock);
 
+  void atomicCallback(Addr, DataBlock);
+  void atomicCallback(Addr, DataBlock, bool);
+  void atomicCallback(Addr, DataBlock, bool, MachineType);
+  void atomicCallback(Addr, DataBlock, bool, MachineType,
+                      Cycles, Cycles, Cycles);
+
+
   void unaddressedCallback(Addr, RubyRequestType);
   void unaddressedCallback(Addr, RubyRequestType, MachineType);
   void unaddressedCallback(Addr, RubyRequestType, MachineType,
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
index d18c600516..ca7b3e203a 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 ARM Limited
+ * Copyright (c) 2021-2023 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -63,6 +63,7 @@ action(AllocateTBE_Request, desc="") {
         out_msg.usesTxnId := false;
         out_msg.event := Event:SendRetryAck;
         out_msg.retryDest := in_msg.requestor;
+        out_msg.txnId := in_msg.txnId;
         retryQueue.emplace(in_msg.addr,false,in_msg.requestor);
       }
     }
@@ -145,16 +146,24 @@ action(AllocateTBE_SeqRequest, desc="") {
       assert(in_msg.Prefetch == PrefetchBit:No);
       out_msg.is_local_pf := false;
       out_msg.is_remote_pf := false;
+      out_msg.txnId := max_outstanding_transactions;
+
+      out_msg.atomic_op.clear();
+      out_msg.atomic_op.orMask(in_msg.writeMask);
 
       if ((in_msg.Type == RubyRequestType:LD) ||
           (in_msg.Type == RubyRequestType:IFETCH)) {
         out_msg.type := CHIRequestType:Load;
-      } else  if (in_msg.Type == RubyRequestType:ST) {
+      } else if (in_msg.Type == RubyRequestType:ST) {
         if (in_msg.Size == blockSize) {
           out_msg.type := CHIRequestType:StoreLine;
         } else {
           out_msg.type := CHIRequestType:Store;
         }
+      } else if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) {
+        out_msg.type := CHIRequestType:AtomicLoad;
+      } else if (in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN){
+        out_msg.type := CHIRequestType:AtomicStore;
       } else {
         error("Invalid RubyRequestType");
       }
@@ -598,10 +607,13 @@ action(Initiate_ReadUnique_AutoUpgrade, desc="") {
 action(Initiate_ReadUnique_Upgrade, desc="") {
   // must use the transitions with auto upgrade otherwise
   assert(is_HN == false);
-  assert(tbe.use_DCT == false);
   assert((tbe.dataValid && tbe.dataUnique) == false);
   assert((tbe.dir_ownerExists && tbe.dir_ownerIsExcl) == false);
 
+  // CompData or CompUC will always be send by us after permission is received
+  // from downstream
+  tbe.use_DCT := false;
+
   tbe.actions.push(Event:ReadMissPipe);
   if (tbe.dataMaybeDirtyUpstream) {
     tbe.actions.push(Event:SendSnpUnique);
@@ -764,6 +776,148 @@ action(Initiate_StoreMiss, desc="") {
   }
 }
 
+action(Initiate_Atomic_UC, desc="") { // queue the read / delay / AtomicHit / write sequence on the TBE
+  if ((policy_type == 0) || // ALL NEAR
+      (policy_type == 1) || // UNIQUE NEAR
+      (policy_type == 2)    // PRESENT NEAR
+      ){
+    tbe.actions.push(Event:DataArrayRead); // same sequence for all three accepted policies
+    tbe.actions.push(Event:DelayAtomic);
+    tbe.actions.push(Event:AtomicHit);
+    tbe.actions.pushNB(Event:DataArrayWrite); // the two array writes are queued with pushNB
+    tbe.actions.pushNB(Event:TagArrayWrite);
+  } else {
+    error("Invalid policy type"); // any policy_type other than 0, 1 or 2
+  }
+}
+
+action(Initiate_Atomic_UD, desc="") { // queues the same action sequence as Initiate_Atomic_UC
+  if ((policy_type == 0) || // ALL NEAR
+      (policy_type == 1) || // UNIQUE NEAR
+      (policy_type == 2)    // PRESENT NEAR
+      ){
+    tbe.actions.push(Event:DataArrayRead); // same sequence for all three accepted policies
+    tbe.actions.push(Event:DelayAtomic);
+    tbe.actions.push(Event:AtomicHit);
+    tbe.actions.pushNB(Event:DataArrayWrite); // the two array writes are queued with pushNB
+    tbe.actions.pushNB(Event:TagArrayWrite);
+  } else {
+    error("Invalid policy type"); // any policy_type other than 0, 1 or 2
+  }
+}
+
+action(Initiate_AtomicReturn_I, desc="") { // pick the AtomicReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if ((policy_type == 1) || // UNIQUE NEAR
+             (policy_type == 2)) { // PRESENT NEAR
+    tbe.actions.push(Event:SendAtomicReturn_NoWait); // queue only the atomic request itself
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else {
+    error("Invalid policy type");
+  }
+}
+
+action(Initiate_AtomicNoReturn_I, desc="") { // pick the AtomicNoReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if (policy_type == 1) { // UNIQUE NEAR
+    tbe.actions.push(Event:SendAtomicNoReturn); // queue the atomic request followed by its data
+    tbe.actions.push(Event:SendANRData);
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else { // NOTE(review): policy_type 2 lands here, but Initiate_AtomicReturn_I accepts it - confirm intended
+    error("Invalid policy type");
+  }
+}
+
+action(Initiate_AtomicReturn_SD, desc="") { // pick the AtomicReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if (policy_type == 1) { // UNIQUE NEAR
+    tbe.actions.push(Event:SendAtomicReturn_NoWait); // queue only the atomic request itself
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else { // NOTE(review): policy_type 2 (PRESENT NEAR) also lands here - confirm intended
+    error("Invalid policy type");
+  }
+}
+
+action(Initiate_AtomicNoReturn_SD, desc="") { // pick the AtomicNoReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if (policy_type == 1) { // UNIQUE NEAR
+    tbe.actions.push(Event:SendAtomicNoReturn); // queue the atomic request followed by its data
+    tbe.actions.push(Event:SendANRData);
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else { // NOTE(review): policy_type 2 (PRESENT NEAR) also lands here - confirm intended
+    error("Invalid policy type");
+  }
+}
+
+action(Initiate_AtomicReturn_SC, desc="") { // pick the AtomicReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if (policy_type == 1) { // UNIQUE NEAR
+    tbe.actions.push(Event:SendAtomicReturn_NoWait); // queue only the atomic request itself
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else { // NOTE(review): policy_type 2 (PRESENT NEAR) also lands here - confirm intended
+    error("Invalid policy type");
+  }
+}
+
+action(Initiate_AtomicNoReturn_SC, desc="") { // pick the AtomicNoReturn action sequence from policy_type
+  if (policy_type == 0){ // ALL NEAR
+    tbe.actions.push(Event:SendReadUnique); // queue a ReadUnique plus the fill / write pipeline steps
+    tbe.actions.push(Event:WriteFEPipe);
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:TagArrayWrite);
+    tbe.atomic_to_be_done := true; // Callback_Miss applies the atomic when this flag is set
+  } else if (policy_type == 1) { // UNIQUE NEAR
+    tbe.actions.push(Event:SendAtomicNoReturn); // queue the atomic request followed by its data
+    tbe.actions.push(Event:SendANRData);
+    tbe.dataToBeInvalid := true;
+    tbe.doCacheFill := false; // no cache fill on this path
+    tbe.atomic_to_be_done := false;
+  } else { // NOTE(review): policy_type 2 (PRESENT NEAR) also lands here - confirm intended
+    error("Invalid policy type");
+  }
+}
+
 action(Initiate_StoreUpgrade, desc="") {
   assert(tbe.dataValid);
   assert(is_valid(cache_entry));
@@ -860,7 +1014,110 @@ action(Initiate_WriteUnique_Forward, desc="") {
   tbe.actions.pushNB(Event:TagArrayWrite);
 }
 
+action(Initiate_AtomicReturn_LocalWrite, desc="") { // queue snoops, responses and write pipeline steps for an AtomicReturn
+  if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { // sharers exist and may hold dirty data
+    tbe.actions.push(Event:SendSnpUnique);
+  } else if (tbe.dir_sharers.count() > 0){
+    // no one will send us data unless we explicitly ask
+    tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+  }
+  tbe.actions.push(Event:SendDBIDResp_AR); // DBID response first, then the CompData_AR response
+  tbe.actions.pushNB(Event:WriteFEPipe);
+  tbe.actions.pushNB(Event:SendCompData_AR);
+  tbe.actions.push(Event:WriteFEPipe);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:DelayAtomic); // atomic delay is queued before the back-end write steps
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:TagArrayWrite);
+}
+
+
+action(Initiate_AtomicNoReturn_LocalWrite, desc="") { // queue snoops, responses and write pipeline steps for an AtomicNoReturn
+  if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { // sharers exist and may hold dirty data
+    tbe.actions.push(Event:SendSnpUnique);
+  } else if (tbe.dir_sharers.count() > 0){
+    // no one will send us data unless we explicitly ask
+    tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+  }
+  if (comp_anr) { // comp_anr: separate DBIDResp and Comp responses
+    tbe.actions.push(Event:SendDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+    tbe.actions.pushNB(Event:SendComp_ANR);
+  } else { // otherwise a single combined CompDBIDResp
+    tbe.actions.push(Event:SendCompDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+  }
+  tbe.actions.push(Event:WriteFEPipe);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:DelayAtomic); // atomic delay is queued before the back-end write steps
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:TagArrayWrite);
+}
+
+
+action(Initiate_AtomicReturn_Forward, desc="") { // queue an AtomicReturn request and the CompData_AR reply
+  if ((tbe.dir_sharers.count() > 0) &&
+     (tbe.dir_sharers.isElement(tbe.requestor))){
+    tbe.dir_sharers.remove(tbe.requestor); // the requestor is dropped from the sharer list
+  }
+  tbe.actions.push(Event:SendAtomicReturn);
+  tbe.actions.push(Event:SendCompData_AR);
+  tbe.actions.pushNB(Event:TagArrayWrite);
+
+  tbe.dataToBeInvalid := true;
+}
+
+action(Initiate_AtomicNoReturn_Forward, desc="") { // queue an AtomicNoReturn request, the requestor responses and the data send
+  if ((tbe.dir_sharers.count() > 0) &&
+     (tbe.dir_sharers.isElement(tbe.requestor))){
+    tbe.dir_sharers.remove(tbe.requestor); // the requestor is dropped from the sharer list
+  }
+  if (comp_anr) { // comp_anr: separate DBIDResp and Comp responses
+    tbe.actions.push(Event:SendAtomicNoReturn);
+    tbe.actions.push(Event:SendDBIDResp_ANR);
+    tbe.actions.pushNB(Event:SendComp_ANR);
+  } else { // otherwise a single combined CompDBIDResp
+    tbe.actions.push(Event:SendAtomicNoReturn);
+    tbe.actions.push(Event:SendCompDBIDResp_ANR);
+  }
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:SendANRData);
+  tbe.actions.pushNB(Event:TagArrayWrite);
+
+  tbe.dataToBeInvalid := true;
+}
+
+action(Initiate_AtomicReturn_Miss, desc="") { // queue a ReadNoSnp, the AtomicReturn responses and the write pipeline steps
+  tbe.actions.push(Event:SendReadNoSnp); // Send_ReadNoSnp asserts is_HN
+  tbe.actions.pushNB(Event:WriteFEPipe);
+  tbe.actions.push(Event:SendDBIDResp_AR);
+  tbe.actions.pushNB(Event:WriteFEPipe);
+  tbe.actions.pushNB(Event:SendCompData_AR);
+  tbe.actions.push(Event:WriteFEPipe);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:DelayAtomic); // atomic delay is queued before the back-end write steps
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:TagArrayWrite);
+}
+
+action(Initiate_AtomicNoReturn_Miss, desc="") { // queue a ReadNoSnp, the AtomicNoReturn responses and the write pipeline steps
+  assert(is_HN);
+  tbe.actions.push(Event:SendReadNoSnp);
+  if (comp_anr) { // comp_anr: separate DBIDResp and Comp responses
+    tbe.actions.push(Event:SendDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+    tbe.actions.pushNB(Event:SendComp_ANR);
+  } else { // otherwise a single combined CompDBIDResp
+    tbe.actions.push(Event:SendCompDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+  }
 
+  tbe.actions.push(Event:WriteFEPipe);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:DelayAtomic); // atomic delay is queued before the back-end write steps
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:TagArrayWrite);
+}
 
 action(Initiate_CopyBack, desc="") {
   // expect to receive this data after Send_CompDBIDResp
@@ -923,8 +1180,8 @@ action(Initiate_Evict, desc="") {
       tbe.actions.push(Event:WriteBEPipe);
       tbe.actions.push(Event:SendWBData);
     } else {
-      tbe.actions.push(Event:SendCompIResp);
       tbe.actions.push(Event:SendEvict);
+      tbe.actions.push(Event:SendCompIResp);
     }
   } else {
     tbe.actions.push(Event:SendCompIResp);
@@ -1152,7 +1409,9 @@ action(Send_ReadShared, desc="") {
 
 action(Send_ReadNoSnp, desc="") {
   assert(is_HN);
-  assert(tbe.use_DMT == false);
+  assert((tbe.use_DMT == false) ||
+         ((tbe.reqType == CHIRequestType:AtomicReturn) ||
+          (tbe.reqType == CHIRequestType:AtomicNoReturn)));
 
   clearExpectedReqResp(tbe);
   tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC);
@@ -1363,6 +1622,45 @@ action(Send_WriteUnique, desc="") {
   tbe.expected_req_resp.addExpectedCount(1);
 }
 
+action(Send_AtomicReturn, desc="") { // send an AtomicReturn request downstream and expect one DBIDResp
+  assert(is_valid(tbe));
+
+  enqueue(reqOutPort, CHIRequestMsg, request_latency) {
+    prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
+    out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
+    allowRequestRetry(tbe, out_msg);
+  }
+  clearExpectedReqResp(tbe); // called before the DBIDResp expectation is registered
+  tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp);
+  tbe.expected_req_resp.addExpectedCount(1);
+}
+
+action(Send_AtomicReturn_NoWait, desc="") { // send an AtomicReturn request downstream without registering expected responses
+  assert(is_valid(tbe));
+
+  enqueue(reqOutPort, CHIRequestMsg, request_latency) {
+    prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg);
+    out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
+    allowRequestRetry(tbe, out_msg);
+  }
+
+  tbe.dataAMOValid := false; // set back to true by UpdateDataState_FromReqDataResp on CompData_I
+}
+
+action(Send_AtomicNoReturn, desc="") { // send an AtomicNoReturn request downstream and register the expected response
+  assert(is_valid(tbe));
+
+  enqueue(reqOutPort, CHIRequestMsg, request_latency) {
+    prepareRequestAtomic(tbe, CHIRequestType:AtomicNoReturn, out_msg);
+    out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
+    allowRequestRetry(tbe, out_msg);
+  }
+  tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); // two accepted types, one expected message
+  tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp); // NOTE(review): no clearExpectedReqResp here, unlike Send_AtomicReturn - confirm
+  tbe.expected_req_resp.addExpectedCount(1);
+}
+
+
 action(Send_SnpCleanInvalid, desc="") {
   assert(is_valid(tbe));
   assert(tbe.expected_snp_resp.hasExpected() == false);
@@ -1631,6 +1929,20 @@ action(ExpectNCBWrData, desc="") {
   tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize, false);
 }
 
+action(ExpectNCBWrData_A, desc="") { // register the NCBWrData expected for an atomic request
+  // Expected data
+  int num_msgs := tbe.accSize / data_channel_size; // messages needed for accSize bytes...
+  if ((tbe.accSize % data_channel_size) != 0) {
+    num_msgs := num_msgs + 1; // ...rounded up for a partial last message
+  }
+  tbe.expected_req_resp.clear(num_msgs);
+  tbe.expected_req_resp.addExpectedDataType(CHIDataType:NCBWrData);
+  tbe.expected_req_resp.setExpectedCount(1);
+
+  // In atomic operations we do not expect real data for the current block,
+  // so tbe.dataBlkValid is left untouched here.
+}
+
 action(ExpectCompAck, desc="") {
   assert(is_valid(tbe));
   tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompAck);
@@ -1653,7 +1965,22 @@ action(Receive_ReqDataResp, desc="") {
     }
     // Copy data to tbe only if we didn't have valid data or the received
     // data is dirty
-    if ((tbe.dataBlkValid.isFull() == false) ||
+    if ((in_msg.type == CHIDataType:NCBWrData) &&
+         ((tbe.reqType == CHIRequestType:AtomicReturn) ||
+          (tbe.reqType == CHIRequestType:AtomicNoReturn))){
+      // DO NOTHING
+    } else if ((in_msg.type == CHIDataType:CompData_I) &&
+               ((tbe.reqType == CHIRequestType:AtomicReturn) ||
+                (tbe.reqType == CHIRequestType:AtomicLoad))) {
+      if(tbe.dataBlkValid.isFull()){
+        tbe.dataBlkValid.clear();
+      }
+      tbe.oldDataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
+      assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false);
+      tbe.dataBlkValid.orMask(in_msg.bitMask);
+      DPRINTF(RubySlicc, "Received %s\n", tbe.oldDataBlk);
+      DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid);
+    } else if ((tbe.dataBlkValid.isFull() == false) ||
         (in_msg.type == CHIDataType:CompData_UD_PD) ||
         (in_msg.type == CHIDataType:CompData_SD_PD) ||
         (in_msg.type == CHIDataType:CBWrData_UD_PD) ||
@@ -1678,7 +2005,8 @@ action(Receive_RespSepDataFromCompData, desc="") {
     if (tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) {
       error("Received unexpected message");
     }
-    if (is_HN == false) {
+    if ((is_HN == false) && (tbe.reqType != CHIRequestType:AtomicReturn) &&
+        ((tbe.reqType != CHIRequestType:AtomicLoad) || (tbe.atomic_to_be_done == true))){
       // must now ack the responder
       tbe.actions.pushFrontNB(Event:SendCompAck);
     }
@@ -1900,6 +2228,7 @@ action(UpdateDataState_FromReqDataResp, desc="") {
 
       } else if (in_msg.type == CHIDataType:CompData_I) {
         tbe.dataValid := true;
+        tbe.dataAMOValid := true;
         tbe.dataToBeInvalid := true;
         assert(tbe.dataMaybeDirtyUpstream == false);
 
@@ -1941,7 +2270,9 @@ action(UpdateDataState_FromReqDataResp, desc="") {
 
 action(UpdateDataState_FromWUDataResp, desc="") {
   assert(is_valid(tbe));
-  if (tbe.expected_req_resp.hasReceivedData()) {
+  if (tbe.expected_req_resp.hasReceivedData() &&
+       (tbe.reqType != CHIRequestType:AtomicReturn) &&
+       (tbe.reqType != CHIRequestType:AtomicNoReturn)) {
     assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)));
     assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr)
                                   + tbe.accSize - 1));
@@ -1959,6 +2290,23 @@ action(UpdateDataState_FromWUDataResp, desc="") {
   printTBEState(tbe);
 }
 
+action(UpdateDataState_FromADataResp, desc="") { // apply a received atomic operation to the TBE data block
+  assert(is_valid(tbe));
+  if (is_HN && (tbe.expected_req_resp.hasReceivedData()) && // only when is_HN, data was received...
+      ((tbe.reqType == CHIRequestType:AtomicReturn) ||       // ...and the request is an atomic
+       (tbe.reqType == CHIRequestType:AtomicNoReturn))) {
+    DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
+
+    tbe.oldDataBlk := tbe.dataBlk; // keep the pre-atomic value
+    tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
+    tbe.dataBlk.clearAtomicLogEntries();
+    tbe.dataDirty := true; // the block is marked dirty after the atomic
+
+    DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
+  }
+  printTBEState(tbe);
+}
+
 action(UpdateDataState_FromCUResp, desc="") {
   assert(is_valid(tbe));
   peek(rspInPort, CHIResponseMsg) {
@@ -2122,6 +2470,10 @@ action(Receive_ReqResp_WUNeedComp, desc="") {
   tbe.defer_expected_comp := true;
 }
 
+action(Receive_ReqResp_AR, desc="") { // schedule SendARData at the front of the TBE action queue
+  tbe.actions.pushFrontNB(Event:SendARData);
+}
+
 action(Receive_ReqResp_WUComp, desc="") {
   if (tbe.defer_expected_comp) {
     tbe.defer_expected_comp := false;
@@ -2130,6 +2482,16 @@ action(Receive_ReqResp_WUComp, desc="") {
   }
 }
 
+action(Receive_ReqResp_CopyDBID, desc="Copy the rsp DBID into the TBE") {
+  if (tbe.expected_req_resp.receivedRespType(CHIResponseType:DBIDResp) == false && // neither DBID-carrying type...
+      tbe.expected_req_resp.receivedRespType(CHIResponseType:CompDBIDResp) == false) { // ...is reported as received
+    error("Received unexpected message");
+  }
+  peek(rspInPort, CHIResponseMsg) {
+    tbe.txnId := in_msg.dbid; // Send_Data later stamps outgoing data with tbe.txnId
+  }
+}
+
 action(Receive_SnpResp, desc="") {
   assert(tbe.expected_snp_resp.hasExpected());
   peek(rspInPort, CHIResponseMsg) {
@@ -2305,6 +2667,36 @@ action(CheckWUComp, desc="") {
   }
 }
 
+action(Send_ARData, desc="") { // set up an NCBWrData send to the downstream machine
+  assert(is_valid(tbe));
+  tbe.snd_msgType := CHIDataType:NCBWrData;
+  tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
+  setupPendingAtomicSend(tbe);
+}
+
+action(Send_ANRData, desc="") { // set up an NCBWrData send to the downstream machine (same body as Send_ARData)
+  assert(is_valid(tbe));
+  tbe.snd_msgType := CHIDataType:NCBWrData;
+  tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr);
+  setupPendingAtomicSend(tbe);
+}
+
+action(CheckARComp, desc="") { // register the two messages still expected for an AtomicReturn
+  assert(is_valid(tbe));
+  tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I);
+  tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData);
+  tbe.expected_req_resp.addExpectedCount(2); // one per type registered above
+}
+
+action(CheckANRComp, desc="") { // register a deferred Comp expectation, if one is pending
+  assert(is_valid(tbe));
+  if (tbe.defer_expected_comp) {
+    tbe.defer_expected_comp := false; // consume the deferral so it is registered only once
+    tbe.expected_req_resp.addExpectedCount(1);
+    tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp);
+  }
+}
+
 action(Send_SnpRespData, desc="") {
   assert(is_HN == false);
   assert(is_valid(tbe));
@@ -2316,7 +2708,7 @@ action(Send_SnpRespData, desc="") {
          ((tbe.dataDirty || tbe.dataUnique) && (tbe.reqType == CHIRequestType:SnpShared)) ||
          ((tbe.dataDirty || tbe.dataUnique) && (tbe.reqType == CHIRequestType:SnpUnique)));
 
-  if (tbe.dataToBeInvalid) {
+  if (tbe.dataToBeInvalid && tbe.dir_sharers.isEmpty()) {
     assert(tbe.dataMaybeDirtyUpstream == false);
     if (tbe.dataDirty) {
       tbe.snd_msgType := CHIDataType:SnpRespData_I_PD;
@@ -2505,6 +2897,7 @@ action(Send_Data, desc="") {
   enqueue(datOutPort, CHIDataMsg, data_latency) {
     out_msg.addr := tbe.addr;
     out_msg.type := tbe.snd_msgType;
+    out_msg.txnId := tbe.txnId;
 
     int offset := tbe.snd_pendBytes.firstBitSet(true);
     assert(offset < blockSize);
@@ -2515,7 +2908,12 @@ action(Send_Data, desc="") {
     }
     tbe.snd_pendBytes.setMask(offset, range, false);
 
-    out_msg.dataBlk := tbe.dataBlk;
+    if (tbe.reqType == CHIRequestType:AtomicReturn){
+        out_msg.dataBlk := tbe.oldDataBlk;
+    } else {
+        out_msg.dataBlk := tbe.dataBlk;
+    }
+
     out_msg.bitMask.setMask(offset, range);
 
     out_msg.responder := machineID;
@@ -2551,6 +2949,8 @@ action(Send_CompI, desc="") {
     out_msg.type := CHIResponseType:Comp_I;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.txnId := tbe.txnId;
+    out_msg.dbid := tbe.txnId;
   }
 }
 
@@ -2561,6 +2961,8 @@ action(Send_CompUC, desc="") {
     out_msg.type := CHIResponseType:Comp_UC;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.txnId := tbe.txnId;
+    out_msg.dbid := tbe.txnId;
   }
 }
 
@@ -2571,6 +2973,8 @@ action(Send_CompUC_Stale, desc="") {
     out_msg.type := CHIResponseType:Comp_UC;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.txnId := tbe.txnId;
+    out_msg.dbid := tbe.txnId;
     // We don't know if this is a stale clean unique or a bug, so flag the
     // reponse so the requestor can make further checks
     out_msg.stale := true;
@@ -2584,6 +2988,7 @@ action(Send_CompAck, desc="") {
     out_msg.type := CHIResponseType:CompAck;
     out_msg.responder := machineID;
     out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
+    out_msg.txnId := tbe.txnId;
   }
 }
 
@@ -2594,6 +2999,7 @@ action(Send_CompI_Stale, desc="") {
     out_msg.type := CHIResponseType:Comp_I;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.dbid := tbe.txnId;
     // We don't know if this is a stale writeback or a bug, so flag the
     // reponse so the requestor can make further checks
     out_msg.stale := true;
@@ -2607,6 +3013,8 @@ action(Send_CompDBIDResp, desc="") {
     out_msg.type := CHIResponseType:CompDBIDResp;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.txnId := tbe.txnId;
+    out_msg.dbid := tbe.txnId;
   }
 }
 
@@ -2617,6 +3025,8 @@ action(Send_CompDBIDResp_Stale, desc="") {
     out_msg.type := CHIResponseType:CompDBIDResp;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.txnId := tbe.txnId;
+    out_msg.dbid := tbe.txnId;
     // We don't know if this is a stale writeback or a bug, so flag the
     // reponse so the requestor can make further checks
     out_msg.stale := true;
@@ -2630,6 +3040,7 @@ action(Send_DBIDResp, desc="") {
     out_msg.type := CHIResponseType:DBIDResp;
     out_msg.responder := machineID;
     out_msg.Destination.add(tbe.requestor);
+    out_msg.dbid := tbe.txnId;
   }
 }
 
@@ -2643,6 +3054,36 @@ action(Send_Comp_WU, desc="") {
   }
 }
 
+
+action(Send_CompData_AR, desc="") { // set up a CompData_I send to the requestor of an AtomicReturn
+  assert(is_valid(tbe));
+  assert(tbe.dataValid);
+
+  if (is_HN) {
+      tbe.oldDataBlk := tbe.dataBlk; // Send_Data transmits oldDataBlk for AtomicReturn requests
+  }
+
+  tbe.snd_msgType := CHIDataType:CompData_I;
+  tbe.dataMaybeDirtyUpstream := false;
+  tbe.requestorToBeExclusiveOwner := false; // the requestor is not recorded as an owner
+  tbe.requestorToBeOwner := false;
+  tbe.snd_destination := tbe.requestor;
+  setupPendingSend(tbe);
+  printTBEState(tbe);
+
+}
+
+action(Send_Comp_ANR, desc="") { // send a Comp response to the requestor
+  assert(is_valid(tbe));
+  enqueue(rspOutPort, CHIResponseMsg, comp_anr_latency + response_latency) { // comp_anr_latency added on top of response_latency
+    out_msg.addr := address;
+    out_msg.type := CHIResponseType:Comp;
+    out_msg.responder := machineID;
+    out_msg.Destination.add(tbe.requestor);
+  }
+}
+
+
 action(Send_SnpRespI, desc="") {
   enqueue(rspOutPort, CHIResponseMsg, response_latency) {
     out_msg.addr := address;
@@ -2664,6 +3105,7 @@ action(Send_RetryAck, desc="") {
       out_msg.type := CHIResponseType:RetryAck;
       out_msg.responder := machineID;
       out_msg.Destination.add(in_msg.retryDest);
+      out_msg.txnId := in_msg.txnId;
     }
   }
 }
@@ -2972,6 +3414,23 @@ action(Callback_StoreHit, desc="") {
   }
 }
 
+action(Callback_AtomicHit, desc="") {
+  assert(is_valid(tbe));
+  assert(tbe.dataValid);
+  assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
+         (tbe.reqType == CHIRequestType:AtomicStore));
+  DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
+
+  DataBlock oldDataBlk;
+  oldDataBlk := tbe.dataBlk;
+  tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
+  tbe.dataBlk.clearAtomicLogEntries();
+
+  sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
+  DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
+  tbe.dataDirty := true;
+}
+
 action(Callback_ExpressPrefetchHit, desc="") {
   // have not allocated TBE, but must clear the reservation
   assert(is_invalid(tbe));
@@ -3020,6 +3479,26 @@ action(Callback_Miss, desc="") {
       // also decay the timeout
       scLockDecayLatency();
     }
+  } else if (tbe.dataValid && tbe.atomic_to_be_done &&
+                ((tbe.reqType == CHIRequestType:AtomicLoad) ||
+                 (tbe.reqType == CHIRequestType:AtomicStore))){
+    assert(is_valid(tbe));
+    assert(tbe.dataValid);
+    assert((tbe.reqType == CHIRequestType:AtomicLoad) ||
+           (tbe.reqType == CHIRequestType:AtomicStore));
+    DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk);
+
+    DataBlock oldDataBlk;
+    oldDataBlk := tbe.dataBlk;
+    tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op);
+    tbe.dataBlk.clearAtomicLogEntries();
+
+    sequencer.atomicCallback(tbe.addr, oldDataBlk, false);
+    DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk);
+    tbe.dataDirty := true;
+  } else if (tbe.dataValid && tbe.dataAMOValid && (tbe.reqType == CHIRequestType:AtomicLoad)) {
+    DPRINTF(RubySlicc, "Atomic before %s\n", tbe.oldDataBlk);
+    sequencer.atomicCallback(tbe.addr, tbe.oldDataBlk, false);
   }
 }
 
@@ -3039,6 +3518,18 @@ action(Unset_Timeout_Cache, desc="") {
   wakeup_port(snpRdyPort, address);
 }
 
+action(Callback_AtomicNoReturn, desc="") {
+  assert(is_valid(tbe));
+  assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
+  assert((tbe.reqType == CHIRequestType:AtomicNoReturn) ||
+         (tbe.reqType == CHIRequestType:AtomicStore));
+
+  if (tbe.reqType == CHIRequestType:AtomicStore) {
+    sequencer.atomicCallback(tbe.addr, tbe.dataBlk);
+    DPRINTF(RubySlicc, "AtomicNoReturn %s\n", tbe.dataBlk);
+  }
+}
+
 action(Callback_WriteUnique, desc="") {
   assert(is_valid(tbe));
   assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
@@ -3152,9 +3643,7 @@ action(Profile_OutgoingEnd_DatalessResp, desc="") {
 action(TagArrayRead, desc="") {
   assert(is_valid(tbe));
   tbe.delayNextAction := curTick() + cyclesToTicks(
-                            tagLatency((tbe.reqType == CHIRequestType:Load) ||
-                                      (tbe.reqType == CHIRequestType:Store) ||
-                                      (tbe.reqType == CHIRequestType:StoreLine)));
+                            tagLatency(fromSequencer(tbe.reqType)));
 }
 
 action(TagArrayWrite, desc="") {
@@ -3206,6 +3695,11 @@ action(FillPipe, desc="") {
   tbe.delayNextAction := curTick() + cyclesToTicks(fill_latency);
 }
 
+action(DelayAtomic, desc="") {
+  assert(is_valid(tbe));
+  tbe.delayNextAction := curTick() + cyclesToTicks(atomic_op_latency);
+}
+
 action(SnpSharedPipe, desc="") {
   assert(is_valid(tbe));
   tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency);
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
index f990c0b3b5..ed8358fea4 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 ARM Limited
+ * Copyright (c) 2021-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -299,6 +299,14 @@ Cycles dataLatency() {
   return cache.getDataLatency();
 }
 
+bool fromSequencer(CHIRequestType reqType) {
+  return reqType == CHIRequestType:Load ||
+         reqType == CHIRequestType:Store ||
+         reqType == CHIRequestType:StoreLine ||
+         reqType == CHIRequestType:AtomicLoad ||
+         reqType == CHIRequestType:AtomicStore;
+}
+
 bool inCache(Addr addr) {
   CacheEntry entry := getCacheEntry(makeLineAddress(addr));
   // NOTE: we consider data for the addr to be in cache if it exists in local,
@@ -405,6 +413,12 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
   TBE tbe := TBEs[addr];
 
   initializeTBE(tbe, addr, storTBEs.addEntryToNewSlot());
+  if (fromSequencer(in_msg.type)) {
+    assert(in_msg.txnId == max_outstanding_transactions);
+    tbe.txnId := static_cast(Addr, "value", tbe.storSlot);
+  } else {
+    tbe.txnId := in_msg.txnId;
+  }
 
   assert(tbe.is_snp_tbe == false);
   assert(tbe.is_repl_tbe == false);
@@ -422,6 +436,9 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
   tbe.is_local_pf := in_msg.is_local_pf;
   tbe.is_remote_pf := in_msg.is_remote_pf;
 
+  tbe.atomic_op.clear();
+  tbe.atomic_op.orMask(in_msg.atomic_op);
+
   tbe.use_DMT := false;
   tbe.use_DCT := false;
 
@@ -486,6 +503,7 @@ TBE allocateSnoopTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes" {
   tbe.requestor := in_msg.requestor;
   tbe.fwdRequestor := in_msg.fwdRequestor;
   tbe.reqType := in_msg.type;
+  tbe.txnId := in_msg.txnId;
 
   tbe.snpNeedsData := in_msg.retToSrc;
 
@@ -538,6 +556,8 @@ TBE _allocateReplacementTBE(Addr addr, int storSlot), return_by_pointer="yes" {
   tbe.accSize := blockSize;
   tbe.requestor := machineID;
   tbe.reqType := CHIRequestType:null;
+  // This is an internal event and should generate a new TxnId
+  tbe.txnId := static_cast(Addr, "value", storSlot);
 
   tbe.use_DMT := false;
   tbe.use_DCT := false;
@@ -607,6 +627,13 @@ void setupPendingPartialSend(TBE tbe) {
   scheduleSendData(tbe, 0);
 }
 
+void setupPendingAtomicSend(TBE tbe) {
+  assert(blockSize >= data_channel_size);
+  assert((blockSize % data_channel_size) == 0);
+  tbe.snd_pendBytes.setMask(0,tbe.accSize,true);
+  scheduleSendData(tbe, 0);
+}
+
 // common code for downstream requests
 void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
   out_msg.addr := tbe.addr;
@@ -624,6 +651,20 @@ void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) {
   out_msg.seqReq := tbe.seqReq;
   out_msg.is_local_pf := false;
   out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf;
+  out_msg.txnId := tbe.txnId;
+
+  assert(tbe.txnId != static_cast(Addr, "value", -1));
+}
+
+void prepareRequestAtomic(TBE tbe, CHIRequestType type,
+                          CHIRequestMsg & out_msg) {
+  assert((type == CHIRequestType:AtomicReturn) ||
+         (type == CHIRequestType:AtomicNoReturn));
+  prepareRequest(tbe, type, out_msg);
+  out_msg.accAddr := tbe.accAddr;
+  out_msg.accSize := tbe.accSize;
+  out_msg.atomic_op.clear();
+  out_msg.atomic_op.orMask(tbe.atomic_op);
 }
 
 void allowRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
@@ -654,6 +695,8 @@ void prepareRequestRetry(TBE tbe, CHIRequestMsg & out_msg) {
   out_msg.seqReq := tbe.seqReq;
   out_msg.is_local_pf := false;
   out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf;
+  out_msg.atomic_op.clear();
+  out_msg.atomic_op.orMask(tbe.atomic_op);
 }
 
 void prepareRequestRetryDVM(TBE tbe, CHIRequestMsg & out_msg) {
@@ -755,8 +798,12 @@ bool needCacheEntry(CHIRequestType req_type,
                                    (req_type == CHIRequestType:WriteEvictFull) ||
                                    (is_HN && (req_type == CHIRequestType:WriteUniqueFull)))) ||
            (alloc_on_seq_acc && ((req_type == CHIRequestType:Load) ||
-                                 (req_type == CHIRequestType:Store))) ||
-           (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine));
+                                 (req_type == CHIRequestType:Store) ||
+                                 (req_type == CHIRequestType:AtomicLoad) ||
+                                 (req_type == CHIRequestType:AtomicStore))) ||
+           (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
+           (alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
+                               (req_type == CHIRequestType:AtomicNoReturn)));
   }
 }
 
@@ -780,12 +827,12 @@ bool upstreamHasShared(State state) {
 }
 
 void printTBEState(TBE tbe) {
-  DPRINTF(RubySlicc, "STATE: addr: %#x data present=%d valid=%d unique=%d dirty=%d mu_dirty=%d dir ownerV=%d ownerE=%d sharers=%d tobe_I=%d tobe_SC=%d doFill=%d pendAction=%s\n",
+  DPRINTF(RubySlicc, "STATE: addr: %#x data present=%d valid=%d unique=%d dirty=%d mu_dirty=%d dir ownerV=%d ownerE=%d sharers=%d tobe_I=%d tobe_SC=%d doFill=%d pendAction=%s txnId=%d\n",
                       tbe.addr, tbe.dataBlkValid.isFull(), tbe.dataValid, tbe.dataUnique,
                       tbe.dataDirty, tbe.dataMaybeDirtyUpstream, tbe.dir_ownerExists,
                       tbe.dir_ownerIsExcl,tbe.dir_sharers.count(),
                       tbe.dataToBeInvalid, tbe.dataToBeSharedClean,
-                      tbe.doCacheFill, tbe.pendAction);
+                      tbe.doCacheFill, tbe.pendAction, tbe.txnId);
   DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid);
 }
 
@@ -1156,6 +1203,10 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
     return Event:Store;
   } else if (type == CHIRequestType:StoreLine) {
     return Event:Store;
+  } else if (type == CHIRequestType:AtomicLoad) {
+    return Event:AtomicLoad;
+  } else if (type == CHIRequestType:AtomicStore){
+    return Event:AtomicStore;
   } else if (type == CHIRequestType:ReadShared) {
     return Event:ReadShared;
   } else if (type == CHIRequestType:ReadNotSharedDirty) {
@@ -1196,6 +1247,18 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
     return Event:DvmTlbi_Initiate;
   } else if (type == CHIRequestType:DvmSync_Initiate) {
     return Event:DvmSync_Initiate;
+  } else if (type == CHIRequestType:AtomicReturn){
+    if (is_HN) {
+      return Event:AtomicReturn_PoC;
+    } else {
+      return Event:AtomicReturn;
+    }
+  } else if (type == CHIRequestType:AtomicNoReturn){
+    if (is_HN) {
+      return Event:AtomicNoReturn_PoC;
+    } else {
+      return Event:AtomicNoReturn;
+    }
   } else {
     error("Invalid CHIRequestType");
   }
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
index 4c93988afd..0e8c6ec0e3 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 ARM Limited
+ * Copyright (c) 2021-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -155,6 +155,12 @@ transition({BUSY_INTR,BUSY_BLKD}, FillPipe) {
   ProcessNextState_ClearPending;
 }
 
+transition({BUSY_INTR,BUSY_BLKD}, DelayAtomic) {
+  Pop_TriggerQueue;
+  DelayAtomic;
+  ProcessNextState_ClearPending;
+}
+
 transition({BUSY_INTR,BUSY_BLKD}, SnpSharedPipe) {
   Pop_TriggerQueue;
   SnpSharedPipe;
@@ -418,8 +424,82 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
   ProcessNextState;
 }
 
+// AtomicReturn and AtomicNoReturn
+
+transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
+            UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_Forward;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
+            UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_Forward;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
 
-// Load / Store from sequencer & Prefetch from prefetcher
+transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
+           AtomicReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_LocalWrite;
+  Profile_Hit;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
+           AtomicNoReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_LocalWrite;
+  Profile_Hit;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
+           AtomicReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_LocalWrite;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
+           AtomicNoReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_LocalWrite;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(I, AtomicReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_Miss;
+  Allocate_DirEntry;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_Miss;
+  Allocate_DirEntry;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+
+// Load / Store / Atomic from sequencer & Prefetch from prefetcher
 
 transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
   Initiate_Request;
@@ -460,6 +540,28 @@ transition(BUSY_BLKD, StoreHit) {
   ProcessNextState_ClearPending;
 }
 
+transition(UC, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_Atomic_UC;
+  Profile_Hit;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({UD,UD_T}, {AtomicLoad,AtomicStore}, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_Atomic_UD;
+  Profile_Hit;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(BUSY_BLKD, AtomicHit) {
+  Pop_TriggerQueue;
+  Callback_AtomicHit;
+  ProcessNextState_ClearPending;
+}
+
 transition(I, {Load,Prefetch}, BUSY_BLKD) {
   Initiate_Request;
   Initiate_LoadMiss;
@@ -494,6 +596,55 @@ transition({BUSY_BLKD,BUSY_INTR}, UseTimeout) {
   Unset_Timeout_TBE;
 }
 
+transition(I, AtomicLoad, BUSY_BLKD){
+  Initiate_Request;
+  Initiate_AtomicReturn_I;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(I, AtomicStore, BUSY_BLKD){
+  Initiate_Request;
+  Initiate_AtomicNoReturn_I;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(SD, AtomicLoad, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_SD;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(SC, AtomicLoad, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_SC;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(SD, AtomicStore, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_SD;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(SC, AtomicStore, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_SC;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+
 // Evict from Upstream
 
 transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,RSC,RSD,RUSD,RUSC,UD_RSD,SD_RSD}, Evict, BUSY_BLKD) {
@@ -691,13 +842,15 @@ transition(BUSY_INTR, {SnpOnce,SnpOnceFwd}, BUSY_BLKD) {
 transition({BUSY_BLKD,BUSY_INTR},
             {ReadShared, ReadNotSharedDirty, ReadUnique, ReadUnique_PoC,
             ReadOnce, CleanUnique, CleanUnique_Stale,
-            Load, Store, Prefetch,
+            Load, Store, AtomicLoad, AtomicStore, Prefetch,
             WriteBackFull, WriteBackFull_Stale,
             WriteEvictFull, WriteEvictFull_Stale,
             WriteCleanFull, WriteCleanFull_Stale,
             Evict, Evict_Stale,
             WriteUnique,WriteUniquePtl_PoC,
-            WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc}) {
+            WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc,
+            AtomicReturn,AtomicReturn_PoC,
+            AtomicNoReturn,AtomicNoReturn_PoC}) {
   StallRequest;
 }
 
@@ -754,6 +907,30 @@ transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} {
   ProcessNextState_ClearPending;
 }
 
+transition(BUSY_BLKD, SendAtomicReturn, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_AtomicReturn;
+  CheckARComp;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendAtomicReturn_NoWait, BUSY_INTR) {
+  Pop_TriggerQueue;
+  Send_AtomicReturn_NoWait;
+  CheckARComp;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendAtomicNoReturn, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_AtomicNoReturn;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+
 transition(BUSY_BLKD, SendWriteNoSnp, BUSY_INTR) {DestinationAvailable} {
   Pop_TriggerQueue;
   Send_WriteNoSnp;
@@ -804,6 +981,20 @@ transition(BUSY_BLKD, SendWUDataCB) {
   ProcessNextState_ClearPending;
 }
 
+transition({BUSY_BLKD,BUSY_INTR}, SendARData) {
+  Pop_TriggerQueue;
+  Send_ARData;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_BLKD,BUSY_INTR}, SendANRData) {
+  Pop_TriggerQueue;
+  Callback_AtomicNoReturn;
+  Send_ANRData;
+  CheckANRComp;
+  ProcessNextState_ClearPending;
+}
+
 transition(BUSY_BLKD, SendInvSnpResp) {
   Pop_TriggerQueue;
   Send_InvSnpResp;
@@ -1025,6 +1216,26 @@ transition({BUSY_BLKD,BUSY_INTR}, SendComp_WU) {
   ProcessNextState_ClearPending;
 }
 
+transition(BUSY_BLKD, SendCompDBIDResp_ANR) {
+  Pop_TriggerQueue;
+  ExpectNCBWrData_A;
+  Send_CompDBIDResp;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendDBIDResp_AR) {
+  Pop_TriggerQueue;
+  ExpectNCBWrData_A;
+  Send_DBIDResp;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_BLKD,BUSY_INTR}, SendCompData_AR) {
+  Pop_TriggerQueue;
+  Send_CompData_AR;
+  ProcessNextState_ClearPending;
+}
+
 transition(BUSY_BLKD, SendCompDBIDRespStale) {
   Pop_TriggerQueue;
   Send_CompDBIDResp_Stale;
@@ -1085,6 +1296,7 @@ transition(BUSY_BLKD,
 transition({BUSY_BLKD,BUSY_INTR}, NCBWrData) {
   Receive_ReqDataResp;
   UpdateDataState_FromWUDataResp;
+  UpdateDataState_FromADataResp;
   Pop_DataInQueue;
   ProcessNextState;
 }
@@ -1212,8 +1424,7 @@ transition(BUSY_BLKD,
 }
 
 // waiting for WB or evict ack
-transition(BUSY_INTR,
-           {CompDBIDResp,Comp_I}, BUSY_BLKD) {
+transition(BUSY_INTR, Comp_I, BUSY_BLKD) {
   Receive_ReqResp;
   Profile_OutgoingEnd_DatalessResp;
   Pop_RespInQueue;
@@ -1229,10 +1440,21 @@ transition(BUSY_INTR, Comp_UC, BUSY_BLKD) {
   ProcessNextState;
 }
 
+// waiting for WB or evict ack
+transition(BUSY_INTR, CompDBIDResp, BUSY_BLKD) {
+  Receive_ReqResp;
+  Receive_ReqResp_CopyDBID;
+  Profile_OutgoingEnd_DatalessResp;
+  Pop_RespInQueue;
+  ProcessNextState;
+}
+
 // alternative flow for WU with separate Comp
-transition(BUSY_INTR, DBIDResp, BUSY_BLKD) {
+transition({BUSY_INTR,BUSY_BLKD}, DBIDResp, BUSY_BLKD) {
   Receive_ReqResp;
+  Receive_ReqResp_CopyDBID;
   Receive_ReqResp_WUNeedComp;
+  Receive_ReqResp_AR;
   Pop_RespInQueue;
   ProcessNextState;
 }
diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm
index 3bd8d3f3c3..f806488b45 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 ARM Limited
+ * Copyright (c) 2021-2023 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -51,6 +51,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
   // sending necessary snoops.
   Cycles read_hit_latency := 0;
   Cycles read_miss_latency := 0;
+  Cycles atomic_op_latency := 0;
   Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req
   Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data
   Cycles fill_latency := 0; // Fill latency
@@ -81,6 +82,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
   int sc_lock_multiplier_max   := 256;
   bool sc_lock_enabled;
 
+  // Maximum number of outstanding transactions from a single requester
+  Addr max_outstanding_transactions := 1024;
+
   // Recycle latency on resource stalls
   Cycles stall_recycle_lat := 1;
 
@@ -123,11 +127,24 @@ machine(MachineType:Cache, "Cache coherency protocol") :
   // possible.
   bool enable_DCT;
 
+  // Atomic Operation Policy
+  // All Near executes all Atomics at L1 (variable set to 0)
+  // Unique Near executes Atomics at HNF for states I, SC, SD (set to 1; default)
+  // Present Near executes all Atomics at L1 except when state is I (set to 2)
+  int policy_type := 1;
+
+
   // Use separate Comp/DBIDResp responses for WriteUnique
   bool comp_wu := "False";
   // additional latency for the WU Comp response
   Cycles comp_wu_latency := 0;
 
+
+  // Use separate Comp/DBIDResp responses for AtomicNoReturn
+  bool comp_anr := "False";
+  // additional latency for the ANR Comp response
+  Cycles comp_anr_latency := 0;
+
   // Controls cache clusivity for different request types.
   // set all alloc_on* to false to completelly disable caching
   bool alloc_on_readshared;
@@ -136,6 +153,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
   bool alloc_on_writeback;
   bool alloc_on_seq_acc;
   bool alloc_on_seq_line_write;
+  bool alloc_on_atomic;
   // Controls if the clusivity is strict.
   bool dealloc_on_unique;
   bool dealloc_on_shared;
@@ -280,37 +298,43 @@ machine(MachineType:Cache, "Cache coherency protocol") :
 
     // Events triggered by sequencer requests or snoops in the rdy queue
     // See CHIRequestType in CHi-msg.sm for descriptions
-    Load,                        desc="";
-    Store,                       desc="";
-    Prefetch,                    desc="";
-    ReadShared,                  desc="";
-    ReadNotSharedDirty,          desc="";
-    ReadUnique,                  desc="";
-    ReadUnique_PoC,              desc="";
-    ReadOnce,                    desc="";
-    CleanUnique,                 desc="";
-    Evict,                       desc="";
-    WriteBackFull,               desc="";
-    WriteEvictFull,              desc="";
-    WriteCleanFull,              desc="";
-    WriteUnique,                 desc="";
-    WriteUniquePtl_PoC,          desc="";
-    WriteUniqueFull_PoC,         desc="";
-    WriteUniqueFull_PoC_Alloc,   desc="";
-    SnpCleanInvalid,             desc="";
-    SnpShared,                   desc="";
-    SnpSharedFwd,                desc="";
-    SnpNotSharedDirtyFwd,        desc="";
-    SnpUnique,                   desc="";
-    SnpUniqueFwd,                desc="";
-    SnpOnce,                     desc="";
-    SnpOnceFwd,                  desc="";
-    SnpStalled, desc=""; // A snoop stall triggered from the inport
+    Load,                        desc="", in_trans="yes";
+    Store,                       desc="", in_trans="yes";
+    AtomicLoad,                  desc="", in_trans="yes";
+    AtomicStore,                 desc="", in_trans="yes";
+    Prefetch,                    desc="", in_trans="yes";
+    ReadShared,                  desc="", in_trans="yes";
+    ReadNotSharedDirty,          desc="", in_trans="yes";
+    ReadUnique,                  desc="", in_trans="yes";
+    ReadUnique_PoC,              desc="", in_trans="yes";
+    ReadOnce,                    desc="", in_trans="yes";
+    CleanUnique,                 desc="", in_trans="yes";
+    Evict,                       desc="", in_trans="yes";
+    WriteBackFull,               desc="", in_trans="yes";
+    WriteEvictFull,              desc="", in_trans="yes";
+    WriteCleanFull,              desc="", in_trans="yes";
+    WriteUnique,                 desc="", in_trans="yes";
+    WriteUniquePtl_PoC,          desc="", in_trans="yes";
+    WriteUniqueFull_PoC,         desc="", in_trans="yes";
+    WriteUniqueFull_PoC_Alloc,   desc="", in_trans="yes";
+    AtomicReturn,                desc="", in_trans="yes";
+    AtomicNoReturn,              desc="", in_trans="yes";
+    AtomicReturn_PoC,            desc="", in_trans="yes";
+    AtomicNoReturn_PoC,          desc="", in_trans="yes";
+    SnpCleanInvalid,             desc="", in_trans="yes";
+    SnpShared,                   desc="", in_trans="yes";
+    SnpSharedFwd,                desc="", in_trans="yes";
+    SnpNotSharedDirtyFwd,        desc="", in_trans="yes";
+    SnpUnique,                   desc="", in_trans="yes";
+    SnpUniqueFwd,                desc="", in_trans="yes";
+    SnpOnce,                     desc="", in_trans="yes";
+    SnpOnceFwd,                  desc="", in_trans="yes";
+    SnpStalled, desc="", in_trans="yes"; // A snoop stall triggered from the inport
 
     // DVM sequencer requests
-    DvmTlbi_Initiate, desc=""; // triggered when a CPU core wants to send a TLBI
+    DvmTlbi_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a TLBI
     // TLBIs are handled entirely within Ruby, so there's no ExternCompleted message
-    DvmSync_Initiate, desc=""; // triggered when a CPU core wants to send a sync
+    DvmSync_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a sync
     DvmSync_ExternCompleted, desc=""; // triggered when an externally requested Sync is completed
 
     // Events triggered by incoming response messages
@@ -344,10 +368,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     PCrdGrant_PoC_Hazard,    desc="";
 
     // Events triggered by incoming DVM messages
-    SnpDvmOpSync_P1,         desc="";
-    SnpDvmOpSync_P2,         desc="";
-    SnpDvmOpNonSync_P1,      desc="";
-    SnpDvmOpNonSync_P2,      desc="";
+    SnpDvmOpSync_P1,         desc="", in_trans="yes";
+    SnpDvmOpSync_P2,         desc="", in_trans="yes";
+    SnpDvmOpNonSync_P1,      desc="", in_trans="yes";
+    SnpDvmOpNonSync_P2,      desc="", in_trans="yes";
 
     // Events triggered by incoming data response messages
     // See CHIDataType in CHi-msg.sm for descriptions
@@ -383,20 +407,20 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     // A Write or Evict becomes stale when the requester receives a snoop that
     // changes the state of the data while the request was pending.
     // Actual CHI implementations don't have this check.
-    Evict_Stale,            desc="";
-    WriteBackFull_Stale,    desc="";
-    WriteEvictFull_Stale,   desc="";
-    WriteCleanFull_Stale,   desc="";
-    CleanUnique_Stale,   desc="";
+    Evict_Stale,            desc="", in_trans="yes";
+    WriteBackFull_Stale,    desc="", in_trans="yes";
+    WriteEvictFull_Stale,   desc="", in_trans="yes";
+    WriteCleanFull_Stale,   desc="", in_trans="yes";
+    CleanUnique_Stale,   desc="", in_trans="yes";
 
     // Cache fill handling
     CheckCacheFill,   desc="Check if need to write or update the cache and trigger any necessary allocation and evictions";
 
     // Internal requests generated to evict or writeback a local copy
     // to free-up cache space
-    Local_Eviction,   desc="Evicts/WB the local copy of the line";
-    LocalHN_Eviction, desc="Local_Eviction triggered when is HN";
-    Global_Eviction,  desc="Local_Eviction + back-invalidate line in all upstream requesters";
+    Local_Eviction,   in_trans="yes", desc="Evicts/WB the local copy of the line";
+    LocalHN_Eviction, in_trans="yes", desc="Local_Eviction triggered when is HN";
+    Global_Eviction,  in_trans="yes", desc="Local_Eviction + back-invalidate line in all upstream requesters";
 
     // Events triggered from tbe.actions
     // In general, for each event we define a single transition from
@@ -415,47 +439,62 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     DataArrayWriteOnFill, desc="Write the cache data array (cache fill)";
 
     // Events for modeling the pipeline latency
-    ReadHitPipe,  desc="Latency of reads served from local cache";
-    ReadMissPipe, desc="Latency of reads not served from local cache";
-    WriteFEPipe,  desc="Front-end latency of write requests";
-    WriteBEPipe,  desc="Back-end latency of write requests";
-    FillPipe,     desc="Cache fill latency";
+    ReadHitPipe,   desc="Latency of reads served from local cache";
+    ReadMissPipe,  desc="Latency of reads not served from local cache";
+    WriteFEPipe,   desc="Front-end latency of write requests";
+    WriteBEPipe,   desc="Back-end latency of write requests";
+    FillPipe,      desc="Cache fill latency";
+    DelayAtomic,   desc="Atomic operation latency";
     SnpSharedPipe, desc="Latency for SnpShared requests";
     SnpInvPipe,    desc="Latency for SnpUnique and SnpCleanInv requests";
     SnpOncePipe,   desc="Latency for SnpOnce requests";
 
     // Send a read request downstream.
-    SendReadShared,       desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false";
-    SendReadOnce,         desc="Send a ReadOnce";
-    SendReadNoSnp,        desc="Send a SendReadNoSnp";
-    SendReadNoSnpDMT,     desc="Send a SendReadNoSnp using DMT";
-    SendReadUnique,       desc="Send a ReadUnique";
+    SendReadShared,       out_trans="yes", desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false";
+    SendReadOnce,         out_trans="yes", desc="Send a ReadOnce";
+    SendReadNoSnp,        out_trans="yes", desc="Send a SendReadNoSnp";
+    SendReadNoSnpDMT,     out_trans="yes", desc="Send a SendReadNoSnp using DMT";
+    SendReadUnique,       out_trans="yes", desc="Send a ReadUnique";
     SendCompAck,          desc="Send CompAck";
     // Read handling at the completer
-    SendCompData,    desc="Send CompData";
-    WaitCompAck,     desc="Expect to receive CompAck";
-    SendRespSepData, desc="Send RespSepData for a DMT request";
+    SendCompData,         desc="Send CompData";
+    WaitCompAck,          desc="Expect to receive CompAck";
+    SendRespSepData,      desc="Send RespSepData for a DMT request";
 
     // Send a write request downstream.
-    SendWriteBackOrWriteEvict, desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
-    SendWriteClean,            desc="Send a WriteCleanFull";
-    SendWriteNoSnp,            desc="Send a WriteNoSnp for a full line";
-    SendWriteNoSnpPartial,     desc="Send a WriteNoSnpPtl";
-    SendWriteUnique,           desc="Send a WriteUniquePtl";
+    SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
+    SendWriteClean,            out_trans="yes", desc="Send a WriteCleanFull";
+    SendWriteNoSnp,            out_trans="yes", desc="Send a WriteNoSnp for a full line";
+    SendWriteNoSnpPartial,     out_trans="yes", desc="Send a WriteNoSnpPtl";
+    SendWriteUnique,           out_trans="yes", desc="Send a WriteUniquePtl";
     SendWBData,                desc="Send writeback data";
     SendWUData,                desc="Send write unique data";
     SendWUDataCB,              desc="Send write unique data from a sequencer callback";
     // Write handling at the completer
-    SendCompDBIDResp,      desc="Ack WB with CompDBIDResp";
-    SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
-    SendCompDBIDResp_WU,   desc="Ack WU with CompDBIDResp and set expected data";
-    SendDBIDResp_WU,       desc="Ack WU with DBIDResp and set expected data";
-    SendComp_WU,           desc="Ack WU completion";
+    SendCompDBIDResp,          desc="Ack WB with CompDBIDResp";
+    SendCompDBIDRespStale,     desc="Ack stale WB with CompDBIDResp";
+    SendCompDBIDResp_WU,       desc="Ack WU with CompDBIDResp and set expected data";
+    SendDBIDResp_WU,           desc="Ack WU with DBIDResp and set expected data";
+    SendComp_WU,               desc="Ack WU completion";
+
+    // Send an atomic request downstream.
+    SendAtomicReturn,          out_trans="yes", desc="Send atomic request with return";
+    SendAtomicReturn_NoWait,   out_trans="yes", desc="Send atomic request with return, but no DBID";
+    SendAtomicNoReturn,        out_trans="yes", desc="Send atomic request without return";
+    SendARData,                desc="Send atomic return request data";
+    SendANRData,               desc="Send atomic no return request data";
+    // Atomic handling at the completer
+    SendDBIDResp_AR,       desc="Ack AR with DBIDResp and set expected data";
+    SendCompData_AR,       desc="Ack AR completion";
+    SendCompDBIDResp_ANR,  desc="Ack ANR with CompDBIDResp and set expected data";
+    SendDBIDResp_ANR,      desc="Ack ANR with DBIDResp and set expected data";
+    SendComp_ANR,          desc="Ack ANR completion";
+
 
     // Dataless requests
-    SendEvict,      desc="Send a Evict";
+    SendEvict,      out_trans="yes", desc="Send a Evict";
     SendCompIResp,  desc="Ack Evict with Comp_I";
-    SendCleanUnique,desc="Send a CleanUnique";
+    SendCleanUnique,out_trans="yes", desc="Send a CleanUnique";
     SendCompUCResp, desc="Ack CleanUnique with Comp_UC";
     SendCompUCRespStale, desc="Ack stale CleanUnique with Comp_UC";
 
@@ -499,6 +538,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     // Misc triggers
     LoadHit,  desc="Complete a load hit";
     StoreHit, desc="Complete a store hit";
+    AtomicHit, desc="Complete an atomic hit";
     UseTimeout, desc="Transition from UD_T -> UD";
     RestoreFromHazard, desc="Restore from a snoop hazard";
     TX_Data, desc="Transmit pending data messages";
@@ -599,6 +639,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     Addr accAddr,           desc="Access address for Load/Store/WriteUniquePtl; otherwisse == addr";
     int accSize,            desc="Access size for Load/Store/WriteUniquePtl; otherwisse == blockSize";
     CHIRequestType reqType, desc="Request type that initiated this transaction";
+    Addr txnId,             desc="Transaction ID. We default to -1 for debug purposes", default="-1";
     MachineID requestor,    desc="Requestor ID";
     MachineID fwdRequestor, desc="Requestor to receive data on fwding snoops";
     bool use_DMT,           desc="Use DMT for this transaction";
@@ -609,6 +650,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     bool is_local_pf,       desc="Request generated by a local prefetcher";
     bool is_remote_pf,      desc="Request generated a prefetcher in another cache";
 
+    // Atomic info associated with the transaction
+    WriteMask atomic_op,    desc="Atomic Operation Wrapper";
+    bool atomic_to_be_done, desc="We have yet to perform the atomic";
+
     // NOTE: seqReq is a smart pointer pointing to original CPU request object
     // that triggers transactions associated with this TBE. seqReq carries some
     // information (e.g., PC of requesting instruction, virtual address of this
@@ -626,8 +671,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     // stable state.
     bool hasUseTimeout,           desc="Line is locked under store/use timeout";
     DataBlock dataBlk,            desc="Local copy of the line";
+    DataBlock oldDataBlk,         desc="Local copy of the line before executing atomic";
     WriteMask dataBlkValid,       desc="Marks which bytes in the DataBlock are valid";
     bool dataValid,               desc="Local copy is valid";
+    bool dataAMOValid,            desc="Local copy is valid for AMO";
     bool dataDirty,               desc="Local copy is dirtry";
     bool dataMaybeDirtyUpstream,  desc="Line maybe dirty upstream";
     bool dataUnique,              desc="Line is unique either locally or upsatream";
@@ -785,6 +832,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     Event event;
     MachineID retryDest;
     bool usesTxnId;
+    Addr txnId;
 
     bool functionalRead(Packet *pkt) { return false; }
     bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm
index 92a04ed3d2..aa27c40964 100644
--- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm
+++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 ARM Limited
+ * Copyright (c) 2021-2023 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -170,8 +170,8 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op
     SendPCrdGrant,  desc="Send PCrdGrant";
     DoRetry,        desc="Resend the current pending request";
 
-    DvmTlbi_Initiate, desc="Initiate a DVM TLBI on the provided TBE";
-    DvmSync_Initiate, desc="Initiate a DVM Sync on the provided TBE";
+    DvmTlbi_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM TLBI on the provided TBE";
+    DvmSync_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM Sync on the provided TBE";
     DvmSendNextMessage_P1, desc="Trigger a SnpDvmOp_P1 message based on the TBE type";
     DvmSendNextMessage_P2, desc="Trigger a SnpDvmOp_P2 message based on the TBE type";
     DvmFinishDistributing, desc="Move the TBE out of the Distributing state into Waiting";
diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm
index 820f2dfcf4..46f57456a5 100644
--- a/src/mem/ruby/protocol/chi/CHI-mem.sm
+++ b/src/mem/ruby/protocol/chi/CHI-mem.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021,2022 ARM Limited
+ * Copyright (c) 2021-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -164,6 +164,7 @@ machine(MachineType:Memory, "Memory controller interface") :
     int storSlot,   desc="Slot in the storage tracker occupied by this entry";
     Addr addr,      desc="Line address for this TBE";
     Addr accAddr,   desc="Original access address. Set only for Write*Ptl";
+    Addr txnId,     desc="Transaction ID";
     int  accSize,   desc="Access size. Set only for Write*Ptl";
     State state,    desc="Current line state";
     DataBlock dataBlk, desc="Transaction data";
@@ -503,6 +504,7 @@ machine(MachineType:Memory, "Memory controller interface") :
       }
       tbe.accAddr := in_msg.accAddr;
       tbe.accSize := in_msg.accSize;
+      tbe.txnId := in_msg.txnId;
     }
   }
 
@@ -608,6 +610,7 @@ machine(MachineType:Memory, "Memory controller interface") :
     assert(tbe.rxtxBytes < blockSize);
     enqueue(datOutPort, CHIDataMsg, data_latency) {
       out_msg.addr := tbe.addr;
+      out_msg.txnId := tbe.txnId;
       if (tbe.useDataSepResp) {
         out_msg.type := CHIDataType:DataSepResp_UC;
       } else {
@@ -663,7 +666,7 @@ machine(MachineType:Memory, "Memory controller interface") :
   }
 
   action(popMemoryQueue, "pmem", desc="Pop memory queue.") {
-    memQueue_in.dequeue(clockEdge());
+    dequeueMemRespQueue();
   }
 
   // Stall/wake-up only used for requests that arrive when we are on the
diff --git a/src/mem/ruby/protocol/chi/CHI-msg.sm b/src/mem/ruby/protocol/chi/CHI-msg.sm
index 63648a5920..b9e11d9dd9 100644
--- a/src/mem/ruby/protocol/chi/CHI-msg.sm
+++ b/src/mem/ruby/protocol/chi/CHI-msg.sm
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 ARM Limited
+ * Copyright (c) 2021, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -46,6 +46,8 @@ enumeration(CHIRequestType, desc="") {
   Load;
   Store;
   StoreLine;
+  AtomicLoad;
+  AtomicStore;
   // Incoming DVM-related requests generated by the sequencer
   DvmTlbi_Initiate;
   DvmSync_Initiate;
@@ -66,6 +68,9 @@ enumeration(CHIRequestType, desc="") {
   WriteUniquePtl;
   WriteUniqueFull;
 
+  AtomicReturn;
+  AtomicNoReturn;
+
   SnpSharedFwd;
   SnpNotSharedDirtyFwd;
   SnpUniqueFwd;
@@ -108,6 +113,8 @@ structure(CHIRequestMsg, desc="", interface="Message") {
   bool is_local_pf,         desc="Request generated by a local prefetcher";
   bool is_remote_pf,        desc="Request generated a prefetcher in another cache";
 
+  WriteMask atomic_op,      desc="Atomic Operation Wrapper";
+
   bool usesTxnId,       desc="True if using a Transaction ID", default="false";
   Addr txnId,           desc="Transaction ID", default="0";
 
@@ -156,6 +163,7 @@ structure(CHIResponseMsg, desc="", interface="Message") {
   bool stale,           desc="Response to a stale request";
   bool usesTxnId,       desc="True if using a Transaction ID", default="false";
   Addr txnId,           desc="Transaction ID", default="0";
+  Addr dbid,            desc="Data Buffer ID", default="0";
   //NOTE: not in CHI and for debuging only
 
   MessageSizeType MessageSize, default="MessageSizeType_Control";
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index 2d10422487..36092387ac 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -62,8 +62,10 @@ AbstractController::AbstractController(const Params &p)
       m_buffer_size(p.buffer_size), m_recycle_latency(p.recycle_latency),
       m_mandatory_queue_latency(p.mandatory_queue_latency),
       m_waiting_mem_retry(false),
+      m_mem_ctrl_waiting_retry(false),
       memoryPort(csprintf("%s.memory", name()), this),
       addrRanges(p.addr_ranges.begin(), p.addr_ranges.end()),
+      mRetryRespEvent{*this, false},
       stats(this)
 {
     if (m_version == 0) {
@@ -123,6 +125,7 @@ AbstractController::resetStats()
     for (uint32_t i = 0; i < size; i++) {
         stats.delayVCHistogram[i]->reset();
     }
+    ClockedObject::resetStats();
 }
 
 void
@@ -367,11 +370,17 @@ AbstractController::functionalMemoryWrite(PacketPtr pkt)
     return num_functional_writes + 1;
 }
 
-void
+bool
 AbstractController::recvTimingResp(PacketPtr pkt)
 {
-    assert(getMemRespQueue());
-    assert(pkt->isResponse());
+    auto* memRspQueue = getMemRespQueue();
+    gem5_assert(memRspQueue);
+    gem5_assert(pkt->isResponse());
+
+    if (!memRspQueue->areNSlotsAvailable(1, curTick())) {
+        m_mem_ctrl_waiting_retry = true;
+        return false;
+    }
 
     std::shared_ptr<MemoryMsg> msg = std::make_shared<MemoryMsg>(clockEdge());
     (*msg).m_addr = pkt->getAddr();
@@ -395,8 +404,9 @@ AbstractController::recvTimingResp(PacketPtr pkt)
         panic("Incorrect packet type received from memory controller!");
     }
 
-    getMemRespQueue()->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)));
+    memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)));
     delete pkt;
+    return true;
 }
 
 Tick
@@ -438,11 +448,33 @@ const
 }
 
 
+void
+AbstractController::memRespQueueDequeued() {
+    // Nothing to do unless memory awaits a retry that is not yet scheduled
+    if (!m_mem_ctrl_waiting_retry || mRetryRespEvent.scheduled()) return;
+    schedule(mRetryRespEvent, clockEdge(Cycles{1}));
+}
+
+void
+AbstractController::dequeueMemRespQueue() {
+    MessageBuffer* memRspQueue = getMemRespQueue();
+    gem5_assert(memRspQueue);
+    memRspQueue->dequeue(clockEdge());
+    memRespQueueDequeued();   // schedules a retry if memory is waiting
+}
+
+void
+AbstractController::sendRetryRespToMem() {
+    // Only issue a retry if a response was refused in recvTimingResp()
+    if (!m_mem_ctrl_waiting_retry) return;
+    m_mem_ctrl_waiting_retry = false;
+    memoryPort.sendRetryResp();
+}
+
 bool
 AbstractController::MemoryPort::recvTimingResp(PacketPtr pkt)
 {
-    controller->recvTimingResp(pkt);
-    return true;
+    return controller->recvTimingResp(pkt);
 }
 
 void
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index a5ab5c2c44..72b679d6cf 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017,2019-2022 ARM Limited
+ * Copyright (c) 2017,2019-2023 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -61,6 +61,7 @@
 #include "mem/ruby/system/CacheRecorder.hh"
 #include "params/RubyController.hh"
 #include "sim/clocked_object.hh"
+#include "sim/eventq.hh"
 
 namespace gem5
 {
@@ -100,6 +101,14 @@ class AbstractController : public ClockedObject, public Consumer
     virtual MessageBuffer* getMandatoryQueue() const = 0;
     virtual MessageBuffer* getMemReqQueue() const = 0;
     virtual MessageBuffer* getMemRespQueue() const = 0;
+
+    // Must be called by the controller whenever it dequeues the mem resp
+    // queue itself, so the memory controller gets its retry in time
+    void memRespQueueDequeued();
+    // Alternatively, call this function to perform both the dequeue and the
+    // notification at once.
+    void dequeueMemRespQueue();
+
     virtual AccessPermission getAccessPermission(const Addr &addr) = 0;
 
     virtual void print(std::ostream & out) const = 0;
@@ -165,7 +174,7 @@ class AbstractController : public ClockedObject, public Consumer
     Port &getPort(const std::string &if_name,
                   PortID idx=InvalidPortID);
 
-    void recvTimingResp(PacketPtr pkt);
+    bool recvTimingResp(PacketPtr pkt);
     Tick recvAtomic(PacketPtr pkt);
 
     const AddrRangeList &getAddrRanges() const { return addrRanges; }
@@ -258,7 +267,7 @@ class AbstractController : public ClockedObject, public Consumer
         assert(m_inTrans.find(addr) == m_inTrans.end());
         m_inTrans[addr] = {type, initialState, curTick()};
         if (retried)
-          ++(*stats.inTransLatRetries[type]);
+          ++(*stats.inTransRetryCnt[type]);
     }
 
     /**
@@ -279,11 +288,23 @@ class AbstractController : public ClockedObject, public Consumer
           isAddressed ? m_inTransAddressed : m_inTransUnaddressed;
         auto iter = m_inTrans.find(addr);
         assert(iter != m_inTrans.end());
-        stats.inTransLatHist[iter->second.transaction]
-                              [iter->second.state]
-                              [(unsigned)finalState]->sample(
-                                ticksToCycles(curTick() - iter->second.time));
-        ++(*stats.inTransLatTotal[iter->second.transaction]);
+        auto &trans = iter->second;
+
+        auto stat_iter_ev = stats.inTransStateChanges.find(trans.transaction);
+        gem5_assert(stat_iter_ev != stats.inTransStateChanges.end(),
+          "%s: event type=%d not marked as in_trans in SLICC",
+          name(), trans.transaction);
+
+        auto stat_iter_state = stat_iter_ev->second.find(trans.state);
+        gem5_assert(stat_iter_state != stat_iter_ev->second.end(),
+          "%s: event type=%d has no transition from state=%d",
+          name(), trans.transaction, trans.state);
+
+        ++(*stat_iter_state->second[(unsigned)finalState]);
+
+        stats.inTransLatHist[iter->second.transaction]->sample(
+                                ticksToCycles(curTick() - trans.time));
+
        m_inTrans.erase(iter);
     }
 
@@ -325,10 +346,17 @@ class AbstractController : public ClockedObject, public Consumer
           isAddressed ? m_outTransAddressed : m_outTransUnaddressed;
         auto iter = m_outTrans.find(addr);
         assert(iter != m_outTrans.end());
-        stats.outTransLatHist[iter->second.transaction]->sample(
-            ticksToCycles(curTick() - iter->second.time));
+        auto &trans = iter->second;
+
+        auto stat_iter = stats.outTransLatHist.find(trans.transaction);
+        gem5_assert(stat_iter != stats.outTransLatHist.end(),
+          "%s: event type=%d not marked as out_trans in SLICC",
+          name(), trans.transaction);
+
+        stat_iter->second->sample(
+            ticksToCycles(curTick() - trans.time));
         if (retried)
-          ++(*stats.outTransLatHistRetries[iter->second.transaction]);
+          ++(*stats.outTransRetryCnt[trans.transaction]);
         m_outTrans.erase(iter);
     }
 
@@ -364,6 +392,7 @@ class AbstractController : public ClockedObject, public Consumer
     Cycles m_recycle_latency;
     const Cycles m_mandatory_queue_latency;
     bool m_waiting_mem_retry;
+    bool m_mem_ctrl_waiting_retry;
 
     /**
      * Port that forwards requests and receives responses from the
@@ -411,22 +440,33 @@ class AbstractController : public ClockedObject, public Consumer
     NetDest downstreamDestinations;
     NetDest upstreamDestinations;
 
+    void sendRetryRespToMem();
+    MemberEventWrapper<&AbstractController::sendRetryRespToMem> mRetryRespEvent;
+
   public:
     struct ControllerStats : public statistics::Group
     {
         ControllerStats(statistics::Group *parent);
 
-        // Initialized by the SLICC compiler for all combinations of event and
-        // states. Only histograms with samples will appear in the stats
-        std::vector<std::vector<std::vector<statistics::Histogram*>>>
-          inTransLatHist;
-        std::vector<statistics::Scalar*> inTransLatRetries;
-        std::vector<statistics::Scalar*> inTransLatTotal;
-
-        // Initialized by the SLICC compiler for all events.
+        // Initialized by the SLICC compiler for all events with the
+        // "in_trans" property.
+        // Only histograms with samples will appear in the stats
+        std::unordered_map<unsigned, statistics::Histogram*> inTransLatHist;
+        std::unordered_map<unsigned, statistics::Scalar*> inTransRetryCnt;
+        // Initialized by the SLICC compiler for all combinations of events
+        // with the "in_trans" property, potential initial states, and
+        // potential final states. Potential initial states are states that
+        // appear in transitions triggered by that event. Currently all states
+        // are considered as potential final states.
+        std::unordered_map<unsigned, std::unordered_map<unsigned,
+          std::vector<statistics::Scalar*>>> inTransStateChanges;
+
+        // Initialized by the SLICC compiler for all events with the
+        // "out_trans" property.
         // Only histograms with samples will appear in the stats.
-        std::vector<statistics::Histogram*> outTransLatHist;
-        std::vector<statistics::Scalar*> outTransLatHistRetries;
+        std::unordered_map<unsigned, statistics::Histogram*> outTransLatHist;
+        std::unordered_map<unsigned, statistics::Scalar*>
+          outTransRetryCnt;
 
         //! Counter for the number of cycles when the transitions carried out
         //! were equal to the maximum allowed
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.cc b/src/mem/ruby/slicc_interface/RubyRequest.cc
index 643c1dec6f..fbd211d2a8 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.cc
+++ b/src/mem/ruby/slicc_interface/RubyRequest.cc
@@ -61,7 +61,9 @@ RubyRequest::print(std::ostream& out) const
   out << " " << "AccessMode = " << m_AccessMode << " ";
   out << "Size = " << m_Size << " ";
   out << "Prefetch = " << m_Prefetch << " ";
-//  out << "Time = " << getTime() << " ";
+  out << "isGLCSet = " << m_isGLCSet << "";
+  out << "isSLCSet = " << m_isSLCSet << "";
+  //  out << "Time = " << getTime() << " ";
   out << "]";
 }
 
@@ -123,5 +125,14 @@ RubyRequest::functionalWrite(Packet *pkt)
     return cBase < cTail;
 }
 
+void
+RubyRequest::setWriteMask(uint32_t offset, uint32_t len,
+        std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps)
+{
+    m_writeMask.setMask(offset, len);
+    m_writeMask.setAtomicOps(atomicOps);
+}
+
+
 } // namespace ruby
 } // namespace gem5
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index 89ce83451e..1e9674b9f5 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -226,6 +226,8 @@ class RubyRequest : public Message
     const PrefetchBit& getPrefetch() const { return m_Prefetch; }
     RequestPtr getRequestPtr() const { return m_pkt->req; }
 
+    void setWriteMask(uint32_t offset, uint32_t len,
+        std::vector< std::pair<int,AtomicOpFunctor*>> atomicOps);
     void print(std::ostream& out) const;
     bool functionalRead(Packet *pkt);
     bool functionalRead(Packet *pkt, WriteMask &mask);
diff --git a/src/mem/ruby/structures/ALUFreeListArray.cc b/src/mem/ruby/structures/ALUFreeListArray.cc
new file mode 100644
index 0000000000..87b5cbfbd2
--- /dev/null
+++ b/src/mem/ruby/structures/ALUFreeListArray.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2023 The University of Wisconsin
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mem/ruby/structures/ALUFreeListArray.hh"
+
+#include "base/intmath.hh"
+#include "mem/ruby/system/RubySystem.hh"
+#include "sim/cur_tick.hh"
+
+namespace gem5
+{
+
+namespace ruby
+{
+
+/*
+*
+* Models num_ALUs pipelined atomic ALUs with a depth of access_latency ticks.
+* Rather than reserving ALUs, this class assumes multiple requests can go
+* through an ALU at the same time. As such, up to numALUs new requests can
+* go through at once, with the caveat that a line already being processed
+* in an ALU can't start processing again until the previous request has exited
+* the pipeline.
+*
+* ALUs aren't mapped directly to cache lines. Rather, ALUs are treated as
+* a free list.
+*
+* Behavior:
+*   Requests will go through unless one/both of the following are met:
+*       - There have been more than [numALUs] requests in the current cycle
+*       - The same line has been accessed in the past accessLatency ticks
+*/
+
+ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Tick access_latency)
+    : numALUs(num_ALUs),
+      accessLatency(access_latency)
+{
+}
+
+bool ALUFreeListArray::tryAccess(Addr addr)
+{
+    uint32_t accesses_this_tick = 0;
+
+    // Remove requests from the tail of the queue that occurred more than
+    // accessLatency ticks ago; clamp at 0 so the Tick cannot wrap around
+    Tick oldestValidRecordStart = curTick() > this->accessLatency ?
+        curTick() - this->accessLatency : 0;
+    while (accessQueue.size() > 0 &&
+         (accessQueue.back().startTick < oldestValidRecordStart)) {
+        accessQueue.pop_back();
+    }
+
+    for (AccessRecord& record : accessQueue) {
+        // Block access if all ALUs were already claimed in this tick, i.e.
+        // this request would need more ALUs than we have
+        if (record.startTick == curTick() &&
+            (++accesses_this_tick >= numALUs)) {
+            return false;
+        }
+
+        // Block access if the line is already being used
+        if (record.lineAddr == makeLineAddress(addr)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void ALUFreeListArray::reserve(Addr addr)
+{
+    // Callers invoke this only after tryAccess(), so the queue has already
+    // been pruned and the access is known to be valid
+    const Addr line_addr = makeLineAddress(addr);
+    // Newest record goes to the front; tryAccess() expires from the back
+    accessQueue.emplace_front(line_addr, curTick());
+}
+
+} // namespace ruby
+} // namespace gem5
diff --git a/src/mem/ruby/structures/ALUFreeListArray.hh b/src/mem/ruby/structures/ALUFreeListArray.hh
new file mode 100644
index 0000000000..bed1b00b5c
--- /dev/null
+++ b/src/mem/ruby/structures/ALUFreeListArray.hh
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2023 The University of Wisconsin
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_RUBY_STRUCTURES_ALUFREELISTARRAY_HH__
+#define __MEM_RUBY_STRUCTURES_ALUFREELISTARRAY_HH__
+
+#include <deque>
+
+#include "mem/ruby/common/TypeDefines.hh"
+#include "sim/cur_tick.hh"
+
+namespace gem5
+{
+
+namespace ruby
+{
+
+class ALUFreeListArray
+{
+  private:
+    unsigned int numALUs;
+    Tick accessLatency;
+
+    // One reserved access: the line it touched and the tick it started
+    struct AccessRecord
+    {
+        AccessRecord(Addr line_addr, Tick start_tick)
+            : lineAddr(line_addr), startTick(start_tick)
+        {
+        }
+
+        Addr lineAddr;
+        Tick startTick;
+    };
+
+    // Recent accesses, newest first; tryAccess() drops expired entries
+    std::deque<AccessRecord> accessQueue;
+
+  public:
+    ALUFreeListArray(unsigned int num_ALUs, Tick access_latency);
+    // True if an access to addr's line may start in the current tick
+    bool tryAccess(Addr addr);
+    // Record an access to addr's line starting in the current tick
+    void reserve(Addr addr);
+
+    Tick getLatency() const { return accessLatency; }
+};
+
+} // namespace ruby
+} // namespace gem5
+
+#endif
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index 5a5eaffa02..3b97d34d18 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -73,6 +73,8 @@ CacheMemory::CacheMemory(const Params &p)
               p.start_index_bit, p.ruby_system),
     tagArray(p.tagArrayBanks, p.tagAccessLatency,
              p.start_index_bit, p.ruby_system),
+    atomicALUArray(p.atomicALUs, p.atomicLatency *
+             p.ruby_system->clockPeriod()),
     cacheMemoryStats(this)
 {
     m_cache_size = p.size;
@@ -288,7 +290,7 @@ CacheMemory::allocate(Addr address, AbstractCacheEntry *entry)
             set[i] = entry;  // Init entry
             set[i]->m_Address = address;
             set[i]->m_Permission = AccessPermission_Invalid;
-            DPRINTF(RubyCache, "Allocate clearing lock for addr: %x\n",
+            DPRINTF(RubyCache, "Allocate clearing lock for addr: 0x%x\n",
                     address);
             set[i]->m_locked = -1;
             m_tag_index[address] = i;
@@ -529,6 +531,8 @@ CacheMemoryStats::CacheMemoryStats(statistics::Group *parent)
       ADD_STAT(numTagArrayWrites, "Number of tag array writes"),
       ADD_STAT(numTagArrayStalls, "Number of stalls caused by tag array"),
       ADD_STAT(numDataArrayStalls, "Number of stalls caused by data array"),
+      ADD_STAT(numAtomicALUOperations, "Number of atomic ALU operations"),
+      ADD_STAT(numAtomicALUArrayStalls, "Number of stalls caused by atomic ALU array"),
       ADD_STAT(htmTransCommitReadSet, "Read set size of a committed "
                                       "transaction"),
       ADD_STAT(htmTransCommitWriteSet, "Write set size of a committed "
@@ -564,6 +568,12 @@ CacheMemoryStats::CacheMemoryStats(statistics::Group *parent)
     numDataArrayStalls
         .flags(statistics::nozero);
 
+    numAtomicALUOperations
+        .flags(statistics::nozero);
+
+    numAtomicALUArrayStalls
+        .flags(statistics::nozero);
+
     htmTransCommitReadSet
         .init(8)
         .flags(statistics::pdf | statistics::dist | statistics::nozero |
@@ -633,6 +643,11 @@ CacheMemory::recordRequestType(CacheRequestType requestType, Addr addr)
             tagArray.reserve(addressToCacheSet(addr));
         cacheMemoryStats.numTagArrayWrites++;
         return;
+    case CacheRequestType_AtomicALUOperation:
+        if (m_resource_stalls)
+            atomicALUArray.reserve(addr);
+        cacheMemoryStats.numAtomicALUOperations++;
+        return;
     default:
         warn("CacheMemory access_type not found: %s",
              CacheRequestType_to_string(requestType));
@@ -664,6 +679,15 @@ CacheMemory::checkResourceAvailable(CacheResourceType res, Addr addr)
             cacheMemoryStats.numDataArrayStalls++;
             return false;
         }
+    } else if (res == CacheResourceType_AtomicALUArray) {
+        if (atomicALUArray.tryAccess(addr)) return true;
+        else {
+            DPRINTF(RubyResourceStalls,
+                    "Atomic ALU array stall on addr %#x in line address %#x\n",
+                    addr, makeLineAddress(addr));
+            cacheMemoryStats.numAtomicALUArrayStalls++;
+            return false;
+        }
     } else {
         panic("Unrecognized cache resource type.");
     }
diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh
index a63bb02748..de7c327f63 100644
--- a/src/mem/ruby/structures/CacheMemory.hh
+++ b/src/mem/ruby/structures/CacheMemory.hh
@@ -56,6 +56,7 @@
 #include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
 #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
 #include "mem/ruby/structures/BankedArray.hh"
+#include "mem/ruby/structures/ALUFreeListArray.hh"
 #include "mem/ruby/system/CacheRecorder.hh"
 #include "params/RubyCache.hh"
 #include "sim/sim_object.hh"
@@ -186,6 +187,7 @@ class CacheMemory : public SimObject
 
     BankedArray dataArray;
     BankedArray tagArray;
+    ALUFreeListArray atomicALUArray;
 
     int m_cache_size;
     int m_cache_num_sets;
@@ -224,6 +226,9 @@ class CacheMemory : public SimObject
           statistics::Scalar numTagArrayStalls;
           statistics::Scalar numDataArrayStalls;
 
+          statistics::Scalar numAtomicALUOperations;
+          statistics::Scalar numAtomicALUArrayStalls;
+
           // hardware transactional memory
           statistics::Histogram htmTransCommitReadSet;
           statistics::Histogram htmTransCommitWriteSet;
diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py
index f2c1b7230c..7446ac3de0 100644
--- a/src/mem/ruby/structures/RubyCache.py
+++ b/src/mem/ruby/structures/RubyCache.py
@@ -44,6 +44,11 @@ class RubyCache(SimObject):
         "0B", "block size in bytes. 0 means default RubyBlockSize"
     )
 
+    # Atomic parameters only applicable to GPU atomics
+    # Zero atomic latency corresponds to instantaneous atomic ALU operations
+    atomicLatency = Param.Cycles(0, "Cycles for an atomic ALU operation")
+    atomicALUs = Param.Int(64, "Number of atomic ALUs")
+
     dataArrayBanks = Param.Int(1, "Number of banks for the data array")
     tagArrayBanks = Param.Int(1, "Number of banks for the tag array")
     dataAccessLatency = Param.Cycles(1, "cycles for a data array access")
diff --git a/src/mem/ruby/structures/SConscript b/src/mem/ruby/structures/SConscript
index cae03909c7..7baab6a4c4 100644
--- a/src/mem/ruby/structures/SConscript
+++ b/src/mem/ruby/structures/SConscript
@@ -55,6 +55,7 @@ Source('PersistentTable.cc')
 Source('RubyPrefetcher.cc')
 Source('TimerTable.cc')
 Source('BankedArray.cc')
+Source('ALUFreeListArray.cc')
 Source('TBEStorage.cc')
 if env['PROTOCOL'] == 'CHI':
     Source('MN_TBETable.cc')
diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc
index 20a8a30ebc..3326856849 100644
--- a/src/mem/ruby/system/CacheRecorder.cc
+++ b/src/mem/ruby/system/CacheRecorder.cc
@@ -30,8 +30,10 @@
 #include "mem/ruby/system/CacheRecorder.hh"
 
 #include "debug/RubyCacheTrace.hh"
+#include "mem/packet.hh"
 #include "mem/ruby/system/RubySystem.hh"
 #include "mem/ruby/system/Sequencer.hh"
+#include "sim/sim_exit.hh"
 
 namespace gem5
 {
@@ -56,12 +58,14 @@ CacheRecorder::CacheRecorder()
 
 CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
                              uint64_t uncompressed_trace_size,
-                             std::vector<Sequencer*>& seq_map,
+                             std::vector<RubyPort*>& ruby_port_map,
                              uint64_t block_size_bytes)
     : m_uncompressed_trace(uncompressed_trace),
       m_uncompressed_trace_size(uncompressed_trace_size),
-      m_seq_map(seq_map),  m_bytes_read(0), m_records_read(0),
-      m_records_flushed(0), m_block_size_bytes(block_size_bytes)
+      m_ruby_port_map(ruby_port_map), m_bytes_read(0),
+      m_records_read(0), m_records_flushed(0),
+      m_block_size_bytes(block_size_bytes)
+
 {
     if (m_uncompressed_trace != NULL) {
         if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) {
@@ -80,7 +84,7 @@ CacheRecorder::~CacheRecorder()
         delete [] m_uncompressed_trace;
         m_uncompressed_trace = NULL;
     }
-    m_seq_map.clear();
+    m_ruby_port_map.clear();
 }
 
 void
@@ -94,13 +98,19 @@ CacheRecorder::enqueueNextFlushRequest()
                                              Request::funcRequestorId);
         MemCmd::Command requestType = MemCmd::FlushReq;
         Packet *pkt = new Packet(req, requestType);
+        pkt->req->setReqInstSeqNum(m_records_flushed);
+
 
-        Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
-        assert(m_sequencer_ptr != NULL);
-        m_sequencer_ptr->makeRequest(pkt);
+        RubyPort* m_ruby_port_ptr = m_ruby_port_map[rec->m_cntrl_id];
+        assert(m_ruby_port_ptr != NULL);
+        m_ruby_port_ptr->makeRequest(pkt);
 
         DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
+
     } else {
+        if (m_records_flushed > 0) {
+            exitSimLoop("Finished Drain", 0);
+        }
         DPRINTF(RubyCacheTrace, "Flushed all %d records\n", m_records_flushed);
     }
 }
@@ -141,15 +151,19 @@ CacheRecorder::enqueueNextFetchRequest()
 
             Packet *pkt = new Packet(req, requestType);
             pkt->dataStatic(traceRecord->m_data + rec_bytes_read);
+            pkt->req->setReqInstSeqNum(m_records_read);
+
 
-            Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
-            assert(m_sequencer_ptr != NULL);
-            m_sequencer_ptr->makeRequest(pkt);
+            RubyPort* m_ruby_port_ptr =
+                m_ruby_port_map[traceRecord->m_cntrl_id];
+            assert(m_ruby_port_ptr != NULL);
+            m_ruby_port_ptr->makeRequest(pkt);
         }
 
         m_bytes_read += (sizeof(TraceRecord) + m_block_size_bytes);
         m_records_read++;
     } else {
+        exitSimLoop("Finished Warmup", 0);
         DPRINTF(RubyCacheTrace, "Fetched all %d records\n", m_records_read);
     }
 }
@@ -168,6 +182,8 @@ CacheRecorder::addRecord(int cntrl, Addr data_addr, Addr pc_addr,
     memcpy(rec->m_data, data.getData(0, m_block_size_bytes),
            m_block_size_bytes);
 
+    DPRINTF(RubyCacheTrace, "Inside addRecord with cntrl id %d and type %d\n",
+            cntrl, type);
     m_records.push_back(rec);
 }
 
diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh
index be95590313..021da6a4da 100644
--- a/src/mem/ruby/system/CacheRecorder.hh
+++ b/src/mem/ruby/system/CacheRecorder.hh
@@ -50,7 +50,7 @@ namespace ruby
 {
 
 class Sequencer;
-
+class RubyPort;
 /*!
  * Class for recording cache contents. Note that the last element of the
  * class is an array of length zero. It is used for creating variable
@@ -78,7 +78,7 @@ class CacheRecorder
 
     CacheRecorder(uint8_t* uncompressed_trace,
                   uint64_t uncompressed_trace_size,
-                  std::vector<Sequencer*>& SequencerMap,
+                  std::vector<RubyPort*>& ruby_port_map,
                   uint64_t block_size_bytes);
     void addRecord(int cntrl, Addr data_addr, Addr pc_addr,
                    RubyRequestType type, Tick time, DataBlock& data);
@@ -114,7 +114,7 @@ class CacheRecorder
     std::vector<TraceRecord*> m_records;
     uint8_t* m_uncompressed_trace;
     uint64_t m_uncompressed_trace_size;
-    std::vector<Sequencer*> m_seq_map;
+    std::vector<RubyPort*> m_ruby_port_map;
     uint64_t m_bytes_read;
     uint64_t m_records_read;
     uint64_t m_records_flushed;
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
index 8bde3f7bc8..90d6031c6e 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -73,6 +73,14 @@ UncoalescedTable::insertPacket(PacketPtr pkt)
             pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size());
 }
 
+void
+UncoalescedTable::insertReqType(PacketPtr pkt, RubyRequestType type)
+{
+    uint64_t seqNum = pkt->req->getReqInstSeqNum();
+
+    reqTypeMap[seqNum] = type;
+}
+
 bool
 UncoalescedTable::packetAvailable()
 {
@@ -128,9 +136,21 @@ UncoalescedTable::updateResources()
             instMap.erase(iter++);
             instPktsRemaining.erase(seq_num);
 
-            // Release the token
-            DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num);
-            coalescer->getGMTokenPort().sendTokens(1);
+            // Release the token if the Ruby system is not in cooldown
+            // or warmup phases. When in these phases, the RubyPorts
+            // are accessed directly using the makeRequest() command
+            // instead of accessing through the port. This makes
+            // sending tokens through the port unnecessary
+            if (!RubySystem::getWarmupEnabled()
+                    && !RubySystem::getCooldownEnabled()) {
+                if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) {
+                    DPRINTF(GPUCoalescer,
+                            "Returning token seqNum %d\n", seq_num);
+                    coalescer->getGMTokenPort().sendTokens(1);
+                }
+            }
+
+            reqTypeMap.erase(seq_num);
         } else {
             ++iter;
         }
@@ -324,7 +344,8 @@ GPUCoalescer::printRequestTable(std::stringstream& ss)
                << "\t\tIssue time: "
                << request->getIssueTime() * clockPeriod() << "\n"
                << "\t\tDifference from current tick: "
-               << (curCycle() - request->getIssueTime()) * clockPeriod();
+               << (curCycle() - request->getIssueTime()) * clockPeriod()
+               << "\n";
         }
     }
 
@@ -505,26 +526,16 @@ GPUCoalescer::readCallback(Addr address,
     fatal_if(crequest->getRubyType() != RubyRequestType_LD,
              "readCallback received non-read type response\n");
 
-    // Iterate over the coalesced requests to respond to as many loads as
-    // possible until another request type is seen. Models MSHR for TCP.
-    while (crequest->getRubyType() == RubyRequestType_LD) {
-        hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
-                    forwardRequestTime, firstResponseTime, isRegion);
-
-        delete crequest;
-        coalescedTable.at(address).pop_front();
-        if (coalescedTable.at(address).empty()) {
-            break;
-        }
-
-        crequest = coalescedTable.at(address).front();
-    }
+    hitCallback(crequest, mach, data, true, crequest->getIssueTime(),
+                forwardRequestTime, firstResponseTime, isRegion);
 
+    delete crequest;
+    coalescedTable.at(address).pop_front();
     if (coalescedTable.at(address).empty()) {
-        coalescedTable.erase(address);
+      coalescedTable.erase(address);
     } else {
-        auto nextRequest = coalescedTable.at(address).front();
-        issueRequest(nextRequest);
+      auto nextRequest = coalescedTable.at(address).front();
+      issueRequest(nextRequest);
     }
 }
 
@@ -554,25 +565,56 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                       success, isRegion);
     // update the data
     //
-    // MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
+    // MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
     std::vector<PacketPtr> pktList = crequest->getPackets();
+
+    uint8_t* log = nullptr;
     DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
             pktList.size(), request_line_address);
+    uint32_t offset;
+    int pkt_size;
     for (auto& pkt : pktList) {
+        offset = getOffset(pkt->getAddr());
+        pkt_size = pkt->getSize();
         request_address = pkt->getAddr();
+
+        // When the Ruby system is in cooldown phase, the requests come from
+        // the cache recorder. These requests do not get coalesced and
+        // do not return valid data.
+        if (RubySystem::getCooldownEnabled())
+            continue;
+
         if (pkt->getPtr<uint8_t>()) {
-            if ((type == RubyRequestType_LD) ||
-                (type == RubyRequestType_ATOMIC) ||
-                (type == RubyRequestType_ATOMIC_RETURN) ||
-                (type == RubyRequestType_IFETCH) ||
-                (type == RubyRequestType_RMW_Read) ||
-                (type == RubyRequestType_Locked_RMW_Read) ||
-                (type == RubyRequestType_Load_Linked)) {
-                pkt->setData(
-                    data.getData(getOffset(request_address), pkt->getSize()));
-            } else {
-                data.setData(pkt->getPtr<uint8_t>(),
-                             getOffset(request_address), pkt->getSize());
+            switch(type) {
+                // Store and AtomicNoReturns follow the same path, as the
+                // data response is not needed.
+                case RubyRequestType_ATOMIC_NO_RETURN:
+                    assert(pkt->isAtomicOp());
+                    break;
+                case RubyRequestType_ST:
+                    break;
+                case RubyRequestType_LD:
+                    pkt->setData(data.getData(offset, pkt_size));
+                    break;
+                case RubyRequestType_ATOMIC_RETURN:
+                    assert(pkt->isAtomicOp());
+                    // Atomic operations are performed by the WriteMask
+                    // in packet order, set by the crequest. Thus, when
+                    // unpacking the changes from the log, we read from
+                    // the front of the log to correctly map response
+                    // data into the packets.
+
+                    // Log entry contains the old value before the current
+                    // atomic operation occurred.
+                    log = data.popAtomicLogEntryFront();
+                    pkt->setData(&log[offset]);
+                    delete [] log;
+                    log = nullptr;
+                    break;
+                default:
+                    panic("Unsupported ruby packet type:%s\n",
+                                    RubyRequestType_to_string(type));
+                    break;
             }
         } else {
             DPRINTF(MemoryAccess,
@@ -581,6 +623,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                     RubyRequestType_to_string(type));
         }
     }
+    assert(data.numAtomicLogEntries() == 0);
 
     m_outstanding_count--;
     assert(m_outstanding_count >= 0);
@@ -603,7 +646,6 @@ GPUCoalescer::getRequestType(PacketPtr pkt)
     assert(!pkt->req->isLLSC());
     assert(!pkt->req->isLockedRMW());
     assert(!pkt->req->isInstFetch());
-    assert(!pkt->isFlush());
 
     if (pkt->req->isAtomicReturn()) {
         req_type = RubyRequestType_ATOMIC_RETURN;
@@ -613,6 +655,8 @@ GPUCoalescer::getRequestType(PacketPtr pkt)
         req_type = RubyRequestType_LD;
     } else if (pkt->isWrite()) {
         req_type = RubyRequestType_ST;
+    } else if (pkt->isFlush()) {
+        req_type = RubyRequestType_FLUSH;
     } else {
         panic("Unsupported ruby packet type\n");
     }
@@ -634,7 +678,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
         issueMemSyncRequest(pkt);
     } else {
         // otherwise, this must be either read or write command
-        assert(pkt->isRead() || pkt->isWrite());
+        assert(pkt->isRead() || pkt->isWrite() || pkt->isFlush());
 
         InstSeqNum seq_num = pkt->req->getReqInstSeqNum();
 
@@ -643,10 +687,17 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
         // number of lanes actives for that vmem request (i.e., the popcnt
         // of the exec_mask.
         int num_packets = 1;
-        if (!m_usingRubyTester) {
-            num_packets = 0;
-            for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
-                num_packets += getDynInst(pkt)->getLaneStatus(i);
+
+        // When Ruby is in warmup or cooldown phase, the requests come from
+        // the cache recorder. There is no dynamic instruction associated
+        // with these requests either
+        if (!RubySystem::getWarmupEnabled()
+                && !RubySystem::getCooldownEnabled()) {
+            if (!m_usingRubyTester) {
+                num_packets = 0;
+                for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
+                    num_packets += getDynInst(pkt)->getLaneStatus(i);
+                }
             }
         }
 
@@ -655,6 +706,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
         // future cycle. Packets remaining is set to the number of excepted
         // requests from the instruction based on its exec_mask.
         uncoalescedTable.insertPacket(pkt);
+        uncoalescedTable.insertReqType(pkt, getRequestType(pkt));
         uncoalescedTable.initPacketsRemaining(seq_num, num_packets);
         DPRINTF(GPUCoalescer, "Put pkt with addr 0x%X to uncoalescedTable\n",
                 pkt->getAddr());
@@ -921,21 +973,27 @@ void
 GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist)
 {
     for (auto& pkt : mylist) {
-        RubyPort::SenderState *ss =
-            safe_cast<RubyPort::SenderState *>(pkt->senderState);
-        MemResponsePort *port = ss->port;
-        assert(port != NULL);
-
-        pkt->senderState = ss->predecessor;
+        // When Ruby is in warmup or cooldown phase, the requests come
+        // from the cache recorder. They do not track which port to use
+        // and do not need to send the response back
+        if (!RubySystem::getWarmupEnabled()
+                && !RubySystem::getCooldownEnabled()) {
+            RubyPort::SenderState *ss =
+                safe_cast<RubyPort::SenderState *>(pkt->senderState);
+            MemResponsePort *port = ss->port;
+            assert(port != NULL);
+
+            pkt->senderState = ss->predecessor;
+
+            if (pkt->cmd != MemCmd::WriteReq) {
+                // for WriteReq, we keep the original senderState until
+                // writeCompleteCallback
+                delete ss;
+            }
 
-        if (pkt->cmd != MemCmd::WriteReq) {
-            // for WriteReq, we keep the original senderState until
-            // writeCompleteCallback
-            delete ss;
+            port->hitCallback(pkt);
+            trySendRetries();
         }
-
-        port->hitCallback(pkt);
-        trySendRetries();
     }
 
     // We schedule an event in the same tick as hitCallback (similar to
@@ -947,7 +1005,14 @@ GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist)
         schedule(issueEvent, curTick());
     }
 
-    testDrainComplete();
+    RubySystem *rs = m_ruby_system;
+    if (RubySystem::getWarmupEnabled()) {
+        rs->m_cache_recorder->enqueueNextFetchRequest();
+    } else if (RubySystem::getCooldownEnabled()) {
+        rs->m_cache_recorder->enqueueNextFlushRequest();
+    } else {
+        testDrainComplete();
+    }
 }
 
 void
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
index dd28855547..d6db5c00ba 100644
--- a/src/mem/ruby/system/GPUCoalescer.hh
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -71,6 +71,7 @@ class UncoalescedTable
     ~UncoalescedTable() {}
 
     void insertPacket(PacketPtr pkt);
+    void insertReqType(PacketPtr pkt, RubyRequestType type);
     bool packetAvailable();
     void printRequestTable(std::stringstream& ss);
 
@@ -101,6 +102,8 @@ class UncoalescedTable
     std::map<InstSeqNum, PerInstPackets> instMap;
 
     std::map<InstSeqNum, int> instPktsRemaining;
+
+    std::map<InstSeqNum, RubyRequestType> reqTypeMap;
 };
 
 class CoalescedRequest
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index b38c903b09..109fd43051 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -177,22 +177,32 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
                               uint64_t cache_trace_size,
                               uint64_t block_size_bytes)
 {
-    std::vector<Sequencer*> sequencer_map;
-    Sequencer* sequencer_ptr = NULL;
+    std::vector<RubyPort*> ruby_port_map;
+    RubyPort* ruby_port_ptr = NULL;
 
     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
-        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
-        if (sequencer_ptr == NULL) {
-            sequencer_ptr = sequencer_map[cntrl];
+        if (m_abs_cntrl_vec[cntrl]->getGPUCoalescer() != NULL) {
+            ruby_port_map.push_back(
+                    (RubyPort*)m_abs_cntrl_vec[cntrl]->getGPUCoalescer());
+        } else {
+            ruby_port_map.push_back(
+                    (RubyPort*)m_abs_cntrl_vec[cntrl]->getCPUSequencer());
+        }
+
+        if (ruby_port_ptr == NULL) {
+            ruby_port_ptr = ruby_port_map[cntrl];
         }
     }
 
-    assert(sequencer_ptr != NULL);
+    assert(ruby_port_ptr != NULL);
 
     for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
-        if (sequencer_map[cntrl] == NULL) {
-            sequencer_map[cntrl] = sequencer_ptr;
+        if (ruby_port_map[cntrl] == NULL) {
+            ruby_port_map[cntrl] = ruby_port_ptr;
+        } else {
+            ruby_port_ptr = ruby_port_map[cntrl];
         }
+
     }
 
     // Remove the old CacheRecorder if it's still hanging about.
@@ -202,7 +212,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
 
     // Create the CacheRecorder and record the cache trace
     m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
-                                         sequencer_map, block_size_bytes);
+                                         ruby_port_map,
+                                         block_size_bytes);
 }
 
 void
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 82fc19b57c..48054febef 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -466,8 +466,12 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
     bool ruby_request = true;
     while (!seq_req_list.empty()) {
         SequencerRequest &seq_req = seq_req_list.front();
+        // Atomic Request may be executed remotely in the cache hierarchy
+        bool atomic_req =
+           ((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
+            (seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
 
-        if (noCoales && !ruby_request) {
+        if ((noCoales || atomic_req) && !ruby_request) {
             // Do not process follow-up requests
             // (e.g. if full line no present)
             // Reissue to the cache hierarchy
@@ -479,6 +483,8 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
             assert(seq_req.m_type != RubyRequestType_LD);
             assert(seq_req.m_type != RubyRequestType_Load_Linked);
             assert(seq_req.m_type != RubyRequestType_IFETCH);
+            assert(seq_req.m_type != RubyRequestType_ATOMIC_RETURN);
+            assert(seq_req.m_type != RubyRequestType_ATOMIC_NO_RETURN);
         }
 
         // handle write request
@@ -594,6 +600,62 @@ Sequencer::readCallback(Addr address, DataBlock& data,
     }
 }
 
+void
+Sequencer::atomicCallback(Addr address, DataBlock& data,
+                         const bool externalHit, const MachineType mach,
+                         const Cycles initialRequestTime,
+                         const Cycles forwardRequestTime,
+                         const Cycles firstResponseTime)
+{
+    //
+    // Free the first request (an atomic operation) from the list.
+    // Then issue the next request to ruby system as we cannot
+    // assume the cache line is present in the cache
+    // (the operation could be performed remotely)
+    //
+    assert(address == makeLineAddress(address));
+    assert(m_RequestTable.find(address) != m_RequestTable.end());
+    auto &seq_req_list = m_RequestTable[address];
+
+    // Perform hitCallback only on the first cpu request that
+    // issued the ruby request
+    bool ruby_request = true;
+    while (!seq_req_list.empty()) {
+        SequencerRequest &seq_req = seq_req_list.front();
+
+        if (ruby_request) {
+            // Check that the request was an atomic memory operation
+            // and record the latency
+            assert((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) ||
+                   (seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN));
+            recordMissLatency(&seq_req, true, mach, externalHit,
+                              initialRequestTime, forwardRequestTime,
+                              firstResponseTime);
+        } else {
+            // Read, Write or Atomic request:
+            // reissue request to the cache hierarchy
+            // (we don't know if op was performed remotely)
+            issueRequest(seq_req.pkt, seq_req.m_second_type);
+            break;
+        }
+
+        // Atomics clean the monitor entry
+        llscClearMonitor(address);
+
+        markRemoved();
+        ruby_request = false;
+        hitCallback(&seq_req, data, true, mach, externalHit,
+                    initialRequestTime, forwardRequestTime,
+                    firstResponseTime, false);
+        seq_req_list.pop_front();
+    }
+
+    // free all outstanding requests corresponding to this address
+    if (seq_req_list.empty()) {
+        m_RequestTable.erase(address);
+    }
+}
+
 void
 Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
                        bool llscSuccess,
@@ -637,10 +699,16 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
             (type == RubyRequestType_Locked_RMW_Read) ||
-            (type == RubyRequestType_Load_Linked)) {
+            (type == RubyRequestType_Load_Linked) ||
+            (type == RubyRequestType_ATOMIC_RETURN)) {
             pkt->setData(
                 data.getData(getOffset(request_address), pkt->getSize()));
-            DPRINTF(RubySequencer, "read data %s\n", data);
+
+           if (type == RubyRequestType_ATOMIC_RETURN) {
+               DPRINTF(RubySequencer, "ATOMIC RETURN data %s\n", data);
+           } else {
+               DPRINTF(RubySequencer, "read data %s\n", data);
+           }
         } else if (pkt->req->isSwap()) {
             assert(!pkt->isMaskedWrite());
             std::vector<uint8_t> overwrite_val(pkt->getSize());
@@ -807,6 +875,19 @@ Sequencer::makeRequest(PacketPtr pkt)
     } else if (pkt->req->isTlbiCmd()) {
         primary_type = secondary_type = tlbiCmdToRubyRequestType(pkt);
         DPRINTF(RubySequencer, "Issuing TLBI\n");
+#if defined (PROTOCOL_CHI)
+    } else if (pkt->isAtomicOp()) {
+        if (pkt->req->isAtomicReturn()){
+            DPRINTF(RubySequencer, "Issuing ATOMIC RETURN \n");
+            primary_type = secondary_type =
+                           RubyRequestType_ATOMIC_RETURN;
+        } else {
+            DPRINTF(RubySequencer, "Issuing ATOMIC NO RETURN\n");
+            primary_type = secondary_type =
+                           RubyRequestType_ATOMIC_NO_RETURN;
+
+        }
+#endif
     } else {
         //
         // To support SwapReq, we need to check isWrite() first: a SwapReq
@@ -914,6 +995,18 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
                                             RubyAccessMode_Supervisor, pkt,
                                             PrefetchBit_No, proc_id, core_id);
 
+        if (pkt->isAtomicOp() &&
+            ((secondary_type == RubyRequestType_ATOMIC_RETURN) ||
+             (secondary_type == RubyRequestType_ATOMIC_NO_RETURN))){
+            // Create the blocksize, access mask and atomicops
+            uint32_t offset = getOffset(pkt->getAddr());
+            std::vector<std::pair<int,AtomicOpFunctor*>> atomicOps;
+            atomicOps.push_back(std::make_pair<int,AtomicOpFunctor*>
+                                (offset, pkt->getAtomicOp()));
+
+            msg->setWriteMask(offset, pkt->getSize(), atomicOps);
+        }
+
         DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n",
                 curTick(), m_version, "Seq", "Begin", "", "",
                 printAddress(msg->getPhysicalAddress()),
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 020a7d8c20..8f736da6d5 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -126,6 +126,14 @@ class Sequencer : public RubyPort
                       const Cycles forwardRequestTime = Cycles(0),
                       const Cycles firstResponseTime = Cycles(0));
 
+    void atomicCallback(Addr address,
+                        DataBlock& data,
+                        const bool externalHit = false,
+                        const MachineType mach = MachineType_NUM,
+                        const Cycles initialRequestTime = Cycles(0),
+                        const Cycles forwardRequestTime = Cycles(0),
+                        const Cycles firstResponseTime = Cycles(0));
+
     void unaddressedCallback(Addr unaddressedReqId,
                              RubyRequestType requestType,
                              const MachineType mach = MachineType_NUM,
diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc
index ea95129841..a5198cce63 100644
--- a/src/mem/ruby/system/VIPERCoalescer.cc
+++ b/src/mem/ruby/system/VIPERCoalescer.cc
@@ -75,12 +75,14 @@ VIPERCoalescer::makeRequest(PacketPtr pkt)
     //    ReadReq             : cache read
     //    WriteReq            : cache write
     //    AtomicOp            : cache atomic
+    //    Flush               : flush and invalidate cache
     //
     // VIPER does not expect MemSyncReq & Release since in GCN3, compute unit
     // does not specify an equivalent type of memory request.
     assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
             pkt->cmd == MemCmd::ReadReq ||
             pkt->cmd == MemCmd::WriteReq ||
+            pkt->cmd == MemCmd::FlushReq ||
             pkt->isAtomicOp());
 
     if (pkt->req->isInvL1() && m_cache_inv_pkt) {
diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py
index 0c34113902..14b2e48cd3 100644
--- a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py
+++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py
@@ -41,7 +41,7 @@ def __init__(self, slicc, queue_name, type_ast, statements):
         self.statements = statements
 
     def __repr__(self):
-        return "[DeferEnqueueingStatementAst: %s %s %s]" % (
+        return "[DeferEnqueueingStatementAst: {} {} {}]".format(
             self.queue_name,
             self.type_ast.ident,
             self.statements,
diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py
index 148cc3a223..c2d47af9ce 100644
--- a/src/mem/slicc/ast/EnqueueStatementAST.py
+++ b/src/mem/slicc/ast/EnqueueStatementAST.py
@@ -31,16 +31,25 @@
 
 
 class EnqueueStatementAST(StatementAST):
-    def __init__(self, slicc, queue_name, type_ast, lexpr, statements):
+    def __init__(
+        self,
+        slicc,
+        queue_name,
+        type_ast,
+        lexpr,
+        bypass_strict_fifo,
+        statements,
+    ):
         super().__init__(slicc)
 
         self.queue_name = queue_name
         self.type_ast = type_ast
         self.latexpr = lexpr
+        self.bypass_strict_fifo = bypass_strict_fifo
         self.statements = statements
 
     def __repr__(self):
-        return "[EnqueueStatementAst: %s %s %s]" % (
+        return "[EnqueueStatementAst: {} {} {}]".format(
             self.queue_name,
             self.type_ast.ident,
             self.statements,
@@ -76,10 +85,17 @@ def generate(self, code, return_type, **kwargs):
 
         if self.latexpr != None:
             ret_type, rcode = self.latexpr.inline(True)
-            code(
-                "(${{self.queue_name.var.code}}).enqueue("
-                "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
-            )
+            if self.bypass_strict_fifo != None:
+                bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False)
+                code(
+                    "(${{self.queue_name.var.code}}).enqueue("
+                    "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);"
+                )
+            else:
+                code(
+                    "(${{self.queue_name.var.code}}).enqueue("
+                    "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));"
+                )
         else:
             code(
                 "(${{self.queue_name.var.code}}).enqueue(out_msg, "
diff --git a/src/mem/slicc/ast/EnumDeclAST.py b/src/mem/slicc/ast/EnumDeclAST.py
index 9b4a6be77a..19eb1eeddd 100644
--- a/src/mem/slicc/ast/EnumDeclAST.py
+++ b/src/mem/slicc/ast/EnumDeclAST.py
@@ -47,7 +47,7 @@ def files(self, parent=None):
             ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        s = set((f"{ident}.hh", f"{ident}.cc"))
+        s = {f"{ident}.hh", f"{ident}.cc"}
         return s
 
     def generate(self):
diff --git a/src/mem/slicc/ast/FuncCallExprAST.py b/src/mem/slicc/ast/FuncCallExprAST.py
index 6ccca6650a..01b604c8bf 100644
--- a/src/mem/slicc/ast/FuncCallExprAST.py
+++ b/src/mem/slicc/ast/FuncCallExprAST.py
@@ -282,7 +282,7 @@ def generate(self, code, **kwargs):
             params = ""
             first_param = True
 
-            for (param_code, type) in zip(cvec, type_vec):
+            for param_code, type in zip(cvec, type_vec):
                 if first_param:
                     params = str(param_code)
                     first_param = False
diff --git a/src/mem/slicc/ast/MachineAST.py b/src/mem/slicc/ast/MachineAST.py
index 5c76aa8173..0d9c0ec31b 100644
--- a/src/mem/slicc/ast/MachineAST.py
+++ b/src/mem/slicc/ast/MachineAST.py
@@ -42,15 +42,13 @@ def __repr__(self):
         return f"[Machine: {self.ident!r}]"
 
     def files(self, parent=None):
-        s = set(
-            (
-                f"{self.ident}_Controller.cc",
-                f"{self.ident}_Controller.hh",
-                f"{self.ident}_Controller.py",
-                f"{self.ident}_Transitions.cc",
-                f"{self.ident}_Wakeup.cc",
-            )
-        )
+        s = {
+            f"{self.ident}_Controller.cc",
+            f"{self.ident}_Controller.hh",
+            f"{self.ident}_Controller.py",
+            f"{self.ident}_Transitions.cc",
+            f"{self.ident}_Wakeup.cc",
+        }
 
         s |= self.decls.files(self.ident)
         return s
diff --git a/src/mem/slicc/ast/MethodCallExprAST.py b/src/mem/slicc/ast/MethodCallExprAST.py
index 7bdf0c7dd9..b1649a644e 100644
--- a/src/mem/slicc/ast/MethodCallExprAST.py
+++ b/src/mem/slicc/ast/MethodCallExprAST.py
@@ -77,7 +77,7 @@ def __init__(self, slicc, obj_expr_ast, func_call):
         self.obj_expr_ast = obj_expr_ast
 
     def __repr__(self):
-        return "[MethodCallExpr: %r%r %r]" % (
+        return "[MethodCallExpr: {!r}{!r} {!r}]".format(
             self.proc_name,
             self.obj_expr_ast,
             self.expr_ast_vec,
@@ -189,7 +189,6 @@ def __repr__(self):
         return f"[MethodCallExpr: {self.proc_name!r} {self.expr_ast_vec!r}]"
 
     def generate_prefix(self, paramTypes):
-
         # class method call
         prefix = f"({self.type_ast}::"
         obj_type = self.type_ast.type
diff --git a/src/mem/slicc/ast/OperatorExprAST.py b/src/mem/slicc/ast/OperatorExprAST.py
index 714b553101..87417b50ea 100644
--- a/src/mem/slicc/ast/OperatorExprAST.py
+++ b/src/mem/slicc/ast/OperatorExprAST.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 # Copyright (c) 2009 The Hewlett-Packard Development Company
 # All rights reserved.
@@ -76,11 +88,14 @@ def generate(self, code, **kwargs):
                     ("int", "int", "int"),
                     ("Cycles", "Cycles", "Cycles"),
                     ("Tick", "Tick", "Tick"),
+                    ("Addr", "Addr", "Addr"),
                     ("Cycles", "int", "Cycles"),
                     ("Scalar", "int", "Scalar"),
                     ("int", "bool", "int"),
                     ("bool", "int", "int"),
                     ("int", "Cycles", "Cycles"),
+                    ("Addr", "int", "Addr"),
+                    ("int", "Addr", "Addr"),
                 ]
             else:
                 self.error(f"No operator matched with {self.op}!")
@@ -94,8 +109,8 @@ def generate(self, code, **kwargs):
 
             if output == None:
                 self.error(
-                    "Type mismatch: operands ({0}, {1}) for operator "
-                    "'{2}' failed to match with the expected types".format(
+                    "Type mismatch: operands ({}, {}) for operator "
+                    "'{}' failed to match with the expected types".format(
                         ltype, rtype, self.op
                     )
                 )
diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py
index fd07d4e13a..00edff4e7b 100644
--- a/src/mem/slicc/ast/PeekStatementAST.py
+++ b/src/mem/slicc/ast/PeekStatementAST.py
@@ -40,11 +40,13 @@ def __init__(self, slicc, queue_name, type_ast, pairs, statements, method):
         self.method = method
 
     def __repr__(self):
-        return "[PeekStatementAST: %r queue_name: %r type: %r %r]" % (
-            self.method,
-            self.queue_name,
-            self.type_ast,
-            self.statements,
+        return (
+            "[PeekStatementAST: {!r} queue_name: {!r} type: {!r} {!r}]".format(
+                self.method,
+                self.queue_name,
+                self.type_ast,
+                self.statements,
+            )
         )
 
     def generate(self, code, return_type, **kwargs):
diff --git a/src/mem/slicc/ast/StateDeclAST.py b/src/mem/slicc/ast/StateDeclAST.py
index d190326484..3ff3ab4e89 100644
--- a/src/mem/slicc/ast/StateDeclAST.py
+++ b/src/mem/slicc/ast/StateDeclAST.py
@@ -46,7 +46,7 @@ def files(self, parent=None):
             ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        s = set((f"{ident}.hh", f"{ident}.cc"))
+        s = {f"{ident}.hh", f"{ident}.cc"}
         return s
 
     def generate(self):
diff --git a/src/mem/slicc/ast/StaticCastAST.py b/src/mem/slicc/ast/StaticCastAST.py
index 178285202b..b6b70efcb5 100644
--- a/src/mem/slicc/ast/StaticCastAST.py
+++ b/src/mem/slicc/ast/StaticCastAST.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2009 Advanced Micro Devices, Inc.
 # All rights reserved.
 #
@@ -42,22 +54,9 @@ def generate(self, code, **kwargs):
         actual_type, ecode = self.expr_ast.inline(True)
         if self.type_modifier == "pointer":
             code("static_cast<${{self.type_ast.type.c_ident}} *>($ecode)")
+        elif self.type_modifier == "value":
+            code("static_cast<${{self.type_ast.type.c_ident}} >($ecode)")
         else:
             code("static_cast<${{self.type_ast.type.c_ident}} &>($ecode)")
 
-        if not "interface" in self.type_ast.type:
-            self.expr_ast.error(
-                "static cast only premitted for those types "
-                "that implement inherit an interface"
-            )
-
-        # The interface type should match
-        if str(actual_type) != str(self.type_ast.type["interface"]):
-            self.expr_ast.error(
-                "static cast miss-match, type is '%s',"
-                "but inherited type is '%s'",
-                actual_type,
-                self.type_ast.type["interface"],
-            )
-
         return self.type_ast.type
diff --git a/src/mem/slicc/ast/TypeDeclAST.py b/src/mem/slicc/ast/TypeDeclAST.py
index d39e678477..bef49b484c 100644
--- a/src/mem/slicc/ast/TypeDeclAST.py
+++ b/src/mem/slicc/ast/TypeDeclAST.py
@@ -47,7 +47,7 @@ def files(self, parent=None):
             ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        return set((f"{ident}.hh", f"{ident}.cc"))
+        return {f"{ident}.hh", f"{ident}.cc"}
 
     def generate(self):
         ident = str(self.type_ast)
diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py
index 155eb07f7a..af47ddb639 100644
--- a/src/mem/slicc/parser.py
+++ b/src/mem/slicc/parser.py
@@ -86,7 +86,7 @@ def writeHTMLFiles(self, html_path):
         self.symtab.writeHTMLFiles(html_path)
 
     def files(self):
-        f = set(["Types.hh"])
+        f = {"Types.hh"}
 
         f |= self.decl_list.files()
 
@@ -284,7 +284,7 @@ def p_declsx__none(self, p):
     def p_decl__protocol(self, p):
         "decl : PROTOCOL STRING SEMI"
         if self.protocol:
-            msg = "Protocol can only be set once! Error at %s:%s\n" % (
+            msg = "Protocol can only be set once! Error at {}:{}\n".format(
                 self.current_source,
                 self.current_line,
             )
@@ -633,11 +633,15 @@ def p_statement__assign(self, p):
 
     def p_statement__enqueue(self, p):
         "statement : ENQUEUE '(' var ',' type ')' statements"
-        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, p[7])
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], None, None, p[7])
 
     def p_statement__enqueue_latency(self, p):
         "statement : ENQUEUE '(' var ',' type ',' expr ')' statements"
-        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9])
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], None, p[9])
+
+    def p_statement__enqueue_latency_bypass_strict_fifo(self, p):
+        "statement : ENQUEUE '(' var ',' type ',' expr ',' expr ')' statements"
+        p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[7], p[9], p[11])
 
     def p_statement__defer_enqueueing(self, p):
         "statement : DEFER_ENQUEUEING '(' var ',' type ')' statements"
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index 4712064089..68a1a6a8af 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021 ARM Limited
+# Copyright (c) 2019-2021,2023 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -64,6 +64,7 @@
     "DMASequencer": "DMASequencer",
     "RubyPrefetcher": "RubyPrefetcher",
     "Cycles": "Cycles",
+    "Addr": "Addr",
 }
 
 
@@ -111,8 +112,11 @@ def __init__(self, symtab, ident, location, pairs, config_parameters):
         self.actions = OrderedDict()
         self.request_types = OrderedDict()
         self.transitions = []
+        self.transitions_per_ev = {}
         self.in_ports = []
         self.functions = []
+        self.event_stats_in_trans = []
+        self.event_stats_out_trans = []
 
         # Data members in the State Machine that have been declared inside
         # the {} machine.  Note that these along with the config params
@@ -136,6 +140,10 @@ def addState(self, state):
     def addEvent(self, event):
         assert self.table is None
         self.events[event.ident] = event
+        if "in_trans" in event.pairs:
+            self.event_stats_in_trans.append(event)
+        if "out_trans" in event.pairs:
+            self.event_stats_out_trans.append(event)
 
     def addAction(self, action):
         assert self.table is None
@@ -163,6 +171,9 @@ def addRequestType(self, request_type):
     def addTransition(self, trans):
         assert self.table is None
         self.transitions.append(trans)
+        if trans.event not in self.transitions_per_ev:
+            self.transitions_per_ev[trans.event] = []
+        self.transitions_per_ev[trans.event].append(trans)
 
     def addInPort(self, var):
         self.in_ports.append(var)
@@ -957,53 +968,91 @@ def printControllerCC(self, path, includes):
         }
     }
 
-    for (${ident}_Event event = ${ident}_Event_FIRST;
-                 event < ${ident}_Event_NUM; ++event) {
+"""
+        )
+        # Collect the events tagged with the "in_trans"/"out_trans" pairs;
+        # only those get inTransLatHist and outTransLatHist stats.
+        ev_ident_list = [
+            f"{ident}_Event_{ev.ident}" for ev in self.event_stats_out_trans
+        ]
+        ev_ident_str = "{" + ",".join(ev_ident_list) + "}"
+        code(
+            """
+    const std::vector<${ident}_Event> out_trans_evs = ${ev_ident_str};
+"""
+        )
+        ev_ident_list = [
+            f"{ident}_Event_{ev.ident}" for ev in self.event_stats_in_trans
+        ]
+        ev_ident_str = "{" + ",".join(ev_ident_list) + "}"
+        code(
+            """
+    const std::vector<${ident}_Event> in_trans_evs = ${ev_ident_str};
+"""
+        )
+        kv_ident_list = []
+        for ev in self.event_stats_in_trans:
+            key_ident = f"{ident}_Event_{ev.ident}"
+            val_ident_lst = [
+                f"{ident}_State_{trans.state.ident}"
+                for trans in self.transitions_per_ev[ev]
+            ]
+            val_ident_str = "{" + ",".join(val_ident_lst) + "}"
+            kv_ident_list.append(f"{{{key_ident}, {val_ident_str}}}")
+        key_ident_str = "{" + ",".join(kv_ident_list) + "}"
+        code(
+            """
+    const std::unordered_map<${ident}_Event, std::vector<${ident}_State>>
+                                in_trans_evs_states = ${key_ident_str};
+"""
+        )
+        code(
+            """
+
+    for (const auto event : out_trans_evs) {
         std::string stat_name =
             "outTransLatHist." + ${ident}_Event_to_string(event);
         statistics::Histogram* t =
             new statistics::Histogram(&stats, stat_name.c_str());
-        stats.outTransLatHist.push_back(t);
+        stats.outTransLatHist[event] = t;
         t->init(5);
         t->flags(statistics::pdf | statistics::total |
                  statistics::oneline | statistics::nozero);
 
         statistics::Scalar* r = new statistics::Scalar(&stats,
                                              (stat_name + ".retries").c_str());
-        stats.outTransLatHistRetries.push_back(r);
+        stats.outTransRetryCnt[event] = r;
         r->flags(statistics::nozero);
     }
 
-    for (${ident}_Event event = ${ident}_Event_FIRST;
-                 event < ${ident}_Event_NUM; ++event) {
-        std::string stat_name = "inTransLatHist." +
-                                ${ident}_Event_to_string(event);
-        statistics::Scalar* r = new statistics::Scalar(&stats,
-                                             (stat_name + ".total").c_str());
-        stats.inTransLatTotal.push_back(r);
-        r->flags(statistics::nozero);
+    for (const auto event : in_trans_evs) {
+        std::string stat_name =
+            "inTransLatHist." + ${ident}_Event_to_string(event);
+        statistics::Histogram* t =
+            new statistics::Histogram(&stats, stat_name.c_str());
+        stats.inTransLatHist[event] = t;
+        t->init(5);
+        t->flags(statistics::pdf | statistics::total |
+                 statistics::oneline | statistics::nozero);
 
-        r = new statistics::Scalar(&stats,
-                              (stat_name + ".retries").c_str());
-        stats.inTransLatRetries.push_back(r);
+        statistics::Scalar* r = new statistics::Scalar(&stats,
+                                             (stat_name + ".retries").c_str());
+        stats.inTransRetryCnt[event] = r;
         r->flags(statistics::nozero);
 
-        stats.inTransLatHist.emplace_back();
-        for (${ident}_State initial_state = ${ident}_State_FIRST;
-             initial_state < ${ident}_State_NUM; ++initial_state) {
-            stats.inTransLatHist.back().emplace_back();
+        auto &src_states = stats.inTransStateChanges[event];
+        for (const auto initial_state : in_trans_evs_states.at(event)) {
+            auto &dst_vector = src_states[initial_state];
             for (${ident}_State final_state = ${ident}_State_FIRST;
                  final_state < ${ident}_State_NUM; ++final_state) {
                 std::string stat_name = "inTransLatHist." +
                     ${ident}_Event_to_string(event) + "." +
                     ${ident}_State_to_string(initial_state) + "." +
-                    ${ident}_State_to_string(final_state);
-                statistics::Histogram* t =
-                    new statistics::Histogram(&stats, stat_name.c_str());
-                stats.inTransLatHist.back().back().push_back(t);
-                t->init(5);
-                t->flags(statistics::pdf | statistics::total |
-                         statistics::oneline | statistics::nozero);
+                    ${ident}_State_to_string(final_state) + ".total";
+                statistics::Scalar* t =
+                    new statistics::Scalar(&stats, stat_name.c_str());
+                t->flags(statistics::nozero);
+                dst_vector.push_back(t);
             }
         }
     }
@@ -1683,7 +1732,7 @@ def printCSwitch(self, path):
         cases = OrderedDict()
 
         for trans in self.transitions:
-            case_string = "%s_State_%s, %s_Event_%s" % (
+            case_string = "{}_State_{}, {}_Event_{}".format(
                 self.ident,
                 trans.state.ident,
                 self.ident,
@@ -1727,10 +1776,10 @@ def printCSwitch(self, path):
             # Check all of the request_types for resource constraints
             for request_type in request_types:
                 val = """
-if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
+if (!checkResourceAvailable({}_RequestType_{}, addr)) {{
     return TransitionResult_ResourceStall;
-}
-""" % (
+}}
+""".format(
                     self.ident,
                     request_type.ident,
                 )
diff --git a/src/mem/slicc/symbols/SymbolTable.py b/src/mem/slicc/symbols/SymbolTable.py
index d2fbf8f7a9..5ea18c6d62 100644
--- a/src/mem/slicc/symbols/SymbolTable.py
+++ b/src/mem/slicc/symbols/SymbolTable.py
@@ -43,7 +43,7 @@ def makeDir(path):
         os.makedirs(path, exist_ok=True)
 
 
-class SymbolTable(object):
+class SymbolTable:
     def __init__(self, slicc):
         self.slicc = slicc
 
diff --git a/src/mem/slicc/symbols/Transition.py b/src/mem/slicc/symbols/Transition.py
index b517cf4d44..385065f972 100644
--- a/src/mem/slicc/symbols/Transition.py
+++ b/src/mem/slicc/symbols/Transition.py
@@ -72,7 +72,7 @@ def __init__(
                 self.resources[var] = str(num)
 
     def __repr__(self):
-        return "[Transition: (%r, %r) -> %r, %r]" % (
+        return "[Transition: ({!r}, {!r}) -> {!r}, {!r}]".format(
             self.state,
             self.event,
             self.nextState,
diff --git a/src/mem/slicc/util.py b/src/mem/slicc/util.py
index 3bb4131a01..bcbf057170 100644
--- a/src/mem/slicc/util.py
+++ b/src/mem/slicc/util.py
@@ -28,7 +28,7 @@
 import sys
 
 
-class PairContainer(object):
+class PairContainer:
     def __init__(self, pairs=None):
         self.pairs = {}
         if pairs:
@@ -47,7 +47,7 @@ def get(self, item, failobj=None):
         return self.pairs.get(item, failobj)
 
 
-class Location(object):
+class Location:
     def __init__(self, filename, lineno, no_warning=False):
         if not isinstance(filename, str):
             raise AttributeError(
diff --git a/src/mem/snoop_filter.hh b/src/mem/snoop_filter.hh
index 7d4a222874..23cf77fcdd 100644
--- a/src/mem/snoop_filter.hh
+++ b/src/mem/snoop_filter.hh
@@ -302,7 +302,7 @@ class SnoopFilter : public SimObject
     /** Track the mapping from port ids to the local mask ids. */
     std::vector<PortID> localResponsePortIds;
     /** Cache line size. */
-    const unsigned linesize;
+    const Addr linesize;
     /** Latency for doing a lookup in the filter */
     const Cycles lookupLatency;
     /** Max capacity in terms of cache blocks tracked, for sanity checking */
diff --git a/src/mem/translating_port_proxy.cc b/src/mem/translating_port_proxy.cc
index 8ab859f40d..8daa390d80 100644
--- a/src/mem/translating_port_proxy.cc
+++ b/src/mem/translating_port_proxy.cc
@@ -86,7 +86,7 @@ TranslatingPortProxy::tryOnBlob(BaseMMU::Mode mode, TranslationGenPtr gen,
 }
 
 bool
-TranslatingPortProxy::tryReadBlob(Addr addr, void *p, int size) const
+TranslatingPortProxy::tryReadBlob(Addr addr, void *p, uint64_t size) const
 {
     constexpr auto mode = BaseMMU::Read;
     return tryOnBlob(mode, _tc->getMMUPtr()->translateFunctional(
@@ -99,7 +99,7 @@ TranslatingPortProxy::tryReadBlob(Addr addr, void *p, int size) const
 
 bool
 TranslatingPortProxy::tryWriteBlob(
-        Addr addr, const void *p, int size) const
+        Addr addr, const void *p, uint64_t size) const
 {
     constexpr auto mode = BaseMMU::Write;
     return tryOnBlob(mode, _tc->getMMUPtr()->translateFunctional(
@@ -111,7 +111,7 @@ TranslatingPortProxy::tryWriteBlob(
 }
 
 bool
-TranslatingPortProxy::tryMemsetBlob(Addr addr, uint8_t v, int size) const
+TranslatingPortProxy::tryMemsetBlob(Addr addr, uint8_t v, uint64_t size) const
 {
     constexpr auto mode = BaseMMU::Write;
     return tryOnBlob(mode, _tc->getMMUPtr()->translateFunctional(
diff --git a/src/mem/translating_port_proxy.hh b/src/mem/translating_port_proxy.hh
index bedb57a3ce..9e60a858b9 100644
--- a/src/mem/translating_port_proxy.hh
+++ b/src/mem/translating_port_proxy.hh
@@ -77,16 +77,16 @@ class TranslatingPortProxy : public PortProxy
 
     /** Version of tryReadblob that translates virt->phys and deals
       * with page boundries. */
-    bool tryReadBlob(Addr addr, void *p, int size) const override;
+    bool tryReadBlob(Addr addr, void *p, uint64_t size) const override;
 
     /** Version of tryWriteBlob that translates virt->phys and deals
       * with page boundries. */
-    bool tryWriteBlob(Addr addr, const void *p, int size) const override;
+    bool tryWriteBlob(Addr addr, const void *p, uint64_t size) const override;
 
     /**
      * Fill size bytes starting at addr with byte value val.
      */
-    bool tryMemsetBlob(Addr address, uint8_t  v, int size) const override;
+    bool tryMemsetBlob(Addr address, uint8_t  v, uint64_t size) const override;
 };
 
 } // namespace gem5
diff --git a/src/mem/xbar.cc b/src/mem/xbar.cc
index e1b2a8b521..0d4b2fca97 100644
--- a/src/mem/xbar.cc
+++ b/src/mem/xbar.cc
@@ -45,6 +45,9 @@
 
 #include "mem/xbar.hh"
 
+#include <memory>
+#include <string>
+
 #include "base/logging.hh"
 #include "base/trace.hh"
 #include "debug/AddrRanges.hh"
@@ -328,7 +331,7 @@ BaseXBar::Layer<SrcType, DstType>::recvRetry()
 }
 
 PortID
-BaseXBar::findPort(AddrRange addr_range)
+BaseXBar::findPort(AddrRange addr_range, PacketPtr pkt)
 {
     // we should never see any address lookups before we've got the
     // ranges of all connected CPU-side-port modules
@@ -353,10 +356,17 @@ BaseXBar::findPort(AddrRange addr_range)
         return defaultPortID;
     }
 
-    // we should use the range for the default port and it did not
-    // match, or the default port is not set
-    fatal("Unable to find destination for %s on %s\n", addr_range.to_string(),
-          name());
+    // We should use the range for the default port and it did not match,
+    // or the default port is not set. Dump out the port trace if possible.
+    std::string port_trace = "";
+    if (pkt) {
+        std::shared_ptr<TracingExtension> ext =
+            pkt->getExtension<TracingExtension>();
+        port_trace = ext ? ext->getTraceInString() :
+            "Use --debug-flags=PortTrace to see the port trace of the packet.";
+    }
+    fatal("Unable to find destination for %s on %s\n%s\n",
+          addr_range.to_string(), name(), port_trace);
 }
 
 /** Function called by the port when the crossbar is receiving a range change.*/
diff --git a/src/mem/xbar.hh b/src/mem/xbar.hh
index 1df8b7ea13..b7f763a15a 100644
--- a/src/mem/xbar.hh
+++ b/src/mem/xbar.hh
@@ -344,9 +344,16 @@ class BaseXBar : public ClockedObject
      * given a packet with this address range.
      *
      * @param addr_range Address range to find port for.
-     * @return id of port that the packet should be sent out of.
+     * @param pkt Packet containing the address range.
+     * @return id of port that the packet should be sent out of.
      */
-    PortID findPort(AddrRange addr_range);
+    PortID findPort(AddrRange addr_range, PacketPtr pkt=nullptr);
+
+    PortID
+    findPort(PacketPtr pkt)
+    {
+        return findPort(pkt->getAddrRange(), pkt);
+    }
 
     /**
      * Return the address ranges the crossbar is responsible for.
diff --git a/src/proto/SConsopts b/src/proto/SConsopts
index 6b5b25d0ce..6cc4a48642 100644
--- a/src/proto/SConsopts
+++ b/src/proto/SConsopts
@@ -50,14 +50,6 @@ with gem5_scons.Configure(main) as conf:
         warning('protoc version', min_protoc_version, 'or newer required.\n'
                 'Installed version:', protoc_version[1])
     else:
-        # Attempt to determine the appropriate include path and
-        # library path using pkg-config, that means we also need to
-        # check for pkg-config. Note that it is possible to use
-        # protobuf without the involvement of pkg-config. Later on we
-        # check go a library config check and at that point the test
-        # will fail if libprotobuf cannot be found.
-        if conf.env['HAVE_PKG_CONFIG']:
-            conf.CheckPkgConfig('protobuf', '--cflags', '--libs-only-L')
         conf.env['HAVE_PROTOC'] = True
 
     # If we have the protobuf compiler, also make sure we have the
@@ -65,9 +57,13 @@ with gem5_scons.Configure(main) as conf:
     # automatically added to the LIBS environment variable. After
     # this, we can use the HAVE_PROTOBUF flag to determine if we have
     # got both protoc and libprotobuf available.
-    conf.env['CONF']['HAVE_PROTOBUF'] = conf.env['HAVE_PROTOC'] and \
-        conf.CheckLibWithHeader('protobuf', 'google/protobuf/message.h',
-                                'C++', 'GOOGLE_PROTOBUF_VERIFY_VERSION;')
+    conf.env['CONF']['HAVE_PROTOBUF'] = bool(
+        conf.env['HAVE_PROTOC'] and (
+            (conf.env['HAVE_PKG_CONFIG'] and
+            conf.CheckPkgConfig('protobuf', '--cflags', '--libs')) or
+            conf.CheckLibWithHeader('protobuf', 'google/protobuf/message.h',
+                                    'C++', 'GOOGLE_PROTOBUF_VERIFY_VERSION;'))
+    )
 
 # If we have the compiler but not the library, print another warning.
 if main['HAVE_PROTOC'] and not main['CONF']['HAVE_PROTOBUF']:
diff --git a/src/python/SConscript b/src/python/SConscript
index f98b5700c7..eaaea203f0 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -154,6 +154,18 @@ PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
 PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
     'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
     'l3_cache.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/'
+    'octopi.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/'
+    'core_complex.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/'
+    'octopi_network.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/'
+    'ruby_network_components.py')
 PySource('gem5.components.cachehierarchies.ruby.caches.mi_example',
     'gem5/components/cachehierarchies/ruby/caches/mi_example/__init__.py')
 PySource('gem5.components.cachehierarchies.ruby.caches.mi_example',
@@ -299,6 +311,7 @@ PySource('gem5.utils.multiprocessing',
 PySource('', 'importer.py')
 PySource('m5', 'm5/__init__.py')
 PySource('m5', 'm5/SimObject.py')
+PySource('m5', 'm5/citations.py')
 PySource('m5', 'm5/core.py')
 PySource('m5', 'm5/debug.py')
 PySource('m5', 'm5/event.py')
diff --git a/src/python/gem5/components/boards/abstract_board.py b/src/python/gem5/components/boards/abstract_board.py
index 4ea8866009..aba080e239 100644
--- a/src/python/gem5/components/boards/abstract_board.py
+++ b/src/python/gem5/components/boards/abstract_board.py
@@ -28,7 +28,7 @@
 import inspect
 
 from .mem_mode import MemMode, mem_mode_to_string
-from ...resources.workload import AbstractWorkload
+from ...resources.resource import WorkloadResource
 
 from m5.objects import (
     AddrRange,
@@ -198,7 +198,7 @@ def is_fullsystem(self) -> bool:
             )
         return self._is_fs
 
-    def set_workload(self, workload: AbstractWorkload) -> None:
+    def set_workload(self, workload: WorkloadResource) -> None:
         """
         Set the workload for this board to run.
 
diff --git a/src/python/gem5/components/boards/arm_board.py b/src/python/gem5/components/boards/arm_board.py
index b439edf970..032a863fd3 100644
--- a/src/python/gem5/components/boards/arm_board.py
+++ b/src/python/gem5/components/boards/arm_board.py
@@ -88,7 +88,6 @@ def __init__(
         platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
         release: ArmRelease = ArmDefaultRelease(),
     ) -> None:
-
         # The platform and the clk has to be set before calling the super class
         self._platform = platform
         self._clk_freq = clk_freq
@@ -117,7 +116,6 @@ def __init__(
 
     @overrides(AbstractBoard)
     def _setup_board(self) -> None:
-
         # This board is expected to run full-system simulation.
         # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py`
         self.workload = ArmFsLinux()
@@ -213,7 +211,6 @@ def _setup_io_devices(self) -> None:
         # created. The IO device has to be attached first. This is done in the
         # realview class.
         if self.get_cache_hierarchy().is_ruby():
-
             # All the on-chip devices are attached in this method.
             self.realview.attachOnChipIO(
                 self.iobus,
@@ -359,7 +356,6 @@ def get_disk_device(self):
 
     @overrides(KernelDiskWorkload)
     def _add_disk_to_board(self, disk_image: AbstractResource):
-
         self._image = CowDiskImage(
             child=RawDiskImage(
                 read_only=True, image_file=disk_image.get_local_path()
@@ -379,13 +375,13 @@ def _setup_memory_ranges(self) -> None:
 
     @overrides(KernelDiskWorkload)
     def get_default_kernel_args(self) -> List[str]:
-
         # The default kernel string is taken from the devices.py file.
         return [
             "console=ttyAMA0",
             "lpj=19988480",
             "norandmaps",
             "root={root_value}",
+            "disk_device={disk_device}",
             "rw",
             f"mem={self.get_memory().get_size()}",
         ]
diff --git a/src/python/gem5/components/boards/experimental/lupv_board.py b/src/python/gem5/components/boards/experimental/lupv_board.py
index ad130b7273..85843b89e2 100644
--- a/src/python/gem5/components/boards/experimental/lupv_board.py
+++ b/src/python/gem5/components/boards/experimental/lupv_board.py
@@ -90,9 +90,8 @@ def __init__(
         memory: AbstractMemorySystem,
         cache_hierarchy: AbstractCacheHierarchy,
     ) -> None:
-
         if cache_hierarchy.is_ruby():
-            raise EnvironmentError("RiscvBoard is not compatible with Ruby")
+            raise OSError("RiscvBoard is not compatible with Ruby")
 
         if processor.get_isa() != ISA.RISCV:
             raise Exception(
@@ -105,7 +104,6 @@ def __init__(
 
     @overrides(AbstractSystemBoard)
     def _setup_board(self) -> None:
-
         self.workload = RiscvLinux()
 
         # Initialize all the devices that we want to use on this board
@@ -318,7 +316,7 @@ def _generate_device_tree(self, outdir: str) -> None:
             node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
             node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
             node.append(FdtPropertyStrings("status", "okay"))
-            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdcsu"))
+            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
             # TODO: Should probably get this from the core.
             freq = self.clk_domain.clock[0].frequency
             node.appendCompatible(["riscv"])
@@ -534,14 +532,19 @@ def _generate_device_tree(self, outdir: str) -> None:
         fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
         fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))
 
-    @overrides(KernelDiskWorkload)
-    def get_default_kernel_args(self) -> List[str]:
-        return ["console=ttyLIO0", "root={root_value}", "rw"]
-
     @overrides(KernelDiskWorkload)
     def get_disk_device(self) -> str:
         return "/dev/lda"
 
+    @overrides(KernelDiskWorkload)
+    def get_default_kernel_args(self) -> List[str]:
+        return [
+            "console=ttyLIO0",
+            "root={root_value}",
+            "disk_device={disk_device}",
+            "rw",
+        ]
+
     @overrides(KernelDiskWorkload)
     def _add_disk_to_board(self, disk_image: AbstractResource) -> None:
         # Note: This must be called after set_workload because it looks for an
diff --git a/src/python/gem5/components/boards/kernel_disk_workload.py b/src/python/gem5/components/boards/kernel_disk_workload.py
index 15e0cdf303..72b143e6ff 100644
--- a/src/python/gem5/components/boards/kernel_disk_workload.py
+++ b/src/python/gem5/components/boards/kernel_disk_workload.py
@@ -87,7 +87,7 @@ def get_default_kernel_args(self) -> List[str]:
     @abstractmethod
     def get_disk_device(self) -> str:
         """
-        Get the disk device, e.g., "/dev/sda", where the disk image is placed.
+        Get the default disk device, used when the user does not specify one.
 
         :returns: The disk device.
         """
@@ -139,6 +139,7 @@ def set_kernel_disk_workload(
         kernel: KernelResource,
         disk_image: DiskImageResource,
         bootloader: Optional[BootloaderResource] = None,
+        disk_device: Optional[str] = None,
         readfile: Optional[str] = None,
         readfile_contents: Optional[str] = None,
         kernel_args: Optional[List[str]] = None,
@@ -171,6 +172,9 @@ def set_kernel_disk_workload(
         # Abstract board. This function will not work otherwise.
         assert isinstance(self, AbstractBoard)
 
+        # Set the disk device
+        self._disk_device = disk_device
+
         # If we are setting a workload of this type, we need to run as a
         # full-system simulation.
         self._set_fullsystem(True)
@@ -182,7 +186,12 @@ def set_kernel_disk_workload(
         self.workload.command_line = (
             " ".join(kernel_args or self.get_default_kernel_args())
         ).format(
-            root_value=self.get_default_kernel_root_val(disk_image=disk_image)
+            root_value=self.get_default_kernel_root_val(disk_image=disk_image),
+            disk_device=(
+                self._disk_device
+                if self._disk_device
+                else self.get_disk_device()
+            ),
         )
 
         # Setting the bootloader information for ARM board. The current
diff --git a/src/python/gem5/components/boards/riscv_board.py b/src/python/gem5/components/boards/riscv_board.py
index 25f1fac562..5e5af815a4 100644
--- a/src/python/gem5/components/boards/riscv_board.py
+++ b/src/python/gem5/components/boards/riscv_board.py
@@ -259,6 +259,12 @@ def generate_device_tree(self, outdir: str) -> None:
             )
             root.append(node)
 
+        node = FdtNode(f"chosen")
+        bootargs = " ".join(self.get_default_kernel_args())
+        node.append(FdtPropertyStrings("bootargs", [bootargs]))
+        node.append(FdtPropertyStrings("stdout-path", ["/uart@10000000"]))
+        root.append(node)
+
         # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
         cpus_node = FdtNode("cpus")
         cpus_state = FdtState(addr_cells=1, size_cells=0)
@@ -273,8 +279,24 @@ def generate_device_tree(self, outdir: str) -> None:
             node.append(FdtPropertyStrings("device_type", "cpu"))
             node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
             node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
+            if core.core.isa[0].enable_Zicbom_fs.value:
+                node.append(
+                    FdtPropertyWords(
+                        "riscv,cbom-block-size", self.get_cache_line_size()
+                    )
+                )
+            if core.core.isa[0].enable_Zicboz_fs.value:
+                node.append(
+                    FdtPropertyWords(
+                        "riscv,cboz-block-size", self.get_cache_line_size()
+                    )
+                )
             node.append(FdtPropertyStrings("status", "okay"))
-            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
+            node.append(
+                FdtPropertyStrings(
+                    "riscv,isa", core.core.isa[0].get_isa_string()
+                )
+            )
             # TODO: Should probably get this from the core.
             freq = self.clk_domain.clock[0].frequency
             node.append(FdtPropertyWords("clock-frequency", freq))
@@ -432,7 +454,7 @@ def generate_device_tree(self, outdir: str) -> None:
         uart_node.append(
             FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
         )
-        uart_node.appendCompatible(["ns8250"])
+        uart_node.appendCompatible(["ns8250", "ns16550a"])
         soc_node.append(uart_node)
 
         # VirtIO MMIO disk node
@@ -494,4 +516,9 @@ def _add_disk_to_board(self, disk_image: AbstractResource):
 
     @overrides(KernelDiskWorkload)
     def get_default_kernel_args(self) -> List[str]:
-        return ["console=ttyS0", "root={root_value}", "rw"]
+        return [
+            "console=ttyS0",
+            "root={root_value}",
+            "disk_device={disk_device}",
+            "rw",
+        ]
diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index c62a1b67ea..cba268b2df 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -75,7 +75,6 @@ def set_se_binary_workload(
         """Set up the system to run a specific binary.
 
         **Limitations**
-        * Only supports single threaded applications.
         * Dynamically linked executables are partially supported when the host
           ISA and the simulated ISA are the same.
 
diff --git a/src/python/gem5/components/boards/x86_board.py b/src/python/gem5/components/boards/x86_board.py
index 04fec617c1..e7e65ecf71 100644
--- a/src/python/gem5/components/boards/x86_board.py
+++ b/src/python/gem5/components/boards/x86_board.py
@@ -202,7 +202,6 @@ def _setup_io_devices(self):
         base_entries.append(pci_dev4_inta)
 
         def assignISAInt(irq, apicPin):
-
             assign_8259_to_apic = X86IntelMPIOIntAssignment(
                 interrupt_type="ExtInt",
                 polarity="ConformPolarity",
@@ -318,4 +317,5 @@ def get_default_kernel_args(self) -> List[str]:
             "console=ttyS0",
             "lpj=7999923",
             "root={root_value}",
+            "disk_device={disk_device}",
         ]
diff --git a/src/python/gem5/components/cachehierarchies/chi/nodes/abstract_node.py b/src/python/gem5/components/cachehierarchies/chi/nodes/abstract_node.py
index 9853174464..ed8c3e0d5a 100644
--- a/src/python/gem5/components/cachehierarchies/chi/nodes/abstract_node.py
+++ b/src/python/gem5/components/cachehierarchies/chi/nodes/abstract_node.py
@@ -66,7 +66,7 @@ def versionCount(cls):
     # TODO: I don't love that we have to pass in the cache line size.
     # However, we need some way to set the index bits
     def __init__(self, network: RubyNetwork, cache_line_size: int):
-        super(AbstractNode, self).__init__()
+        super().__init__()
 
         # Note: Need to call versionCount method on *this* class, not the
         # potentially derived class
@@ -85,6 +85,9 @@ def __init__(self, network: RubyNetwork, cache_line_size: int):
         # Use 32-byte channels (two flits per message)
         self.data_channel_size = 32
 
+        # Use near atomics (see: https://github.com/gem5/gem5/issues/449)
+        self.policy_type = 0
+
         self.connectQueues(network)
 
     def getBlockSizeBits(self):
diff --git a/src/python/gem5/components/cachehierarchies/chi/nodes/directory.py b/src/python/gem5/components/cachehierarchies/chi/nodes/directory.py
index 3488435d56..b93112a620 100644
--- a/src/python/gem5/components/cachehierarchies/chi/nodes/directory.py
+++ b/src/python/gem5/components/cachehierarchies/chi/nodes/directory.py
@@ -72,6 +72,7 @@ def __init__(
         self.alloc_on_readunique = False
         self.alloc_on_readonce = False
         self.alloc_on_writeback = False
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = False
diff --git a/src/python/gem5/components/cachehierarchies/chi/nodes/dma_requestor.py b/src/python/gem5/components/cachehierarchies/chi/nodes/dma_requestor.py
index ccac6cae91..f6b63e0649 100644
--- a/src/python/gem5/components/cachehierarchies/chi/nodes/dma_requestor.py
+++ b/src/python/gem5/components/cachehierarchies/chi/nodes/dma_requestor.py
@@ -56,6 +56,7 @@ def __init__(self, network, cache_line_size, clk_domain: ClockDomain):
         self.alloc_on_readunique = False
         self.alloc_on_readonce = False
         self.alloc_on_writeback = False
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = True
diff --git a/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py b/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py
index 3e38c9038f..2f618491ca 100644
--- a/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py
+++ b/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py
@@ -66,6 +66,7 @@ def __init__(
         self.alloc_on_readunique = True
         self.alloc_on_readonce = True
         self.alloc_on_writeback = False  # Should never happen in an L1
+        self.alloc_on_atomic = False
         self.dealloc_on_unique = False
         self.dealloc_on_shared = False
         self.dealloc_backinv_unique = True
diff --git a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py
index 9c91e05ac1..7be9239331 100644
--- a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py
@@ -75,7 +75,6 @@ def __init__(self, size: str, assoc: int) -> None:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         requires(coherence_protocol_required=CoherenceProtocol.CHI)
 
         self.ruby_system = RubySystem()
diff --git a/src/python/gem5/components/cachehierarchies/classic/no_cache.py b/src/python/gem5/components/cachehierarchies/classic/no_cache.py
index 51b5d30eb4..b7af6ed02f 100644
--- a/src/python/gem5/components/cachehierarchies/classic/no_cache.py
+++ b/src/python/gem5/components/cachehierarchies/classic/no_cache.py
@@ -97,12 +97,10 @@ def get_cpu_side_port(self) -> Port:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         if board.has_coherent_io():
             self._setup_coherent_io_bridge(board)
 
         for core in board.get_processor().get_cores():
-
             core.connect_icache(self.membus.cpu_side_ports)
             core.connect_dcache(self.membus.cpu_side_ports)
             core.connect_walker_ports(
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
index 42ff183a1d..9a40c39550 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
@@ -86,7 +86,6 @@ def get_cpu_side_port(self) -> Port:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
@@ -117,7 +116,6 @@ def incorporate_cache(self, board: AbstractBoard) -> None:
             self._setup_io_cache(board)
 
         for i, cpu in enumerate(board.get_processor().get_cores()):
-
             cpu.connect_icache(self.l1icaches[i].cpu_side)
             cpu.connect_dcache(self.l1dcaches[i].cpu_side)
 
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
index 8b60aef7f6..b27ced916c 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
@@ -111,7 +111,6 @@ def get_cpu_side_port(self) -> Port:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
@@ -148,7 +147,6 @@ def incorporate_cache(self, board: AbstractBoard) -> None:
             self._setup_io_cache(board)
 
         for i, cpu in enumerate(board.get_processor().get_cores()):
-
             cpu.connect_icache(self.l1icaches[i].cpu_side)
             cpu.connect_dcache(self.l1dcaches[i].cpu_side)
 
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
index 72df1a53de..be2dfbe79c 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
@@ -107,7 +107,6 @@ def get_cpu_side_port(self) -> Port:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
@@ -143,7 +142,6 @@ def incorporate_cache(self, board: AbstractBoard) -> None:
             self._setup_io_cache(board)
 
         for i, cpu in enumerate(board.get_processor().get_cores()):
-
             cpu.connect_icache(self.l1icaches[i].cpu_side)
             cpu.connect_dcache(self.l1dcaches[i].cpu_side)
 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_directory.py
index e39a38ccc9..8552b975e1 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_directory.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_directory.py
@@ -30,7 +30,6 @@
 
 
 class AbstractDirectory(Directory_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_dma_controller.py
index 8d36736017..dad1f7cd72 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_dma_controller.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_dma_controller.py
@@ -30,7 +30,6 @@
 
 
 class AbstractDMAController(DMA_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l1_cache.py
index 683d69584c..237cd606df 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l1_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l1_cache.py
@@ -35,7 +35,6 @@
 
 
 class AbstractL1Cache(L1Cache_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l2_cache.py
index 88b41228c4..41929f4e42 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/abstract_l2_cache.py
@@ -30,7 +30,6 @@
 
 
 class AbstractL2Cache(L2Cache_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
index cd4f166fed..58676daaf0 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
@@ -32,7 +32,6 @@
 
 class Directory(AbstractDirectory):
     def __init__(self, network, cache_line_size, mem_range, port):
-
         super().__init__(network, cache_line_size)
         self.addr_ranges = [mem_range]
         self.directory = RubyDirectoryMemory()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
index e746579834..0d7f436193 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
@@ -39,10 +39,10 @@
 
 import math
 
+
 # L0Cache_Controller is the ruby backend's terminology corresponding to
 # L1 cache in stdlib terms.
 class L1Cache(L0Cache_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
index dfc1304a87..280c2e4110 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
@@ -38,10 +38,10 @@
 
 import math
 
+
 # L1Cache_Controller is ruby backend's terminology corresponding to
 # L2Cache in stdlib's terms
 class L2Cache(L1Cache_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
index 0a93d9b0c8..4b05166752 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
@@ -28,10 +28,10 @@
 
 import math
 
+
 # L2Cache_Controller is ruby backend's terminology corresponding to
 # L3 cache in stdlib.
 class L3Cache(L2Cache_Controller):
-
     _version = 0
 
     @classmethod
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py
index cd4f166fed..58676daaf0 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py
@@ -32,7 +32,6 @@
 
 class Directory(AbstractDirectory):
     def __init__(self, network, cache_line_size, mem_range, port):
-
         super().__init__(network, cache_line_size)
         self.addr_ranges = [mem_range]
         self.directory = RubyDirectoryMemory()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py
index e74772cc18..0e7cddf6fe 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py
@@ -37,7 +37,6 @@ class Directory(AbstractDirectory):
     """
 
     def __init__(self, network, cache_line_size, mem_range, port):
-
         super().__init__(network, cache_line_size)
         self.addr_ranges = [mem_range]
         self.directory = RubyDirectoryMemory()
diff --git a/tests/configs/simple-atomic-dummychecker.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/__init__.py
similarity index 66%
rename from tests/configs/simple-atomic-dummychecker.py
rename to src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/__init__.py
index 7ec004765a..b08f46b513 100644
--- a/tests/configs/simple-atomic-dummychecker.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/__init__.py
@@ -1,15 +1,6 @@
-# Copyright (c) 2013 ARM Limited
+# Copyright (c) 2022-2023 The Regents of the University of California
 # All rights reserved.
 #
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -32,10 +23,3 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-root = BaseSESystemUniprocessor(
-    mem_mode="atomic", cpu_class=AtomicSimpleCPU, checker=True
-).create_root()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py
new file mode 100644
index 0000000000..f056d76f98
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py
@@ -0,0 +1,245 @@
+# Copyright (c) 2022-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import List, Tuple
+
+from gem5.isas import ISA
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.components.processors.abstract_core import AbstractCore
+from gem5.components.cachehierarchies.ruby.caches.mesi_three_level.l1_cache import (
+    L1Cache,
+)
+from gem5.components.cachehierarchies.ruby.caches.mesi_three_level.l2_cache import (
+    L2Cache,
+)
+from gem5.components.cachehierarchies.ruby.caches.mesi_three_level.l3_cache import (
+    L3Cache,
+)
+
+from m5.objects import SubSystem, RubySequencer
+
+from .ruby_network_components import (
+    RubyRouter,
+    RubyExtLink,
+    RubyIntLink,
+    RubyNetworkComponent,
+)
+
+
+class CoreComplex(SubSystem, RubyNetworkComponent):
+    _core_id = 0
+    _core_complex_id = 0
+
+    @classmethod
+    def _get_core_id(cls):
+        cls._core_id += 1
+        return cls._core_id - 1
+
+    @classmethod
+    def _get_core_complex_id(cls):
+        cls._core_complex_id += 1
+        return cls._core_complex_id - 1
+
+    def __init__(
+        self,
+        board: AbstractBoard,
+        cores: List[AbstractCore],
+        ruby_system,
+        l1i_size: str,
+        l1i_assoc: int,
+        l1d_size: str,
+        l1d_assoc: int,
+        l2_size: str,
+        l2_assoc: int,
+        l3_size: str,
+        l3_assoc: int,
+    ):
+        SubSystem.__init__(self=self)
+        RubyNetworkComponent.__init__(self=self)
+
+        self._l1i_size = l1i_size
+        self._l1i_assoc = l1i_assoc
+        self._l1d_size = l1d_size
+        self._l1d_assoc = l1d_assoc
+        self._l2_size = l2_size
+        self._l2_assoc = l2_assoc
+        self._l3_size = l3_size
+        self._l3_assoc = l3_assoc
+
+        self._board = board
+        self._cores = cores
+        self._ruby_system = ruby_system
+        self._cache_line_size = 64
+
+        self._directory_controllers = []
+
+        self._core_complex_id = self._get_core_complex_id()
+        self.main_router = RubyRouter(
+            self._ruby_system
+        )  # this will be connected to components outside the core complex
+        self._add_router(self.main_router)
+        self._create_core_complex()
+
+    def get_main_router(self):
+        return self.main_router
+
+    def _create_core_complex(self):
+        # Create L1 caches, L2 cache, and corresponding controllers per core
+        self.core_clusters = [
+            self._create_core_cluster(core) for core in self._cores
+        ]
+        # Create L3 cache and its corresponding controller
+        self._create_shared_cache()
+        # Setting up one router and one external link per controller
+        self._create_external_links()
+        # Setting up L1/L2 links, L2/main links, L3/main link
+        self._create_internal_links()
+
+    def _create_core_cluster(self, core: AbstractCore):
+        cluster = SubSystem()
+        core_id = self._get_core_id()
+
+        cluster.l1_cache = L1Cache(
+            l1i_size=self._l1i_size,
+            l1i_assoc=self._l1i_assoc,
+            l1d_size=self._l1d_size,
+            l1d_assoc=self._l1d_assoc,
+            network=self._ruby_system.network,
+            core=core,
+            cache_line_size=self._cache_line_size,
+            target_isa=self._board.processor.get_isa(),
+            clk_domain=self._board.get_clock_domain(),
+        )
+        cluster.l1_cache.sequencer = RubySequencer(
+            version=core_id,
+            dcache=cluster.l1_cache.Dcache,
+            clk_domain=cluster.l1_cache.clk_domain,
+        )
+
+        if self._board.has_io_bus():
+            cluster.l1_cache.sequencer.connectIOPorts(self._board.get_io_bus())
+        cluster.l1_cache.ruby_system = self._ruby_system
+        core.connect_icache(cluster.l1_cache.sequencer.in_ports)
+        core.connect_dcache(cluster.l1_cache.sequencer.in_ports)
+        core.connect_walker_ports(
+            cluster.l1_cache.sequencer.in_ports,
+            cluster.l1_cache.sequencer.in_ports,
+        )
+        if self._board.get_processor().get_isa() == ISA.X86:
+            core.connect_interrupt(
+                cluster.l1_cache.sequencer.interrupt_out_port,
+                cluster.l1_cache.sequencer.in_ports,
+            )
+        else:
+            core.connect_interrupt()
+
+        cluster.l2_cache = L2Cache(
+            l2_size=self._l2_size,
+            l2_assoc=self._l2_assoc,
+            network=self._ruby_system.network,
+            core=core,
+            num_l3Caches=1,  # each core complex has 1 slice of L3 Cache
+            cache_line_size=self._cache_line_size,
+            cluster_id=self._core_complex_id,
+            target_isa=self._board.processor.get_isa(),
+            clk_domain=self._board.get_clock_domain(),
+        )
+        cluster.l2_cache.ruby_system = self._ruby_system
+        # L0Cache in the ruby backend is l1 cache in stdlib
+        # L1Cache in the ruby backend is l2 cache in stdlib
+        cluster.l2_cache.bufferFromL0 = cluster.l1_cache.bufferToL1
+        cluster.l2_cache.bufferToL0 = cluster.l1_cache.bufferFromL1
+
+        return cluster
+
+    def _create_shared_cache(self):
+        self.l3_cache = L3Cache(
+            l3_size=self._l3_size,
+            l3_assoc=self._l3_assoc,
+            network=self._ruby_system.network,
+            num_l3Caches=1,
+            cache_line_size=self._cache_line_size,
+            cluster_id=self._core_complex_id,
+        )
+        self.l3_cache.ruby_system = self._ruby_system
+
+    # Creates the per-controller routers and their external links
+    def _create_external_links(self):
+        # create a router per cache controller
+        #  - there is one L3 per ccd
+        self.l3_router = RubyRouter(self._ruby_system)
+        self._add_router(self.l3_router)
+        #  - there is one L1 and one L2 per cluster
+        for cluster in self.core_clusters:
+            cluster.l1_router = RubyRouter(self._ruby_system)
+            self._add_router(cluster.l1_router)
+            cluster.l2_router = RubyRouter(self._ruby_system)
+            self._add_router(cluster.l2_router)
+
+        # create an ext link from a controller to a router
+        self.l3_router_link = RubyExtLink(
+            ext_node=self.l3_cache,
+            int_node=self.l3_router,
+            bandwidth_factor=64,
+        )
+        self._add_ext_link(self.l3_router_link)
+        for cluster in self.core_clusters:
+            cluster.l1_router_link = RubyExtLink(
+                ext_node=cluster.l1_cache, int_node=cluster.l1_router
+            )
+            self._add_ext_link(cluster.l1_router_link)
+            cluster.l2_router_link = RubyExtLink(
+                ext_node=cluster.l2_cache, int_node=cluster.l2_router
+            )
+            self._add_ext_link(cluster.l2_router_link)
+
+    def _create_internal_links(self):
+        # create L1/L2 links
+        for cluster in self.core_clusters:
+            l1_to_l2, l2_to_l1 = RubyIntLink.create_bidirectional_links(
+                cluster.l1_router, cluster.l2_router
+            )
+            cluster.l1_to_l2_link = l1_to_l2
+            cluster.l2_to_l1_link = l2_to_l1
+            self._add_int_link(l1_to_l2)
+            self._add_int_link(l2_to_l1)
+        # create L2/main_router links
+        for cluster in self.core_clusters:
+            l2_to_main, main_to_l2 = RubyIntLink.create_bidirectional_links(
+                cluster.l2_router, self.main_router
+            )
+            cluster.l2_to_main_link = l2_to_main
+            cluster.main_to_l2_link = main_to_l2
+            self._add_int_link(l2_to_main)
+            self._add_int_link(main_to_l2)
+        # create L3/main_router link
+        l3_to_main, main_to_l3 = RubyIntLink.create_bidirectional_links(
+            self.l3_router, self.main_router, bandwidth_factor=64
+        )
+        self.l3_to_main_link = l3_to_main
+        self.main_to_l3_link = main_to_l3
+        self._add_int_link(l3_to_main)
+        self._add_int_link(main_to_l3)
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py
new file mode 100644
index 0000000000..09ee69e011
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py
@@ -0,0 +1,257 @@
+# Copyright (c) 2022-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from ...abstract_ruby_cache_hierarchy import AbstractRubyCacheHierarchy
+from ....abstract_three_level_cache_hierarchy import (
+    AbstractThreeLevelCacheHierarchy,
+)
+from ......coherence_protocol import CoherenceProtocol
+from ......components.boards.abstract_board import AbstractBoard
+from ......utils.requires import requires
+
+from ......components.cachehierarchies.ruby.caches.mesi_three_level.directory import (
+    Directory,
+)
+from ......components.cachehierarchies.ruby.caches.mesi_three_level.dma_controller import (
+    DMAController,
+)
+
+from m5.objects import RubySystem, DMASequencer, RubyPortProxy
+
+from .core_complex import CoreComplex
+from .octopi_network import OctopiNetwork
+from .ruby_network_components import RubyRouter, RubyExtLink, RubyIntLink
+
+
+# CoreComplex sub-systems own the L1, L2, L3 controllers
+# OctopiCache owns the directory controllers
+# RubySystem owns the DMA Controllers
+class OctopiCache(
+    AbstractRubyCacheHierarchy, AbstractThreeLevelCacheHierarchy
+):
+    """A MESI_Three_Level Ruby cache hierarchy made of core complexes.
+
+    The routers of the core complexes and of the directory, IO and DMA
+    controllers are all linked to the network's cross-CCD router.
+    """
+
+    def __init__(
+        self,
+        l1i_size: str,
+        l1i_assoc: int,
+        l1d_size: str,
+        l1d_assoc: int,
+        l2_size: str,
+        l2_assoc: int,
+        l3_size: str,
+        l3_assoc: int,
+        num_core_complexes: int,
+        is_fullsystem: bool,
+    ):
+        """
+        :param num_core_complexes: Number of CoreComplex sub-systems that the
+        processor's cores are sliced into.
+        :param is_fullsystem: Whether to also create the IO DMA controller.
+        """
+        AbstractRubyCacheHierarchy.__init__(self=self)
+        AbstractThreeLevelCacheHierarchy.__init__(
+            self=self,
+            l1i_size=l1i_size,
+            l1i_assoc=l1i_assoc,
+            l1d_size=l1d_size,
+            l1d_assoc=l1d_assoc,
+            l2_size=l2_size,
+            l2_assoc=l2_assoc,
+            l3_size=l3_size,
+            l3_assoc=l3_assoc,
+        )
+
+        self._directory_controllers = []
+        self._dma_controllers = []
+        self._io_controllers = []
+        self._core_complexes = []
+        self._num_core_complexes = num_core_complexes
+        self._is_fullsystem = is_fullsystem
+
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Create the Ruby system, network and controllers for `board`."""
+        requires(
+            coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL
+        )
+
+        self.ruby_system = RubySystem()
+        # MESI_Three_Level needs 3 virtual networks
+        self.ruby_system.number_of_virtual_networks = 3
+        self.ruby_system.network = OctopiNetwork(self.ruby_system)
+        network = self.ruby_system.network
+
+        # Equal slice of cores per core complex; leftover cores are unassigned
+        all_cores = board.get_processor().get_cores()
+        ccd_size = len(all_cores) // self._num_core_complexes
+
+        self.core_complexes = [
+            CoreComplex(
+                board=board,
+                cores=all_cores[idx * ccd_size : (idx + 1) * ccd_size],
+                ruby_system=self.ruby_system,
+                l1i_size=self._l1i_size,
+                l1i_assoc=self._l1i_assoc,
+                l1d_size=self._l1d_size,
+                l1d_assoc=self._l1d_assoc,
+                l2_size=self._l2_size,
+                l2_assoc=self._l2_assoc,
+                l3_size=self._l3_size,
+                l3_assoc=self._l3_assoc,
+            )
+            for idx in range(self._num_core_complexes)
+        ]
+
+        network.incorporate_ccds(self.core_complexes)
+
+        self._create_directory_controllers(board)
+        self._create_dma_controllers(board, self.ruby_system)
+
+        self.ruby_system.num_of_sequencers = (
+            len(all_cores)
+            + len(self._dma_controllers)
+            + len(self._io_controllers)
+        )
+        # SimpleNetwork requires .int_links and .routers to exist
+        # if we want to call SimpleNetwork.setup_buffers()
+        network.int_links = network._int_links
+        network.ext_links = network._ext_links
+        network.routers = network._routers
+        network.setup_buffers()
+
+        # Set up a proxy port for the system_port. Used for loading binaries
+        # and other functional-only things.
+        self.ruby_system.sys_port_proxy = RubyPortProxy()
+        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
+
+    def _create_directory_controllers(self, board):
+        """Create a directory controller, router and links per memory port."""
+        # One directory controller per (address range, memory port) pair
+        self.directory_controllers = [
+            Directory(
+                self.ruby_system.network,
+                board.get_cache_line_size(),
+                addr_range,
+                mem_port,
+            )
+            for addr_range, mem_port in board.get_mem_ports()
+        ]
+        for ctrl in self.directory_controllers:
+            ctrl.ruby_system = self.ruby_system
+        # One router per directory controller
+        self.directory_controller_routers = [
+            RubyRouter(self.ruby_system.network)
+            for _ in self.directory_controllers
+        ]
+        for router in self.directory_controller_routers:
+            self.ruby_system.network._add_router(router)
+        # Adding an external link for each controller and its router
+        self.directory_controller_ext_links = [
+            RubyExtLink(ext_node=dir_ctrl, int_node=dir_router)
+            for dir_ctrl, dir_router in zip(
+                self.directory_controllers, self.directory_controller_routers
+            )
+        ]
+        for ext_link in self.directory_controller_ext_links:
+            self.ruby_system.network._add_ext_link(ext_link)
+        # Link every directory router to the cross-CCD router, both ways
+        _directory_controller_int_links = []
+        for router in self.directory_controller_routers:
+            int_link_1, int_link_2 = RubyIntLink.create_bidirectional_links(
+                router, self.ruby_system.network.cross_ccd_router
+            )
+            _directory_controller_int_links.extend([int_link_1, int_link_2])
+            self.ruby_system.network._add_int_link(int_link_1)
+            self.ruby_system.network._add_int_link(int_link_2)
+        self.directory_controller_int_links = _directory_controller_int_links
+
+    def _create_dma_controllers(self, board, ruby_system):
+        """Create the IO (full-system only) and DMA controllers and routers."""
+        # NOTE: the `ruby_system` argument is unused; self.ruby_system is used.
+        network = self.ruby_system.network
+        # IOController for full system simulation
+        if self._is_fullsystem:
+            self.io_sequencer = DMASequencer(
+                version=0, ruby_system=self.ruby_system
+            )
+            self.io_sequencer.in_ports = board.get_mem_side_coherent_io_port()
+            self.ruby_system.io_controller = DMAController(
+                dma_sequencer=self.io_sequencer, ruby_system=self.ruby_system
+            )
+            self._io_controllers.append(self.ruby_system.io_controller)
+            self.io_controller_router = RubyRouter(network)
+            network._add_router(self.io_controller_router)
+            self.io_controller_ext_link = RubyExtLink(
+                ext_node=self._io_controllers[0],
+                int_node=self.io_controller_router,
+            )
+            network._add_ext_link(self.io_controller_ext_link)
+            self.io_controller_int_links = (
+                RubyIntLink.create_bidirectional_links(
+                    self.io_controller_router, network.cross_ccd_router
+                )
+            )
+            network._add_int_link(self.io_controller_int_links[0])
+            network._add_int_link(self.io_controller_int_links[1])
+
+        self._dma_controllers = []
+        if board.has_dma_ports():
+            # DMA sequencer versions start at 1; the IO sequencer above uses 0
+            self.ruby_system.dma_controllers = [
+                DMAController(
+                    dma_sequencer=DMASequencer(version=i + 1, in_ports=port),
+                    ruby_system=self.ruby_system,
+                )
+                for i, port in enumerate(board.get_dma_ports())
+            ]
+            self._dma_controllers = self.ruby_system.dma_controllers
+            self.dma_routers = [
+                RubyRouter(network) for _ in self._dma_controllers
+            ]
+            for dma_router in self.dma_routers:
+                network._add_router(dma_router)
+            self.dma_ext_links = [
+                RubyExtLink(ext_node=dma_controller, int_node=dma_router)
+                for dma_controller, dma_router in zip(
+                    self._dma_controllers, self.dma_routers
+                )
+            ]
+            for link in self.dma_ext_links:
+                network._add_ext_link(link)
+            # All router -> cross-CCD links first, then the reverse links
+            self.dma_int_links = [
+                RubyIntLink(dma_router, network.cross_ccd_router)
+                for dma_router in self.dma_routers
+            ] + [
+                RubyIntLink(network.cross_ccd_router, dma_router)
+                for dma_router in self.dma_routers
+            ]
+            for link in self.dma_int_links:
+                network._add_int_link(link)
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi_network.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi_network.py
new file mode 100644
index 0000000000..8e5befabf7
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi_network.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2022-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import SimpleNetwork
+
+from .ruby_network_components import (
+    RubyNetworkComponent,
+    RubyRouter,
+    RubyIntLink,
+)
+
+
+# - The network owns all routers, int links and ext links that are not in a CCD.
+# - The CCD subsystems are not of type RubyNetwork, so references to their routers and
+#   links are copied to OctopiNetwork._routers, ._int_links and ._ext_links, which are
+#   in turn copied to RubyNetwork.routers, .int_links and .ext_links respectively.
+class OctopiNetwork(SimpleNetwork, RubyNetworkComponent):
+    """SimpleNetwork with one cross-CCD router that each CCD is linked to."""
+
+    def __init__(self, ruby_system):
+        SimpleNetwork.__init__(self=self)
+        RubyNetworkComponent.__init__(self=self)
+        self.netifs = []
+        self.ruby_system = ruby_system
+        num_vnets = ruby_system.number_of_virtual_networks
+        self.number_of_virtual_networks = num_vnets
+        self.cross_ccd_router = RubyRouter(self)
+        self._add_router(self.cross_ccd_router)
+
+    def connect_ccd_routers_to_cross_ccd_router(self, ccds):
+        for ccd in ccds:
+            forward, reverse = RubyIntLink.create_bidirectional_links(
+                self.cross_ccd_router,
+                ccd.get_main_router(),
+                bandwidth_factor=64,
+            )
+            ccd.to_cross_ccd_router_link = forward
+            ccd.from_cross_ccd_router_link = reverse
+            self._add_int_link(forward)
+            self._add_int_link(reverse)
+
+    def incorporate_ccds(self, ccds):
+        for ccd in ccds:
+            self.incorporate_ruby_subsystem(ccd)
+        self.connect_ccd_routers_to_cross_ccd_router(ccds)
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/ruby_network_components.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/ruby_network_components.py
new file mode 100644
index 0000000000..8a413ea59d
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/ruby_network_components.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2022-2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import Switch, SimpleIntLink, SimpleExtLink
+
+
+class RubyNetworkComponent:
+    def __init__(self):
+        super().__init__()
+        self._routers = []  # routers added here or taken from sub-systems
+        self._ext_links = []  # external links, collected the same way
+        self._int_links = []  # internal links, collected the same way
+
+    def _add_router(self, router):
+        self._routers.append(router)
+
+    def _add_ext_link(self, link):
+        self._ext_links.append(link)
+
+    def _add_int_link(self, link):
+        self._int_links.append(link)
+
+    def get_routers(self):
+        return self._routers  # the live list, not a copy
+
+    def get_ext_links(self):
+        return self._ext_links  # the live list, not a copy
+
+    def get_int_links(self):
+        return self._int_links  # the live list, not a copy
+
+    def incorporate_ruby_subsystem(self, other_ruby_subsystem):
+        self._routers.extend(other_ruby_subsystem.get_routers())
+        self._ext_links.extend(other_ruby_subsystem.get_ext_links())
+        self._int_links.extend(other_ruby_subsystem.get_int_links())
+
+
+class RubyRouter(Switch):
+    _router_id = 0  # class-level counter: the next router_id to hand out
+
+    @classmethod
+    def _get_router_id(cls):
+        cls._router_id += 1
+        return cls._router_id - 1  # the counter's value before the bump
+
+    def __init__(self, network):
+        super().__init__()
+        self.router_id = self._get_router_id()
+        self.virt_nets = network.number_of_virtual_networks
+
+
+class RubyExtLink(SimpleExtLink):
+    _link_id = 0  # next link_id; counted separately from RubyIntLink's ids
+
+    @classmethod
+    def _get_link_id(cls):
+        cls._link_id += 1
+        return cls._link_id - 1  # the counter's value before the bump
+
+    def __init__(self, ext_node, int_node, bandwidth_factor=16):
+        super().__init__()
+        self.link_id = self._get_link_id()
+        self.ext_node = ext_node
+        self.int_node = int_node
+        self.bandwidth_factor = bandwidth_factor
+
+
+class RubyIntLink(SimpleIntLink):
+    _link_id = 0  # next link_id; counted separately from RubyExtLink's ids
+
+    @classmethod
+    def _get_link_id(cls):
+        cls._link_id += 1
+        return cls._link_id - 1  # the counter's value before the bump
+
+    @classmethod
+    def create_bidirectional_links(cls, node_1, node_2, bandwidth_factor=16):
+        return [
+            RubyIntLink(node_1, node_2, bandwidth_factor),  # node_1 -> node_2
+            RubyIntLink(node_2, node_1, bandwidth_factor),  # node_2 -> node_1
+        ]
+
+    def __init__(self, src_node, dst_node, bandwidth_factor=16):
+        super().__init__()
+        self.link_id = self._get_link_id()
+        self.src_node = src_node
+        self.dst_node = dst_node
+        self.bandwidth_factor = bandwidth_factor
diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py
index 89b6b21177..2a8ce30cda 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py
@@ -80,7 +80,6 @@ def __init__(
         self._num_l3_banks = num_l3_banks
 
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         requires(
             coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL
         )
@@ -193,10 +192,10 @@ def incorporate_cache(self, board: AbstractBoard) -> None:
         if board.has_dma_ports():
             dma_ports = board.get_dma_ports()
             for i, port in enumerate(dma_ports):
-                ctrl = DMAController(self.ruby_system.network, cache_line_size)
-                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)
+                ctrl = DMAController(
+                    DMASequencer(version=i, in_ports=port), self.ruby_system
+                )
                 self._dma_controllers.append(ctrl)
-                ctrl.ruby_system = self.ruby_system
 
         self.ruby_system.num_of_sequencers = len(self._l1_controllers) + len(
             self._dma_controllers
diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
index 79c8b0ada3..8c7bba4ed4 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
@@ -76,7 +76,6 @@ def __init__(
         self._num_l2_banks = num_l2_banks
 
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         requires(coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL)
 
         cache_line_size = board.get_cache_line_size()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py
index 5955ad3b20..93b19591cc 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py
@@ -58,7 +58,6 @@ def __init__(self, size: str, assoc: str):
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)
 
         self.ruby_system = RubySystem()
diff --git a/src/python/gem5/components/memory/dram_interfaces/hbm.py b/src/python/gem5/components/memory/dram_interfaces/hbm.py
index 5063c4d9e1..50578a04c8 100644
--- a/src/python/gem5/components/memory/dram_interfaces/hbm.py
+++ b/src/python/gem5/components/memory/dram_interfaces/hbm.py
@@ -201,7 +201,6 @@ class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
 # on HBM gen2 specifications. 4H stack, 8Gb per die and total capacity
 # of 4GiB.
 class HBM_2000_4H_1x64(DRAMInterface):
-
     # 64-bit interface for a single pseudo channel
     device_bus_width = 64
 
diff --git a/src/python/gem5/components/memory/dramsys.py b/src/python/gem5/components/memory/dramsys.py
index 28f3bd319f..a09d2fa0fc 100644
--- a/src/python/gem5/components/memory/dramsys.py
+++ b/src/python/gem5/components/memory/dramsys.py
@@ -24,7 +24,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import m5
+from typing import Tuple, Sequence, List, Optional
+from pathlib import Path
 
 from m5.objects import (
     DRAMSys,
@@ -40,27 +41,46 @@
 from ..boards.abstract_board import AbstractBoard
 from .abstract_memory_system import AbstractMemorySystem
 
-from typing import Tuple, Sequence, List
+
+DEFAULT_DRAMSYS_DIRECTORY = Path("ext/dramsys/DRAMSys")
 
 
 class DRAMSysMem(AbstractMemorySystem):
+    """
+    A DRAMSys memory controller.
+
+    This class requires gem5 to be built with DRAMSys (see ext/dramsys).
+    The specified memory size does not control the simulated memory size; its sole purpose is
+    to notify gem5 of DRAMSys's memory size.
+    Therefore it has to match the DRAMSys configuration.
+    DRAMSys is configured using JSON files, whose base configuration has to be passed as a
+    parameter. Sub-configs are specified relative to the optional resource directory parameter.
+    """
+
     def __init__(
         self,
         configuration: str,
         size: str,
-        resource_directory: str,
         recordable: bool,
+        resource_directory: Optional[str] = None,
     ) -> None:
         """
         :param configuration: Path to the base configuration JSON for DRAMSys.
         :param size: Memory size of DRAMSys. Must match the size specified in JSON configuration.
-        :param resource_directory: Path to the base resource directory for DRAMSys.
         :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        :param resource_directory: Path to the base resource directory for DRAMSys.
         """
         super().__init__()
+
+        resource_directory_path = (
+            DEFAULT_DRAMSYS_DIRECTORY / "configs"
+            if resource_directory is None
+            else Path(resource_directory)
+        )
+
         self.dramsys = DRAMSys(
             configuration=configuration,
-            resource_directory=resource_directory,
+            resource_directory=resource_directory_path.as_posix(),
             recordable=recordable,
         )
 
@@ -97,56 +117,72 @@ def set_memory_range(self, ranges: List[AddrRange]) -> None:
 
 
 class DRAMSysDDR4_1866(DRAMSysMem):
+    """
+    An example DDR4 1866 DRAMSys configuration.
+    """
+
     def __init__(self, recordable: bool):
         """
         :param recordable: Whether the database recording feature of DRAMSys is enabled.
         """
         super().__init__(
-            configuration="ext/dramsys/DRAMSys/DRAMSys/"
-            "library/resources/simulations/ddr4-example.json",
+            configuration=(
+                DEFAULT_DRAMSYS_DIRECTORY / "configs/ddr4-example.json"
+            ).as_posix(),
             size="4GB",
-            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
             recordable=recordable,
         )
 
 
 class DRAMSysDDR3_1600(DRAMSysMem):
+    """
+    An example DDR3 1600 DRAMSys configuration.
+    """
+
     def __init__(self, recordable: bool):
         """
         :param recordable: Whether the database recording feature of DRAMSys is enabled.
         """
         super().__init__(
-            configuration="ext/dramsys/DRAMSys/DRAMSys/"
-            "library/resources/simulations/ddr3-gem5-se.json",
-            size="4GB",
-            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            configuration=(
+                DEFAULT_DRAMSYS_DIRECTORY / "configs/ddr3-gem5-se.json"
+            ).as_posix(),
+            size="1GB",
             recordable=recordable,
         )
 
 
 class DRAMSysLPDDR4_3200(DRAMSysMem):
+    """
+    An example LPDDR4 3200 DRAMSys configuration.
+    """
+
     def __init__(self, recordable: bool):
         """
         :param recordable: Whether the database recording feature of DRAMSys is enabled.
         """
         super().__init__(
-            configuration="ext/dramsys/DRAMSys/DRAMSys/"
-            "library/resources/simulations/lpddr4-example.json",
-            size="4GB",
-            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            configuration=(
+                DEFAULT_DRAMSYS_DIRECTORY / "configs/lpddr4-example.json"
+            ).as_posix(),
+            size="1GB",
             recordable=recordable,
         )
 
 
 class DRAMSysHBM2(DRAMSysMem):
+    """
+    An example HBM2 DRAMSys configuration.
+    """
+
     def __init__(self, recordable: bool):
         """
         :param recordable: Whether the database recording feature of DRAMSys is enabled.
         """
         super().__init__(
-            configuration="ext/dramsys/DRAMSys/DRAMSys/"
-            "library/resources/simulations/hbm2-example.json",
-            size="4GB",
-            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            configuration=(
+                DEFAULT_DRAMSYS_DIRECTORY / "configs/hbm2-example.json"
+            ).as_posix(),
+            size="1GB",
             recordable=recordable,
         )
diff --git a/src/python/gem5/components/memory/hbm.py b/src/python/gem5/components/memory/hbm.py
index 75db1f9fde..3f0716c14b 100644
--- a/src/python/gem5/components/memory/hbm.py
+++ b/src/python/gem5/components/memory/hbm.py
@@ -137,7 +137,6 @@ def _interleave_addresses(self):
 
     @overrides(ChanneledMemory)
     def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
-
         intlv_bits = log(self._num_channels, 2)
         mask_list = []
 
diff --git a/src/python/gem5/components/processors/abstract_generator.py b/src/python/gem5/components/processors/abstract_generator.py
index ff5387dd14..30e8f17970 100644
--- a/src/python/gem5/components/processors/abstract_generator.py
+++ b/src/python/gem5/components/processors/abstract_generator.py
@@ -35,6 +35,22 @@
 from typing import List
 
 
+def partition_range(
+    min_addr: int, max_addr: int, num_partitions: int
+) -> List[tuple]:
+    """Split min_addr..max_addr into equal consecutive (start, end) pairs."""
+    assert all(
+        isinstance(arg, int) for arg in (min_addr, max_addr, num_partitions)
+    )
+    assert ((max_addr - min_addr) % num_partitions) == 0
+    # Floor division stays exact; float division loses bits above 2**53.
+    chunk_size = (max_addr - min_addr) // num_partitions
+    return [
+        (min_addr + chunk_size * i, min_addr + chunk_size * (i + 1))
+        for i in range(num_partitions)
+    ]
+
+
 class AbstractGenerator(AbstractProcessor):
     """The abstract generator
     It defines the external interface of every generator component.
diff --git a/src/python/gem5/components/processors/abstract_processor.py b/src/python/gem5/components/processors/abstract_processor.py
index a0f8b5cf44..8978513562 100644
--- a/src/python/gem5/components/processors/abstract_processor.py
+++ b/src/python/gem5/components/processors/abstract_processor.py
@@ -54,7 +54,7 @@ def __init__(
         if cores:
             # In the stdlib we assume the system processor conforms to a single
             # ISA target.
-            assert len(set(core.get_isa() for core in cores)) == 1
+            assert len({core.get_isa() for core in cores}) == 1
             self.cores = cores
             self._isa = cores[0].get_isa()
         else:
diff --git a/src/python/gem5/components/processors/base_cpu_core.py b/src/python/gem5/components/processors/base_cpu_core.py
index c75c0029cf..58d06024bc 100644
--- a/src/python/gem5/components/processors/base_cpu_core.py
+++ b/src/python/gem5/components/processors/base_cpu_core.py
@@ -93,7 +93,6 @@ def requires_send_evicts(self) -> bool:
 
     @overrides(AbstractCore)
     def is_kvm_core(self) -> bool:
-
         try:
             from m5.objects import BaseKvmCPU
 
@@ -118,7 +117,6 @@ def connect_dcache(self, port: Port) -> None:
     @overrides(AbstractCore)
     def connect_walker_ports(self, port1: Port, port2: Port) -> None:
         if self.get_isa() == ISA.ARM:
-
             # Unlike X86 and RISCV MMU, the ARM MMU has two L1 TLB walker ports
             # named `walker` and `stage2_walker` for both data and instruction.
             # The gem5 standard library currently supports one TLB walker port
@@ -144,7 +142,6 @@ def connect_interrupt(
         interrupt_requestor: Optional[Port] = None,
         interrupt_responce: Optional[Port] = None,
     ) -> None:
-
         # TODO: This model assumes that we will only create an interrupt
         # controller as we require it. Not sure how true this is in all cases.
         self.core.createInterruptController()
diff --git a/src/python/gem5/components/processors/base_cpu_processor.py b/src/python/gem5/components/processors/base_cpu_processor.py
index 9a7561587a..d097682d26 100644
--- a/src/python/gem5/components/processors/base_cpu_processor.py
+++ b/src/python/gem5/components/processors/base_cpu_processor.py
@@ -71,7 +71,6 @@ def __init__(self, cores: List[BaseCPUCore]):
 
     @overrides(AbstractProcessor)
     def incorporate_processor(self, board: AbstractBoard) -> None:
-
         if any(core.is_kvm_core() for core in self.get_cores()):
             board.kvm_vm = self.kvm_vm
             # To get the KVM CPUs to run on different host CPUs
diff --git a/src/python/gem5/components/processors/complex_generator.py b/src/python/gem5/components/processors/complex_generator.py
index b113640ae7..81b94f0c14 100644
--- a/src/python/gem5/components/processors/complex_generator.py
+++ b/src/python/gem5/components/processors/complex_generator.py
@@ -27,6 +27,7 @@
 from ...utils.override import overrides
 from .complex_generator_core import ComplexGeneratorCore
 from .abstract_generator import AbstractGenerator
+from .abstract_generator import partition_range
 
 from typing import Iterator, List, Any
 
@@ -76,13 +77,14 @@ def add_linear(
         :param data_limit: The amount of data in bytes to read/write by the
         generator before stopping generation.
         """
-        for core in self.cores:
+        ranges = partition_range(min_addr, max_addr, len(self.cores))
+        for i, core in enumerate(self.cores):
             core.add_linear(
                 duration,
                 rate,
                 block_size,
-                min_addr,
-                max_addr,
+                ranges[i][0],
+                ranges[i][1],
                 rd_perc,
                 data_limit,
             )
diff --git a/src/python/gem5/components/processors/cpu_types.py b/src/python/gem5/components/processors/cpu_types.py
index e12eb99816..d2bf6548ed 100644
--- a/src/python/gem5/components/processors/cpu_types.py
+++ b/src/python/gem5/components/processors/cpu_types.py
@@ -62,7 +62,7 @@ def get_cpu_type_from_str(input: str) -> CPUTypes:
         if input.lower() == cpu_type.value:
             return cpu_type
 
-    valid_cpu_types_list_str = str()
+    valid_cpu_types_list_str = ""
     for cpu_type_str in get_cpu_types_str_set():
         valid_cpu_types_list_str += f"{os.linesep}{cpu_type_str}"
 
diff --git a/src/python/gem5/components/processors/linear_generator.py b/src/python/gem5/components/processors/linear_generator.py
index 90fe62e7d6..32587d40c2 100644
--- a/src/python/gem5/components/processors/linear_generator.py
+++ b/src/python/gem5/components/processors/linear_generator.py
@@ -27,6 +27,7 @@
 from ...utils.override import overrides
 from .linear_generator_core import LinearGeneratorCore
 from .abstract_generator import AbstractGenerator
+from .abstract_generator import partition_range
 
 from typing import List
 
@@ -91,17 +92,20 @@ def _create_cores(
         The helper function to create the cores for the generator, it will use
         the same inputs as the constructor function.
         """
+
+        ranges = partition_range(min_addr, max_addr, num_cores)
+
         return [
             LinearGeneratorCore(
                 duration=duration,
                 rate=rate,
                 block_size=block_size,
-                min_addr=min_addr,
-                max_addr=max_addr,
+                min_addr=ranges[i][0],
+                max_addr=ranges[i][1],
                 rd_perc=rd_perc,
                 data_limit=data_limit,
             )
-            for _ in range(num_cores)
+            for i in range(num_cores)
         ]
 
     @overrides(AbstractGenerator)
diff --git a/src/python/gem5/components/processors/simple_core.py b/src/python/gem5/components/processors/simple_core.py
index 15e15dc0cf..65c0f0ec83 100644
--- a/src/python/gem5/components/processors/simple_core.py
+++ b/src/python/gem5/components/processors/simple_core.py
@@ -44,7 +44,6 @@ class SimpleCore(BaseCPUCore):
     def __init__(
         self, cpu_type: CPUTypes, core_id: int, isa: Optional[ISA] = None
     ):
-
         # If the ISA is not specified, we infer it via the `get_runtime_isa`
         # function.
         if isa:
diff --git a/src/python/gem5/components/processors/switchable_processor.py b/src/python/gem5/components/processors/switchable_processor.py
index 20754fbf73..88e5f4cc47 100644
--- a/src/python/gem5/components/processors/switchable_processor.py
+++ b/src/python/gem5/components/processors/switchable_processor.py
@@ -52,7 +52,6 @@ def __init__(
         switchable_cores: Dict[str, List[SimpleCore]],
         starting_cores: str,
     ) -> None:
-
         if starting_cores not in switchable_cores.keys():
             raise AssertionError(
                 f"Key {starting_cores} cannot be found in the "
@@ -64,7 +63,7 @@ def __init__(
 
         # In the stdlib we assume the system processor conforms to a single
         # ISA target.
-        assert len(set(core.get_isa() for core in self._current_cores)) == 1
+        assert len({core.get_isa() for core in self._current_cores}) == 1
         super().__init__(isa=self._current_cores[0].get_isa())
 
         for name, core_list in self._switchable_cores.items():
@@ -85,7 +84,6 @@ def __init__(
 
     @overrides(AbstractProcessor)
     def incorporate_processor(self, board: AbstractBoard) -> None:
-
         # This is a bit of a hack. The `m5.switchCpus` function, used in the
         # "switch_to_processor" function, requires the System simobject as an
         # argument. We therefore need to store the board when incorporating the
@@ -115,11 +113,9 @@ def get_cores(self) -> List[AbstractCore]:
 
     def _all_cores(self):
         for core_list in self._switchable_cores.values():
-            for core in core_list:
-                yield core
+            yield from core_list
 
     def switch_to_processor(self, switchable_core_key: str):
-
         # Run various checks.
         if not hasattr(self, "_board"):
             raise AssertionError("The processor has not been incorporated.")
diff --git a/src/python/gem5/isas.py b/src/python/gem5/isas.py
index 84f02b87e7..ad156d6f2e 100644
--- a/src/python/gem5/isas.py
+++ b/src/python/gem5/isas.py
@@ -80,7 +80,7 @@ def get_isa_from_str(input: str) -> ISA:
         if input.lower() == isa.value:
             return isa
 
-    valid_isas_str_list = str()
+    valid_isas_str_list = ""
     for isa_str in get_isas_str_set():
         valid_isas_str_list += f"{os.linesep}{isa_str}"
 
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
index 9ca95839f8..a4e639801d 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
@@ -566,7 +566,12 @@ def _add_disk_to_board(self, disk_image: AbstractResource):
 
     @overrides(KernelDiskWorkload)
     def get_default_kernel_args(self) -> List[str]:
-        return ["console=ttyS0", "root={root_value}", "rw"]
+        return [
+            "console=ttyS0",
+            "root={root_value}",
+            "disk_device={disk_device}",
+            "rw",
+        ]
 
     @overrides(KernelDiskWorkload)
     def set_kernel_disk_workload(
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
index 25e55ef310..dce1c5a964 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
@@ -97,7 +97,6 @@ def get_cpu_side_port(self) -> Port:
 
     @overrides(AbstractCacheHierarchy)
     def incorporate_cache(self, board: AbstractBoard) -> None:
-
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
@@ -135,7 +134,6 @@ def incorporate_cache(self, board: AbstractBoard) -> None:
             self._setup_io_cache(board)
 
         for i, cpu in enumerate(board.get_processor().get_cores()):
-
             cpu.connect_icache(self.l1icaches[i].cpu_side)
             cpu.connect_dcache(self.l1dcaches[i].cpu_side)
 
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
index 4b8d2c1d32..19dc2f2e8c 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
@@ -95,8 +95,8 @@ class U74FUPool(MinorFUPool):
 
 
 class U74BP(TournamentBP):
-    BTBEntries = 32
-    RASSize = 12
+    btb = SimpleBTB(numEntries=32)
+    ras = ReturnAddrStack(numEntries=12)
     localHistoryTableSize = 4096  # is 3.6 KiB but gem5 requires power of 2
     localPredictorSize = 16384
     globalPredictorSize = 16384
@@ -214,3 +214,4 @@ def __init__(
         core_id,
     ):
         super().__init__(core=U74CPU(cpu_id=core_id), isa=ISA.RISCV)
+        self.core.isa[0].enable_rvv = False
diff --git a/src/python/gem5/resources/client.py b/src/python/gem5/resources/client.py
index ab8262bf92..74475caec2 100644
--- a/src/python/gem5/resources/client.py
+++ b/src/python/gem5/resources/client.py
@@ -30,7 +30,7 @@
 from typing import Optional, Dict, List
 from .client_api.client_wrapper import ClientWrapper
 from gem5.gem5_default_config import config
-from m5.util import inform
+from m5.util import inform, warn
 from _m5 import core
 
 
@@ -41,7 +41,7 @@ def getFileContent(file_path: Path) -> Dict:
     :return: The content of the file
     """
     if file_path.exists():
-        with open(file_path, "r") as file:
+        with open(file_path) as file:
             return json.load(file)
     else:
         raise Exception(f"File not found at {file_path}")
@@ -53,8 +53,38 @@ def getFileContent(file_path: Path) -> Dict:
 def _get_clientwrapper():
     global clientwrapper
     if clientwrapper is None:
+        if (
+            "GEM5_RESOURCE_JSON" in os.environ
+            and "GEM5_RESOURCE_JSON_APPEND" in os.environ
+        ):
+            raise Exception(
+                "Both GEM5_RESOURCE_JSON and GEM5_RESOURCE_JSON_APPEND are set. Please set only one of them."
+            )
+        gem5_config = {}
+        # If the GEM5_RESOURCE_JSON is set, use it as the only source
+        if "GEM5_RESOURCE_JSON" in os.environ:
+            json_source = {
+                "url": os.environ["GEM5_RESOURCE_JSON"],
+                "isMongo": False,
+            }
+            gem5_config["sources"] = {"GEM5_RESOURCE_JSON": json_source}
+            if "GEM5_CONFIG" in os.environ:
+                warn(
+                    f"Both GEM5_CONFIG and GEM5_RESOURCE_JSON are set.\n"
+                    f"GEM5_CONFIG will be ignored in favor of the GEM5_RESOURCE_JSON environment variable."
+                )
+            elif (Path().cwd().resolve() / "gem5-config.json").exists():
+                warn(
+                    f"Both gem5-config.json and GEM5_RESOURCE_JSON are set.\n"
+                    f"gem5-config.json will be ignored in favor of the GEM5_RESOURCE_JSON environment variable."
+                )
+            else:
+                warn(
+                    f"GEM5_RESOURCE_JSON is set.\n"
+                    f"gem5-default-config will be ignored in favor of the GEM5_RESOURCE_JSON environment variable."
+                )
         # First check if the config file path is provided in the environment variable
-        if "GEM5_CONFIG" in os.environ:
+        elif "GEM5_CONFIG" in os.environ:
             config_file_path = Path(os.environ["GEM5_CONFIG"])
             gem5_config = getFileContent(config_file_path)
             inform("Using config file specified by $GEM5_CONFIG")
@@ -68,6 +98,20 @@ def _get_clientwrapper():
         else:
             gem5_config = config
             inform("Using default config")
+
+        # If the GEM5_RESOURCE_JSON_APPEND is set, append the resources to the gem5_config
+        if "GEM5_RESOURCE_JSON_APPEND" in os.environ:
+            json_source = {
+                "url": os.environ["GEM5_RESOURCE_JSON_APPEND"],
+                "isMongo": False,
+            }
+            gem5_config["sources"].update(
+                {"GEM5_RESOURCE_JSON_APPEND": json_source}
+            )
+            inform(
+                f"Appending resources from {os.environ['GEM5_RESOURCE_JSON_APPEND']}"
+            )
+
         clientwrapper = ClientWrapper(gem5_config)
     return clientwrapper
 
diff --git a/src/python/gem5/resources/client_api/abstract_client.py b/src/python/gem5/resources/client_api/abstract_client.py
index 7f8ad6166e..0365b5ca60 100644
--- a/src/python/gem5/resources/client_api/abstract_client.py
+++ b/src/python/gem5/resources/client_api/abstract_client.py
@@ -30,26 +30,6 @@
 
 
 class AbstractClient(ABC):
-    def verify_status_code(self, status_code: int) -> None:
-        """
-        Verifies that the status code is 200.
-        :param status_code: The status code to verify.
-        """
-        if status_code == 200:
-            return
-        if status_code == 429:
-            raise Exception("Panic: Too many requests")
-        if status_code == 401:
-            raise Exception("Panic: Unauthorized")
-        if status_code == 404:
-            raise Exception("Panic: Not found")
-        if status_code == 400:
-            raise Exception("Panic: Bad request")
-        if status_code == 500:
-            raise Exception("Panic: Internal server error")
-
-        raise Exception(f"Panic: Unknown status code {status_code}")
-
     def _url_validator(self, url: str) -> bool:
         """
         Validates the provided URL.
diff --git a/src/python/gem5/resources/client_api/atlasclient.py b/src/python/gem5/resources/client_api/atlasclient.py
index 7d2a27c3f7..7b1d263936 100644
--- a/src/python/gem5/resources/client_api/atlasclient.py
+++ b/src/python/gem5/resources/client_api/atlasclient.py
@@ -25,10 +25,38 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from urllib import request, parse
-from urllib.error import HTTPError, URLError
 from typing import Optional, Dict, Union, Type, Tuple, List, Any
 import json
+import time
+import itertools
 from .abstract_client import AbstractClient
+from m5.util import warn
+
+
+class AtlasClientHttpJsonRequestError(Exception):
+    def __init__(
+        self,
+        client: "AtlasClient",
+        data: Dict[str, Any],
+        purpose_of_request: Optional[str],
+    ):
+        """An exception raised when an HTTP request to Atlas MongoDB fails.
+        :param client: The AtlasClient instance that raised the exception.
+        :param data: The JSON data that was sent with the failed request.
+        :param purpose_of_request: Optional description of the request.
+        """
+        error_str = (
+            f"Http Request to Atlas MongoDB failed.\n"
+            f"Atlas URL: {client.url}\n"
+            f"Auth URL: {client.authUrl}\n"
+            f"Database: {client.database}\n"
+            f"Collection: {client.collection}\n\n"
+            f"Data sent:\n\n{json.dumps(data,indent=4)}\n\n"
+        )
+
+        if purpose_of_request:
+            error_str += f"Purpose of Request: {purpose_of_request}\n\n"
+        super().__init__(error_str)
 
 
 class AtlasClient(AbstractClient):
@@ -47,22 +75,69 @@ def __init__(self, config: Dict[str, str]):
         self.authUrl = config["authUrl"]
 
     def get_token(self):
-        data = {"key": self.apiKey}
-        data = json.dumps(data).encode("utf-8")
+        return self._atlas_http_json_req(
+            self.authUrl,
+            data_json={"key": self.apiKey},
+            headers={"Content-Type": "application/json"},
+            purpose_of_request="Get Access Token with API key",
+        )["access_token"]
+
+    def _atlas_http_json_req(
+        self,
+        url: str,
+        data_json: Dict[str, Any],
+        headers: Dict[str, str],
+        purpose_of_request: Optional[str],
+        max_failed_attempts: int = 4,
+        reattempt_pause_base: int = 2,
+    ) -> Dict[str, Any]:
+        """Sends a JSON object over HTTP to a given Atlas MongoDB server and
+        returns the response. This function will attempt to reconnect to the
+        server if the connection fails a set number of times before raising an
+        exception.
+
+        :param url: The URL to open the connection.
+        :param data_json: The JSON object to send.
+        :param headers: The headers to send with the request.
+        :param purpose_of_request: A string describing the purpose of the
+        request. This is optional. It's used to give context to the user if an
+        exception is raised.
+        :param max_failed_attempts: The maximum number of attempts at making
+        the request before an exception is raised.
+        :param reattempt_pause_base: The base of the exponential backoff -- the
+        time between each attempt.
+
+        **Warning**: This function assumes a JSON response.
+        """
+        data = json.dumps(data_json).encode("utf-8")
 
         req = request.Request(
-            self.authUrl,
+            url,
             data=data,
-            headers={"Content-Type": "application/json"},
+            headers=headers,
         )
-        try:
-            response = request.urlopen(req)
-        except HTTPError as e:
-            self.verify_status_code(e.status)
-            return None
-        result = json.loads(response.read().decode("utf-8"))
-        token = result["access_token"]
-        return token
+
+        for attempt in itertools.count(start=1):
+            try:
+                response = request.urlopen(req)
+                break
+            except Exception as e:
+                if attempt >= max_failed_attempts:
+                    raise AtlasClientHttpJsonRequestError(
+                        client=self,
+                        data=data_json,
+                        purpose_of_request=purpose_of_request,
+                    )
+                pause = reattempt_pause_base**attempt
+                warn(
+                    f"Attempt {attempt} of Atlas HTTP Request failed.\n"
+                    f"Purpose of Request: {purpose_of_request}.\n\n"
+                    f"Failed with Exception:\n{e}\n\n"
+                    f"Retrying after {pause} seconds..."
+                )
+                time.sleep(pause)
+
+        return json.loads(response.read().decode("utf-8"))
 
     def get_resources(
         self,
@@ -84,21 +159,18 @@ def get_resources(
 
         if filter:
             data["filter"] = filter
-        data = json.dumps(data).encode("utf-8")
 
         headers = {
             "Authorization": f"Bearer {self.get_token()}",
             "Content-Type": "application/json",
         }
 
-        req = request.Request(url, data=data, headers=headers)
-        try:
-            response = request.urlopen(req)
-        except HTTPError as e:
-            self.verify_status_code(e.status)
-            return None
-        result = json.loads(response.read().decode("utf-8"))
-        resources = result["documents"]
+        resources = self._atlas_http_json_req(
+            url,
+            data_json=data,
+            headers=headers,
+            purpose_of_request="Get Resources",
+        )["documents"]
 
         # I do this as a lazy post-processing step because I can't figure out
         # how to do this via an Atlas query, which may be more efficient.
diff --git a/src/python/gem5/resources/client_api/client_wrapper.py b/src/python/gem5/resources/client_api/client_wrapper.py
index d2baabc52d..9ddd69e2df 100644
--- a/src/python/gem5/resources/client_api/client_wrapper.py
+++ b/src/python/gem5/resources/client_api/client_wrapper.py
@@ -30,6 +30,7 @@
 from typing import Optional, Dict, List, Tuple
 import itertools
 from m5.util import warn
+import sys
 
 
 class ClientWrapper:
@@ -63,7 +64,6 @@ def list_resources(
         clients: Optional[List[str]] = None,
         gem5_version: Optional[str] = core.gem5Version,
     ) -> Dict[str, List[str]]:
-
         clients_to_search = (
             list(self.clients.keys()) if clients is None else clients
         )
@@ -114,7 +114,12 @@ def get_all_resources_by_id(
                     self.clients[client].get_resources_by_id(resource_id)
                 )
             except Exception as e:
-                warn(f"Error getting resources from client {client}: {str(e)}")
+                print(
+                    f"Exception thrown while getting resource '{resource_id}' "
+                    f"from client '{client}'\n",
+                    file=sys.stderr,
+                )
+                raise e
         # check if no 2 resources have the same id and version
         for res1, res2 in itertools.combinations(resources, 2):
             if res1["resource_version"] == res2["resource_version"]:
@@ -285,19 +290,21 @@ def _check_resource_version_compatibility(
             return False
         if (
             gem5_version
+            and not gem5_version.upper().startswith("DEVELOP")
             and not self._get_resources_compatible_with_gem5_version(
                 [resource], gem5_version=gem5_version
             )
         ):
-            warn(
-                f"Resource {resource['id']} with version "
-                f"{resource['resource_version']} is not known to be compatible"
-                f" with gem5 version {gem5_version}. "
-                "This may cause problems with your simulation. "
-                "This resource's compatibility "
-                "with different gem5 versions can be found here: "
-                "https://resources.gem5.org"
-                f"/resources/{resource['id']}/versions"
-            )
+            if not gem5_version.upper().startswith("DEVELOP"):
+                warn(
+                    f"Resource {resource['id']} with version "
+                    f"{resource['resource_version']} is not known to be compatible"
+                    f" with gem5 version {gem5_version}. "
+                    "This may cause problems with your simulation. "
+                    "This resource's compatibility "
+                    "with different gem5 versions can be found here: "
+                    "https://resources.gem5.org"
+                    f"/resources/{resource['id']}/versions"
+                )
             return False
         return True
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index bb5ca84cc0..e9b4980b53 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -34,6 +34,7 @@
 from pathlib import Path
 import tarfile
 from urllib.error import HTTPError
+from urllib.parse import urlparse
 from typing import List, Optional, Dict
 
 from _m5 import core
@@ -139,14 +140,32 @@ def _download(url: str, download_to: str, max_attempts: int = 6) -> None:
                 time.sleep((2**attempt) + random.uniform(0, 1))
             else:
                 raise e
+        except ConnectionResetError as e:
+            # This catches the ConnectionResetError we occasionally see
+            # when accessing resources on GitHub Actions.  It retries using a
+            # Truncated Exponential backoff algorithm, truncating after
+            # "max_attempts". If any other is retrieved we raise the error.
+            if e.errno == 104:
+                attempt += 1
+                if attempt >= max_attempts:
+                    raise Exception(
+                        f"After {attempt} attempts, the resource json could "
+                        "not be retrieved. OS Error Code retrieved: "
+                        f"{e.errno}"
+                    )
+                time.sleep((2**attempt) + random.uniform(0, 1))
+            else:
+                raise e
         except ValueError as e:
             raise Exception(
+                f"ValueError: {e}\n"
                 "Environment variable GEM5_USE_PROXY is set to "
                 f"'{use_proxy}'. The expected form is "
                 "<host>:<port>'."
             )
         except ImportError as e:
             raise Exception(
+                f"ImportError: {e}\n"
                 "An import error has occurred. This is likely due "
                 "the Python SOCKS client module not being "
                 "installed. It can be installed with "
@@ -185,6 +204,7 @@ def get_resource(
     resource_version: Optional[str] = None,
     clients: Optional[List] = None,
     gem5_version: Optional[str] = core.gem5Version,
+    quiet: bool = False,
 ) -> None:
     """
     Obtains a gem5 resource and stored it to a specified location. If the
@@ -217,6 +237,9 @@ def get_resource(
     By default, the version of gem5 being used is used. This is used primarily
     for testing purposes.
 
+    :param quiet: If true, no output will be printed to the console (barring
+    exceptions). False by default.
+
     :raises Exception: An exception is thrown if a file is already present at
     `to_path` but it does not have the correct md5 sum. An exception will also
     be thrown is a directory is present at `to_path`
@@ -262,17 +285,21 @@ def get_resource(
         # string-based way of doing things. It can be refactored away over
         # time:
         # https://gem5-review.googlesource.com/c/public/gem5-resources/+/51168
-        if isinstance(resource_json["is_zipped"], str):
-            run_unzip = unzip and resource_json["is_zipped"].lower() == "true"
-        elif isinstance(resource_json["is_zipped"], bool):
-            run_unzip = unzip and resource_json["is_zipped"]
-        else:
-            raise Exception(
-                "The resource.json entry for '{}' has a value for the "
-                "'is_zipped' field which is neither a string or a boolean.".format(
-                    resource_name
+        run_unzip = False
+        if "is_zipped" in resource_json:
+            if isinstance(resource_json["is_zipped"], str):
+                run_unzip = (
+                    unzip and resource_json["is_zipped"].lower() == "true"
+                )
+            elif isinstance(resource_json["is_zipped"], bool):
+                run_unzip = unzip and resource_json["is_zipped"]
+            else:
+                raise Exception(
+                    "The resource.json entry for '{}' has a value for the "
+                    "'is_zipped' field which is neither a string or a boolean.".format(
+                        resource_name
+                    )
                 )
-            )
 
         run_tar_extract = (
             untar
@@ -288,42 +315,60 @@ def get_resource(
         if run_unzip:
             download_dest += zip_extension
 
-        # TODO: Might be nice to have some kind of download status bar here.
-        # TODO: There might be a case where this should be silenced.
-        print(
-            "Resource '{}' was not found locally. Downloading to '{}'...".format(
-                resource_name, download_dest
+        file_uri_path = _file_uri_to_path(resource_json["url"])
+        if file_uri_path:
+            if not file_uri_path.exists():
+                raise Exception(
+                    f"Could not find file at path '{file_uri_path}'"
+                )
+            print(
+                "Resource '{}' is being copied from '{}' to '{}'...".format(
+                    resource_name,
+                    urlparse(resource_json["url"]).path,
+                    download_dest,
+                )
             )
-        )
+            shutil.copy(file_uri_path, download_dest)
+        else:
+            # TODO: Might be nice to have some kind of download status bar here.
+            if not quiet:
+                print(
+                    f"Resource '{resource_name}' was not found locally. "
+                    f"Downloading to '{download_dest}'..."
+                )
 
-        # Get the URL.
-        url = resource_json["url"]
+            # Get the URL.
+            url = resource_json["url"]
 
-        _download(url=url, download_to=download_dest)
-        print(f"Finished downloading resource '{resource_name}'.")
+            _download(url=url, download_to=download_dest)
+            if not quiet:
+                print(f"Finished downloading resource '{resource_name}'.")
 
         if run_unzip:
-            print(
-                f"Decompressing resource '{resource_name}' ('{download_dest}')..."
-            )
+            if not quiet:
+                print(
+                    f"Decompressing resource '{resource_name}' "
+                    f"('{download_dest}')..."
+                )
             unzip_to = download_dest[: -len(zip_extension)]
             with gzip.open(download_dest, "rb") as f:
                 with open(unzip_to, "wb") as o:
                     shutil.copyfileobj(f, o)
             os.remove(download_dest)
             download_dest = unzip_to
-            print(f"Finished decompressing resource '{resource_name}'.")
+            if not quiet:
+                print(f"Finished decompressing resource '{resource_name}'.")
 
         if run_tar_extract:
-            print(
-                f"Unpacking the the resource '{resource_name}' "
-                f"('{download_dest}')"
-            )
+            if not quiet:
+                print(
+                    f"Unpacking the the resource '{resource_name}' "
+                    f"('{download_dest}')"
+                )
             unpack_to = download_dest[: -len(tar_extension)]
             with tarfile.open(download_dest) as f:
 
                 def is_within_directory(directory, target):
-
                     abs_directory = os.path.abspath(directory)
                     abs_target = os.path.abspath(target)
 
@@ -334,7 +379,6 @@ def is_within_directory(directory, target):
                 def safe_extract(
                     tar, path=".", members=None, *, numeric_owner=False
                 ):
-
                     for member in tar.getmembers():
                         member_path = os.path.join(path, member.name)
                         if not is_within_directory(path, member_path):
@@ -346,3 +390,27 @@ def safe_extract(
 
                 safe_extract(f, unpack_to)
             os.remove(download_dest)
+
+
+def _file_uri_to_path(uri: str) -> Optional[Path]:
+    """
+    Converts a File-scheme URI (e.g., `file://host/path`) to a local path.
+
+    **Note:** Only files from localhost are permitted (an empty host or
+    `localhost`). An exception is thrown for any other host.
+
+    :param uri: The URI to convert.
+
+    :returns: A Path object for the local file if `uri` uses the File
+    scheme, otherwise None.
+    """
+    # Parse once; every check below inspects the same parsed result.
+    parsed_uri = urlparse(uri)
+    if parsed_uri.scheme != "file":
+        return None
+    if parsed_uri.netloc not in ("", "localhost"):
+        raise Exception(
+            f"File URI '{uri}' specifies host '{parsed_uri.netloc}'. "
+            "Only localhost is permitted."
+        )
+    return Path(parsed_uri.path)
diff --git a/src/python/gem5/resources/looppoint.py b/src/python/gem5/resources/looppoint.py
index 684faef37d..6e26efefdc 100644
--- a/src/python/gem5/resources/looppoint.py
+++ b/src/python/gem5/resources/looppoint.py
@@ -491,7 +491,6 @@ def __init__(
         with open(_path) as file:
             json_contents = json.load(file)
             for rid in json_contents:
-
                 start_pc = int(json_contents[rid]["simulation"]["start"]["pc"])
                 start_globl = int(
                     json_contents[rid]["simulation"]["start"]["global"]
diff --git a/src/python/gem5/resources/md5_utils.py b/src/python/gem5/resources/md5_utils.py
index f4a1a87df5..a371274fef 100644
--- a/src/python/gem5/resources/md5_utils.py
+++ b/src/python/gem5/resources/md5_utils.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2022-2023 The Regents of the University of California
+# Copyright (c) 2023 COSEDA Technologies GmbH
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -25,11 +26,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from pathlib import Path
+from typing import Type
 import hashlib
-from _hashlib import HASH as Hash
 
 
-def _md5_update_from_file(filename: Path, hash: Hash) -> Hash:
+def _md5_update_from_file(
+    filename: Path, hash: Type[hashlib.md5]
+) -> Type[hashlib.md5]:
     assert filename.is_file()
 
     if filename.stat().st_size < 1024 * 1024 * 100:
@@ -52,7 +55,9 @@ def _md5_update_from_file(filename: Path, hash: Hash) -> Hash:
     return hash
 
 
-def _md5_update_from_dir(directory: Path, hash: Hash) -> Hash:
+def _md5_update_from_dir(
+    directory: Path, hash: Type[hashlib.md5]
+) -> Type[hashlib.md5]:
     assert directory.is_dir()
     for path in sorted(directory.iterdir(), key=lambda p: str(p).lower()):
         hash.update(path.name.encode())
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index bc9f4480ba..98c58cf832 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -25,6 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from abc import ABCMeta
+import json
 import os
 from pathlib import Path
 from m5.util import warn, fatal
@@ -35,7 +36,17 @@
 from .looppoint import LooppointCsvLoader, LooppointJsonLoader
 from ..isas import ISA, get_isa_from_str
 
-from typing import Optional, Dict, Union, Type, Tuple, List
+from typing import (
+    Optional,
+    Dict,
+    Union,
+    Type,
+    Tuple,
+    List,
+    Any,
+    Set,
+    Generator,
+)
 
 from .client import get_resource_json_obj
 
@@ -70,6 +81,7 @@ class AbstractResource:
 
     def __init__(
         self,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         local_path: Optional[str] = None,
         description: Optional[str] = None,
@@ -91,12 +103,24 @@ def __init__(
                 f"Local path specified for resource, '{local_path}', does not "
                 "exist."
             )
-
+        self._id = id
         self._local_path = local_path
         self._description = description
         self._source = source
         self._version = resource_version
 
+    def get_category_name(cls) -> str:
+        raise NotImplementedError
+
+    def __str__(self) -> str:
+        """Summarize the resource and point to its resources.gem5.org page."""
+        head = f"{self.get_category_name()}({self._id}, {self._version})"
+        url = (
+            f"https://resources.gem5.org/resources/{self._id}?"
+            f"version={self._version}"
+        )
+        return f"{head}\nFor more information, please visit {url}"
+
     def get_resource_version(self) -> str:
         """Returns the version of the resource."""
         return self._version
@@ -122,6 +146,7 @@ class FileResource(AbstractResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -134,11 +159,15 @@ def __init__(
 
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "FileResource"
+
 
 class DirectoryResource(AbstractResource):
     """A resource consisting of a directory."""
@@ -146,6 +175,7 @@ class DirectoryResource(AbstractResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -159,11 +189,15 @@ def __init__(
 
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "DirectoryResource"
+
 
 class DiskImageResource(FileResource):
     """A Disk Image resource."""
@@ -171,6 +205,7 @@ class DiskImageResource(FileResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -179,6 +214,7 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
@@ -189,6 +225,9 @@ def get_root_partition(self) -> Optional[str]:
         """Returns, if applicable, the Root Partition of the disk image."""
         return self._root_partition
 
+    def get_category_name(cls) -> str:
+        return "DiskImageResource"
+
 
 class BinaryResource(FileResource):
     """A binary resource."""
@@ -196,6 +235,7 @@ class BinaryResource(FileResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -204,6 +244,7 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
@@ -216,6 +257,9 @@ def __init__(
             elif isinstance(architecture, ISA):
                 self._architecture = architecture
 
+    def get_category_name(cls) -> str:
+        return "BinaryResource"
+
     def get_architecture(self) -> Optional[ISA]:
         """Returns the ISA this binary is compiled to."""
         return self._architecture
@@ -227,6 +271,7 @@ class BootloaderResource(BinaryResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -235,12 +280,16 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             architecture=architecture,
             source=source,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "BootloaderResource"
+
 
 class GitResource(DirectoryResource):
     """A git resource."""
@@ -248,6 +297,7 @@ class GitResource(DirectoryResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -255,11 +305,15 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "GitResource"
+
 
 class KernelResource(BinaryResource):
     """A kernel resource."""
@@ -267,6 +321,7 @@ class KernelResource(BinaryResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -275,12 +330,16 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             architecture=architecture,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "KernelResource"
+
 
 class CheckpointResource(DirectoryResource):
     """A checkpoint resource. The following directory structure is expected:
@@ -293,6 +352,7 @@ class CheckpointResource(DirectoryResource):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -300,11 +360,15 @@ def __init__(
     ):
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
         )
 
+    def get_category_name(cls) -> str:
+        return "CheckpointResource"
+
 
 class SimpointResource(AbstractResource):
     """A simpoint resource. This resource stores all information required to
@@ -320,6 +384,7 @@ def __init__(
         simpoint_list: List[int] = None,
         weight_list: List[float] = None,
         warmup_interval: int = 0,
+        id: Optional[str] = None,
         workload_name: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -340,6 +405,7 @@ def __init__(
 
         super().__init__(
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
@@ -421,6 +487,9 @@ def _set_warmup_list(self) -> List[int]:
             self._simpoint_start_insts[index] = start_inst - warmup_inst
         return warmup_list
 
+    def get_category_name(cls) -> str:
+        return "SimpointResource"
+
 
 class LooppointCsvResource(FileResource, LooppointCsvLoader):
     """This Looppoint resource used to create a Looppoint resource from a
@@ -429,6 +498,7 @@ class LooppointCsvResource(FileResource, LooppointCsvLoader):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         description: Optional[str] = None,
         source: Optional[str] = None,
@@ -437,17 +507,22 @@ def __init__(
         FileResource.__init__(
             self,
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
         )
         LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path))
 
+    def get_category_name(cls) -> str:
+        return "LooppointCsvResource"
+
 
 class LooppointJsonResource(FileResource, LooppointJsonLoader):
     def __init__(
         self,
         local_path: str,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         region_id: Optional[Union[str, int]] = None,
         description: Optional[str] = None,
@@ -457,6 +532,7 @@ def __init__(
         FileResource.__init__(
             self,
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
@@ -465,6 +541,9 @@ def __init__(
             self, looppoint_file=local_path, region_id=region_id
         )
 
+    def get_category_name(cls) -> str:
+        return "LooppointJsonResource"
+
 
 class SimpointDirectoryResource(SimpointResource):
     """A Simpoint diretory resource. This Simpoint Resource assumes the
@@ -477,6 +556,7 @@ def __init__(
         weight_file: str,
         simpoint_interval: int,
         warmup_interval: int,
+        id: Optional[str] = None,
         resource_version: Optional[str] = None,
         workload_name: Optional[str] = None,
         description: Optional[str] = None,
@@ -510,6 +590,7 @@ def __init__(
             warmup_interval=warmup_interval,
             workload_name=workload_name,
             local_path=local_path,
+            id=id,
             description=description,
             source=source,
             resource_version=resource_version,
@@ -553,6 +634,183 @@ def _get_weights_and_simpoints_from_file(
             weight_list.append(weight)
         return simpoint_list, weight_list
 
+    def get_category_name(cls) -> str:
+        return "SimpointDirectoryResource"
+
+
+class SuiteResource(AbstractResource):
+    """
+    A suite resource. This resource is used to specify a suite of workloads to
+    run on a board. It maps each workload in the suite to the set of input
+    groups that workload uses.
+
+    Iterating over a suite yields its `WorkloadResource` objects, and `len`
+    returns how many workloads the suite holds.
+    """
+
+    def __init__(
+        self,
+        workloads: Optional[Dict["WorkloadResource", Set[str]]] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
+        source: Optional[str] = None,
+        id: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """
+        :param workloads: A dictionary mapping each `WorkloadResource` in the
+        suite to the set of input groups it uses. By default empty.
+        :param resource_version: Version of the resource itself.
+        :param description: Description describing this resource. Not a
+        required parameter. By default is None.
+        :param source: The source (as in "source code") for this resource
+        on gem5-resources. Not a required parameter. By default is None.
+        :param id: The ID of the suite. By default is None.
+        :param kwargs: Any further keyword arguments are accepted and ignored.
+        """
+        # `None` rather than `{}` as the default: a dict default would be
+        # shared by every suite constructed without `workloads`.
+        self._workloads = {} if workloads is None else workloads
+        self._resource_version = resource_version
+
+        super().__init__(
+            id=id,
+            description=description,
+            source=source,
+            resource_version=resource_version,
+        )
+
+    def __iter__(self) -> Generator["WorkloadResource", None, None]:
+        """
+        Returns a generator that iterates over the workloads in the suite.
+
+        :yields: Each `WorkloadResource` in the suite.
+        """
+        yield from self._workloads.keys()
+
+    def __len__(self) -> int:
+        """
+        Returns the number of workloads in the suite.
+
+        :returns: The number of workloads in the suite.
+        """
+        return len(self._workloads)
+
+    def get_category_name(cls) -> str:
+        return "SuiteResource"
+
+    def with_input_group(self, input_group: str) -> "SuiteResource":
+        """
+        Returns a new SuiteResource object with only the workloads that use the
+        specified input group. The new suite keeps this suite's ID.
+
+        :param input_group: The input group to filter the workloads by.
+        :returns: A new SuiteResource object with only the workloads that use
+        the specified input group.
+        :raises Exception: If no workload in the suite uses `input_group`.
+        """
+        available_groups = self.get_input_groups()
+        if input_group not in available_groups:
+            raise Exception(
+                f"Input group {input_group} not found in Suite.\n"
+                f"Available input groups are {available_groups}"
+            )
+        filtered_workloads = {
+            workload: input_groups
+            for workload, input_groups in self._workloads.items()
+            if input_group in input_groups
+        }
+
+        return SuiteResource(
+            id=self._id,
+            resource_version=self._resource_version,
+            description=self._description,
+            source=self._source,
+            workloads=filtered_workloads,
+        )
+
+    def get_input_groups(self) -> Set[str]:
+        """
+        Returns a set of all input groups used by the workloads in a suite.
+
+        :returns: A set of all input groups used by the workloads in a suite.
+        """
+        return {
+            input_group
+            for input_groups in self._workloads.values()
+            for input_group in input_groups
+        }
+
+
+class WorkloadResource(AbstractResource):
+    """A workload resource. This resource is used to specify a workload to run
+    on a board. It contains the function to call and the parameters to pass to
+    that function.
+    """
+
+    def __init__(
+        self,
+        function: Optional[str] = None,
+        id: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
+        source: Optional[str] = None,
+        local_path: Optional[str] = None,
+        parameters: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ):
+        """
+        :param function: The function to call on the board.
+        :param parameters: The parameters to pass to the function. If None,
+        the workload starts with an empty parameter dictionary.
+        """
+        super().__init__(
+            local_path=local_path,
+            id=id,
+            description=description,
+            source=source,
+            resource_version=resource_version,
+        )
+
+        self._func = function
+        # Never share a default dict: `set_parameter` mutates it in place.
+        self._params = {} if parameters is None else parameters
+
+    def get_id(self) -> str:
+        """Returns the ID of the workload."""
+        return self._id
+
+    def get_function_str(self) -> str:
+        """
+        Returns the name of the workload function to be run.
+
+        This function is called via the AbstractBoard's `set_workload`
+        function. The parameters from the `get_parameters` function are passed
+        to this function.
+        """
+        return self._func
+
+    def get_parameters(self) -> Dict[str, Any]:
+        """
+        Returns a dictionary mapping the workload parameters to their values.
+
+        These parameters are passed to the function specified by
+        `get_function_str` via the AbstractBoard's `set_workload` function.
+        """
+        return self._params
+
+    def set_parameter(self, parameter: str, value: Any) -> None:
+        """
+        Used to set or override a workload parameter
+
+        :param parameter: The parameter of the function to set.
+        :param value: The value to set to the parameter.
+        """
+        self._params[parameter] = value
+
+    def get_category_name(cls) -> str:
+        return "WorkloadResource"
+
 
 def obtain_resource(
     resource_id: str,
@@ -561,6 +819,8 @@ def obtain_resource(
     resource_version: Optional[str] = None,
     clients: Optional[List] = None,
     gem5_version=core.gem5Version,
+    to_path: Optional[str] = None,
+    quiet: bool = False,
 ) -> AbstractResource:
     """
     This function primarily serves as a factory for resources. It will return
@@ -573,6 +833,7 @@ def obtain_resource(
     resource is to be stored. If this parameter is not set, it will set to
     the environment variable `GEM5_RESOURCE_DIR`. If the environment is not
     set it will default to `~/.cache/gem5` if available, otherwise the CWD.
+    **Note**: This argument is ignored if the `to_path` parameter is specified.
     :param download_md5_mismatch: If the resource is present, but does not
     have the correct md5 value, the resoruce will be deleted and
     re-downloaded if this value is True. Otherwise an exception will be
@@ -584,6 +845,11 @@ def obtain_resource(
     :param gem5_version: The gem5 version to use to filter incompatible
     resource versions. By default set to the current gem5 version. If None,
     this filtering is not performed.
+    :param to_path: The path to which the resource is to be downloaded. If
+    None, the resource will be downloaded to the resource directory with
+    the file/directory name equal to the ID of the resource. **Note**: Usage
+    of this parameter will override the `resource_directory` parameter.
+    :param quiet: If True, suppress output. False by default.
     """
 
     # Obtain the resource object entry for this resource
@@ -594,47 +860,63 @@ def obtain_resource(
         gem5_version=gem5_version,
     )
 
-    to_path = None
     # If the "url" field is specified, the resoruce must be downloaded.
     if "url" in resource_json and resource_json["url"]:
+        # If the `to_path` parameter is set, we use that as the path to which
+        # the resource is to be downloaded. Otherwise, default to the
+        # `resource_directory` parameter plus the resource ID.
+        if not to_path:
+            # If the `resource_directory` parameter is not set via this
+            # function, we check the "GEM5_RESOURCE_DIR" environment variable.
+            # If this too is not set we call `_get_default_resource_dir()` to
+            # determine where the resource directory is, or should be, located.
+            if resource_directory is None:
+                resource_directory = os.getenv(
+                    "GEM5_RESOURCE_DIR", _get_default_resource_dir()
+                )
 
-        # If the `resource_directory` parameter is not set via this function, we
-        # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not
-        # set we call `_get_default_resource_dir()` to determine where the
-        # resource directory is, or should be, located.
-        if resource_directory == None:
-            resource_directory = os.getenv(
-                "GEM5_RESOURCE_DIR", _get_default_resource_dir()
-            )
-
-        # Small checks here to ensure the resource directory is valid.
-        if os.path.exists(resource_directory):
-            if not os.path.isdir(resource_directory):
-                raise Exception(
-                    "gem5 resource directory, "
-                    "'{}', exists but is not a directory".format(
-                        resource_directory
+            # Small checks here to ensure the resource directory is valid.
+            if os.path.exists(resource_directory):
+                if not os.path.isdir(resource_directory):
+                    raise Exception(
+                        "gem5 resource directory, "
+                        "'{}', exists but is not a directory".format(
+                            resource_directory
+                        )
                     )
-                )
-        else:
-            # `exist_ok=True` here as, occasionally, if multiple instance of
-            # gem5 are started simultaneously, a race condition can exist to
-            # create the resource directory. Without `exit_ok=True`, threads
-            # which lose this race will thrown a `FileExistsError` exception.
-            # `exit_ok=True` ensures no exception is thrown.
-            os.makedirs(resource_directory, exist_ok=True)
 
-        # This is the path to which the resource is to be stored.
-        to_path = os.path.join(resource_directory, resource_id)
+            # This is the path to which the resource is to be stored.
+            to_path = os.path.join(resource_directory, resource_id)
+
+        assert to_path
+
+        # Here we ensure the directory in which the resource is to be stored
+        # is created.
+        #
+        # `exist_ok=True` here as, occasionally, if multiple instances of gem5
+        # are started simultaneously, a race condition can exist to create the
+        # resource directory. Without `exist_ok=True`, threads which lose this
+        # race will throw a `FileExistsError` exception. `exist_ok=True`
+        # ensures no exception is thrown.
+        try:
+            Path(to_path).parent.mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            fatal(
+                f"Recursive creation of the directory "
+                f"'{Path(to_path).parent.absolute()}' failed. \n"
+                f"Perhaps the path specified, '{to_path}', is incorrect?\n"
+                f"Failed with Exception:\n{e}"
+            )
 
         # Download the resource if it does not already exist.
         get_resource(
             resource_name=resource_id,
-            to_path=os.path.join(resource_directory, resource_id),
+            to_path=to_path,
             download_md5_mismatch=download_md5_mismatch,
             resource_version=resource_version,
             clients=clients,
             gem5_version=gem5_version,
+            quiet=quiet,
         )
 
     # Obtain the type from the JSON. From this we will determine what subclass
@@ -658,6 +940,54 @@ def obtain_resource(
     assert resources_category in _get_resource_json_type_map
     resource_class = _get_resource_json_type_map[resources_category]
 
+    if resources_category == "suite":
+        workloads = resource_json["workloads"]
+        workloads_obj = {}
+        for workload in workloads:
+            workloads_obj[
+                obtain_resource(
+                    workload["id"],
+                    resource_version=workload["resource_version"],
+                    resource_directory=resource_directory,
+                    clients=clients,
+                    gem5_version=gem5_version,
+                )
+            ] = set(workload["input_group"])
+        resource_json["workloads"] = workloads_obj
+
+    if resources_category == "workload":
+        # This parses the "resources" and "additional_params" fields of the
+        # workload resource into a dictionary of AbstractResource objects and
+        # strings respectively.
+        params = {}
+        if "resources" in resource_json:
+            for key in resource_json["resources"].keys():
+                assert isinstance(key, str)
+                value = resource_json["resources"][key]
+
+                if isinstance(value, str):
+                    warn(
+                        "Deprecation warning: resources field in workloads has changed "
+                        "from { category: id } to "
+                        "{ category: { id: id, resource_version: resource_version } }. "
+                        "The current develop branch of gem5 supports both formats "
+                        "but this will be removed in the 23.1 release."
+                    )
+                    params[key] = obtain_resource(
+                        value,
+                    )
+                elif isinstance(value, dict):
+                    params[key] = obtain_resource(
+                        value["id"],
+                        resource_version=value["resource_version"],
+                    )
+        if "additional_params" in resource_json:
+            for key in resource_json["additional_params"].keys():
+                assert isinstance(key, str)
+                value = resource_json["additional_params"][key]
+                assert isinstance(value, str)
+                params[key] = value
+        resource_json["parameters"] = params
     # Once we know what AbstractResource subclass we are using, we create it.
     # The fields in the JSON object are assumed to map like-for-like to the
     # subclass contructor, so we can pass the resource_json map directly.
@@ -812,4 +1142,6 @@ def Resource(
     "resource": Resource,
     "looppoint-pinpoint-csv": LooppointCsvResource,
     "looppoint-json": LooppointJsonResource,
+    "suite": SuiteResource,
+    "workload": WorkloadResource,
 }
diff --git a/src/python/gem5/resources/workload.py b/src/python/gem5/resources/workload.py
index 0798b891ab..bdb596921f 100644
--- a/src/python/gem5/resources/workload.py
+++ b/src/python/gem5/resources/workload.py
@@ -24,217 +24,67 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from .resource import obtain_resource
+from .resource import obtain_resource, WorkloadResource
 from .client import get_resource_json_obj
 
 from _m5 import core
+from m5.util import warn
 
 from typing import Dict, Any, List, Optional
 
 
-class AbstractWorkload:
+def CustomWorkload(function: str, parameters: Dict[str, Any]):
     """
-    Workloads contain information needed to build a workload.
+    A custom workload gem5 resource. It can be used to specify a custom,
+    local workload.
 
-    A workload specifies a function and its parameters to run on a board to
-    set a workload. Workload's are passed to board via the `AbstractBoard`'s
-    `set_workload` function.
+    **Warning**: This `CustomWorkload` class is deprecated. It will be removed in a
+    future release of gem5. Please use the `gem5.resources.resource.WorkloadResource`
+    class instead.
 
-    The `AbstractBoard` has a `set_workload` function which accepts an
-    AbstractWorkload. The `set_workload` function uses the `get_function_str`
-    to determine which function should be called on the board and the
-    `get_parameters` function specifies the parameters to be passed.
-
-    Example
-    -------
-
-    ```py
-    workload = CustomWorkload(
-        function = "set_se_binary_workload",
-        parameters = {
-            "binary" : Resource("x86-print-this"),
-            "arguments" : ["hello", 6]
-        },
-    )
-
-    board.set_workload(workload)
-    ```
-
-    The above is the equivalent of:
-
-    ```py
-    board.set_se_binary_workload(
-        binary = Resource("x86-print-this"),
-        arguments = ["hello", 6],
-    )
-    ```
-
-    Notes
-    -----
-    This class should not be used directly. Please use `Workload` or
-    `CustomWorkload`.
-    """
-
-    def __init__(self, function: str, parameters: Dict[str, Any]) -> None:
-        self._func = function
-        self._params = parameters
-
-    def get_function_str(self) -> str:
-        """
-        Returns the name of the workload function to be run.
-
-        This function is called via the AbstractBoard's `set_workload`
-        function. The parameters from the `get_parameters` function are passed
-        to this function.
-        """
-        return self._func
-
-    def get_parameters(self) -> Dict[str, Any]:
-        """
-        Returns a dictionary mapping the workload parameters to their values.
-
-        These parameters are passed to the function specified by
-        `get_function_str` via the AbstractBoard's `set_workload` function.
-        """
-        return self._params
-
-    def set_parameter(self, parameter: str, value: Any) -> None:
-        """
-        Used to set or override a workload parameter
-
-        :param parameter: The parameter of the function to set.
-        :param value: The value to set to the parameter.
-        """
-        self._params[parameter] = value
-
-
-class CustomWorkload(AbstractWorkload):
+    The class has been stealthily converted to a function which wraps the
+    `WorkloadResource` class.
     """
-    A workload specified locally (i.e., not via gem5-resources as with the
-    `Workload` class). Here the user specifies the function and the parameters
-    to be passed.
-
-    Usage
-    -----
-
-    ```py
-    workload = CustomWorkload(
-        function = "set_se_binary_workload",
-        parameters = {
-            "binary" : Resource("x86-print-this"),
-            "arguments" : ["hello", 6]
-        },
+    warn(
+        "The `CustomWorkload` class is deprecated. Please use "
+        "the `gem5.resources.resource.WorkloadResource` class instead."
     )
-
-    board.set_workload(workload)
-    ```
-    """
-
-    def __init__(self, function: str, parameters: Dict[str, Any]) -> None:
-        super().__init__(function=function, parameters=parameters)
+    return WorkloadResource(function=function, parameters=parameters)
 
 
-class Workload(AbstractWorkload):
+def Workload(
+    workload_name: str,
+    resource_directory: Optional[str] = None,
+    resource_version: Optional[str] = None,
+    clients: Optional[List] = None,
+    gem5_version: Optional[str] = core.gem5Version,
+):
     """
-    The `Workload` class loads a workload's information from gem5-resources
-    based on a name/id passed via the constructor.
-
-    Usage
-    -----
-
-    ```py
-    # Determine what workload we want to run.
-    workload = Workload("example-workload-id")
-
-    # Optionally we can override a parameter in the workload. In this example
-    # we are going to run this workload with a difference kernel.
-    workload.set_parameter("kernel", Resource("arm64-linux-kernel-4.14.134"))
-
-    # We then set this workload to the board.
-    board.set_workload(workload)
-    ```
+    **Warning**: The `Workload` class is deprecated. It will be removed in a
+        future release of gem5. Please use the `gem5.resources.resource.WorkloadResource`
+        class instead.
 
+    The class has been stealthily converted to a function which wraps the
+    `WorkloadResource` class.
     """
-
-    def __init__(
-        self,
-        workload_name: str,
-        resource_directory: Optional[str] = None,
-        resource_version: Optional[str] = None,
-        clients: Optional[List] = None,
-        gem5_version: Optional[str] = core.gem5Version,
-    ) -> None:
-        """
-        This constructor will load the workload details from the workload with
-        the given name/id.
-
-        This function assumes the dictionary returned by the downloader's
-        `get_workload_json_obj` is a dictionary. An example of the schema is
-        shown below:
-
-        ```json
-        {
-            "category" : "workload",
-            "id" : "x86-ubuntu-18.04-echo-hello",
-            "description" : "Description of workload here",
-            "function" : "set_kernel_disk_workload",
-            "resources" : {
-                "kernel" : "x86-linux-kernel-5.4.49",
-                "disk-image" : "x86-ubuntu-18.04-img"
-            },
-            "additional_params" : {
-                "readfile_contents" : "m5_exit; echo 'hello'; m5_exit"
-            }
-        }
-        ```
-
-        This resource will result in the equivalent of the following action
-        being taken:
-
-        ```python
-        board.set_kernel_disk_workload(
-            kernel = Resource("x86-linux-kernel-5.4.49"),
-            disk-image = Resource("x86-ubuntu-18.04-img"),
-            readfile_contents = "m5_exit; echo 'hello'; m5_exit",
-        )
-        ```
-
-        :param workload_name: The name of the workload in the resources.json
-        file to be loaded.
-        :param resource_directory: An optional parameter that specifies where
-        any resources should be download and accessed from. If None, a default
-        location will be used. None by default.
-        :param gem5_version: The gem5 version for the Workload to be loaded.
-        By default, the current gem5 version is used. This will filter
-        resources which are incompatible with the current gem5 version. If
-        None, no filtering will be done.
-        """
-
-        workload_json = get_resource_json_obj(
-            workload_name,
-            resource_version=resource_version,
-            clients=clients,
-            gem5_version=gem5_version,
-        )
-
-        func = workload_json["function"]
-        assert isinstance(func, str)
-
-        params = {}
-        if "resources" in workload_json:
-            for key in workload_json["resources"].keys():
-                assert isinstance(key, str)
-                value = workload_json["resources"][key]
-                assert isinstance(value, str)
-                params[key] = obtain_resource(
-                    value,
-                    resource_directory=resource_directory,
-                    gem5_version=gem5_version,
-                )
-
-        if "additional_params" in workload_json:
-            for key in workload_json["additional_params"]:
-                assert isinstance(key, str)
-                params[key] = workload_json["additional_params"][key]
-
-        super().__init__(function=func, parameters=params)
+    warn(
+        "`Workload` has been deprecated. Please use the `obtain_resource` "
+        "function instead:\n\n"
+        "```\n"
+        "from gem5.resources.resource import obtain_resource\n"
+        "workload = obtain_resource(\n"
+        f"    resource_id={workload_name!r},\n"
+        f"    resource_directory={resource_directory!r},\n"
+        f"    gem5_version={gem5_version!r},\n"
+        f"    clients={clients!r},\n"
+        f"    resource_version={resource_version!r},\n"
+        ")\n"
+        "```"
+    )
+    return obtain_resource(
+        workload_name,
+        resource_directory=resource_directory,
+        gem5_version=gem5_version,
+        clients=clients,
+        resource_version=resource_version,
+    )
diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py
index 37998d3a9b..6f0e0a1eac 100644
--- a/src/python/gem5/simulate/exit_event_generators.py
+++ b/src/python/gem5/simulate/exit_event_generators.py
@@ -48,8 +48,7 @@ def wrapped_generator(*args, **kw_args):
             f"No behavior was set by the user for {type}."
             f" Default behavior is {effect}."
         )
-        for value in gen(*args, **kw_args):
-            yield value
+        yield from gen(*args, **kw_args)
 
     return wrapped_generator
 
diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py
index 0551745b36..5470202830 100644
--- a/src/python/gem5/simulate/simulator.py
+++ b/src/python/gem5/simulate/simulator.py
@@ -34,7 +34,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Optional, List, Tuple, Dict, Generator, Union
+from typing import Optional, List, Tuple, Dict, Generator, Union, Callable
 
 from .exit_event_generators import (
     warn_default_decorator,
@@ -83,7 +83,14 @@ def __init__(
         board: AbstractBoard,
         full_system: Optional[bool] = None,
         on_exit_event: Optional[
-            Dict[ExitEvent, Generator[Optional[bool], None, None]]
+            Dict[
+                ExitEvent,
+                Union[
+                    Generator[Optional[bool], None, None],
+                    List[Callable],
+                    Callable,
+                ],
+            ]
         ] = None,
         expected_execution_order: Optional[List[ExitEvent]] = None,
         checkpoint_path: Optional[Path] = None,
@@ -94,14 +101,21 @@ def __init__(
         This is optional and used to override default behavior. If not set,
         whether or not to run in FS mode will be determined via the board's
         `is_fullsystem()` function.
-        :param on_exit_event: An optional map to specify the generator to
-        execute on each exit event. The generator may yield a boolean which,
-        if True, will have the Simulator exit the run loop.
-        :param expected_execution_order: May be specified to check the exit
-        events come in a specified order. If the order specified is not
-        encountered (e.g., 'Workbegin', 'Workend', then 'Exit'), an Exception
-        is thrown. If this parameter is not specified, any ordering of exit
-        events is valid.
+        :param on_exit_event: An optional map to specify what to execute on
+        each exit event. There are three possibilities here: a generator, a
+        list of functions, or a single function:
+        1. Generator: The generator may yield a boolean each time the
+        associated exit event is encountered. If True the simulator will exit
+        the simulation loop.
+        2. List of functions: Each function must be callable with no mandatory
+        arguments and return a boolean specifying if the Simulation should exit
+        the simulation loop. Upon each exit event the next function in the
+        list is executed, in order. Once every function in the list has been
+        executed, the default behavior for that exit event is used.
+        3. Single function: The function must be callable with no mandatory
+        arguments and return a boolean specifying if the Simulation should exit
+        or not. This function is executed each time the associated exit event
+        is encountered.
         :param checkpoint_path: An optional parameter specifying the directory
         of the checkpoint to instantiate from. When the path is None, no
         checkpoint will be loaded. By default, the path is None. **This
@@ -111,6 +125,9 @@ def __init__(
         `on_exit_event` usage notes
         ---------------------------
 
+        With Generators
+        ===============
+
         The `on_exit_event` parameter specifies a Python generator for each
         exit event. `next(<generator>)` is run each time an exit event. The
         generator may yield a boolean. If this value of this boolean is True
@@ -142,6 +159,77 @@ def unique_exit_event():
         encountered, will dump gem5 statistics the second time an exit event is
         encountered, and will terminate the Simulator run loop the third time.
 
+        With a list of functions
+        ========================
+
+        Alternatively, instead of passing a generator per exit event, a list of
+        functions may be passed. Each function must take no mandatory arguments
+        and return True if the simulator is to exit after being called.
+
+        An example:
+
+        ```
+        def stop_simulation() -> bool:
+            return True
+
+        def switch_cpus() -> bool:
+            processor.switch()
+            return False
+
+        def print_hello() -> None:
+            # Here we don't explicitly return a boolean, but the simulator
+            # treats a None return as False. Ergo the Simulation loop is not
+            # terminated.
+            print("Hello")
+
+
+        simulator = Simulator(
+            board=board,
+            on_exit_event = {
+                ExitEvent.Exit : [
+                    print_hello,
+                    switch_cpus,
+                    print_hello,
+                    stop_simulation
+                ],
+            },
+        )
+        ```
+
+        Upon each `EXIT` type exit event the list will function as a queue,
+        with the top function of the list popped and executed. Therefore, in
+        this example, the first `EXIT` type exit event will cause `print_hello`
+        to be executed, and the second `EXIT` type exit event will cause the
+        `switch_cpus` function to run. The third will execute `print_hello`
+        again and, finally, the fourth exit event will call
+        `stop_simulation`, which will stop the simulation as it returns True.
+
+        With a function
+        ===============
+        A single function can be passed. In this case that function is
+        executed on every exit event of that type. The function must not
+        require any arguments and should return a boolean specifying if the
+        simulation loop should end after it is executed.
+        An example:
+        ```
+        def print_hello() -> bool:
+            print("Hello")
+            return False
+        simulator = Simulator(
+            board=board,
+            on_exit_event = {
+                ExitEvent.Exit : print_hello
+            },
+        )
+        ```
+        The above will print "Hello" on every `Exit` type Exit Event. As the
+        function returns False, the simulation loop will not end on these
+        events.
+
+
+        Exit Event defaults
+        ===================
+
         Each exit event has a default behavior if none is specified by the
         user. These are as follows:
 
@@ -202,7 +290,28 @@ def unique_exit_event():
         }
 
         if on_exit_event:
-            self._on_exit_event = on_exit_event
+            self._on_exit_event = {}
+            for key, value in on_exit_event.items():
+                if isinstance(value, Generator):
+                    self._on_exit_event[key] = value
+                elif isinstance(value, List):
+                    # In instances where we have a list of functions, we
+                    # convert this to a generator.
+                    self._on_exit_event[key] = (func() for func in value)
+                elif isinstance(value, Callable):
+                    # In instances where the user passes a lone function, the
+                    # function is called on every exit event of that type. Here
+                    # we convert the function into an infinite generator.
+                    def function_generator(func: Callable):
+                        while True:
+                            yield func()
+
+                    self._on_exit_event[key] = function_generator(func=value)
+                else:
+                    raise Exception(
+                        f"`on_exit_event` for '{key.value}' event is "
+                        "not a Generator or List[Callable]."
+                    )
         else:
             self._on_exit_event = self._default_on_exit_dict
 
@@ -355,7 +464,7 @@ def get_roi_ticks(self) -> List[int]:
         """
         start = 0
         to_return = []
-        for (exit_event, tick) in self._tick_stopwatch:
+        for exit_event, tick in self._tick_stopwatch:
             if exit_event == ExitEvent.WORKBEGIN:
                 start = tick
             elif exit_event == ExitEvent.WORKEND:
@@ -371,7 +480,6 @@ def _instantiate(self) -> None:
         """
 
         if not self._instantiated:
-
             # Before anything else we run the AbstractBoard's
             # `_pre_instantiate` function.
             self._board._pre_instantiate()
@@ -448,7 +556,6 @@ def run(self, max_ticks: int = m5.MaxTick) -> None:
 
         # This while loop will continue until an a generator yields True.
         while True:
-
             self._last_exit_event = m5.simulate(max_ticks)
 
             # Translate the exit event cause to the exit event enum.
@@ -479,8 +586,8 @@ def run(self, max_ticks: int = m5.MaxTick) -> None:
                 # If the user's generator has ended, throw a warning and use
                 # the default generator for this exit event.
                 warn(
-                    "User-specified generator for the exit event "
-                    f"'{exit_enum.value}' has ended. Using the default "
+                    "User-specified generator/function list for the exit "
+                    f"event'{exit_enum.value}' has ended. Using the default "
                     "generator."
                 )
                 exit_on_completion = next(
@@ -496,7 +603,7 @@ def run(self, max_ticks: int = m5.MaxTick) -> None:
             self._exit_event_count += 1
 
             # If the generator returned True we will return from the Simulator
-            # run loop.
+            # run loop. In the case of a function: if it returned True.
             if exit_on_completion:
                 return
 
diff --git a/src/python/gem5/utils/filelock.py b/src/python/gem5/utils/filelock.py
index 6fb4e3e1d1..309a9f868b 100644
--- a/src/python/gem5/utils/filelock.py
+++ b/src/python/gem5/utils/filelock.py
@@ -32,7 +32,7 @@ class FileLockException(Exception):
     pass
 
 
-class FileLock(object):
+class FileLock:
     """A file locking mechanism that has context-manager support so
     you can use it in a with statement. This should be relatively cross
     compatible as it doesn't rely on msvcrt or fcntl for the locking.
diff --git a/src/python/gem5/utils/multiprocessing/README.md b/src/python/gem5/utils/multiprocessing/README.md
index da2116c44c..c6b0406e54 100644
--- a/src/python/gem5/utils/multiprocessing/README.md
+++ b/src/python/gem5/utils/multiprocessing/README.md
@@ -48,8 +48,8 @@ def run_sim(name):
     from gem5.simulate.simulator import Simulator
     board = X86DemoBoard()
     board.set_kernel_disk_workload(
-        kernel=Resource("x86-linux-kernel-5.4.49"),
-        disk_image=Resource("x86-ubuntu-18.04-img"),
+        kernel=obtain_resource("x86-linux-kernel-5.4.49"),
+        disk_image=obtain_resource("x86-ubuntu-18.04-img"),
     )
     simulator = Simulator(board=board)
     simulator.run(max_ticks=10000000)
diff --git a/src/python/gem5/utils/multiprocessing/context.py b/src/python/gem5/utils/multiprocessing/context.py
index 87917d1bfb..0fc48e2789 100644
--- a/src/python/gem5/utils/multiprocessing/context.py
+++ b/src/python/gem5/utils/multiprocessing/context.py
@@ -33,6 +33,7 @@
 from multiprocessing import context, process
 from multiprocessing.context import DefaultContext
 
+
 # The `_start_method` must be `None` for the `Spawn_gem5Process` class.
 # Otherwise, in `_bootstrap` in the `BaseProcess` it will try to force the
 # `_start_method` to be gem5-specific, which the `multiprocessing` module
diff --git a/src/python/gem5/utils/progress_bar.py b/src/python/gem5/utils/progress_bar.py
index 0ac13200b9..43d3a3dec8 100644
--- a/src/python/gem5/utils/progress_bar.py
+++ b/src/python/gem5/utils/progress_bar.py
@@ -55,6 +55,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
     tqdm = FakeTQDM()
     _have_tqdm = False
 
+
 # Hook for the progress bar
 def progress_hook(t):
     if not _have_tqdm:
diff --git a/src/python/gem5py.cc b/src/python/gem5py.cc
index f2d87596f0..37ddee2e7c 100644
--- a/src/python/gem5py.cc
+++ b/src/python/gem5py.cc
@@ -51,6 +51,21 @@ namespace py = pybind11;
 int
 main(int argc, const char **argv)
 {
+#if PY_VERSION_HEX >= 0x03080000
+    // Preinitialize Python for Python 3.8+
+    // This ensures that the locale configuration takes effect
+    PyStatus status;
+    PyPreConfig preconfig;
+    PyPreConfig_InitPythonConfig(&preconfig);
+
+    preconfig.utf8_mode = 1;
+
+    status = Py_PreInitialize(&preconfig);
+    if (PyStatus_Exception(status)) {
+        Py_ExitStatusException(status);
+    }
+#endif
+
     py::scoped_interpreter guard;
 
     // Embedded python doesn't set up sys.argv, so we'll do that ourselves.
diff --git a/src/python/gem5py_m5.cc b/src/python/gem5py_m5.cc
new file mode 100644
index 0000000000..78337c1ffe
--- /dev/null
+++ b/src/python/gem5py_m5.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2019 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <pybind11/embed.h>
+
+#include <cstdlib>
+#include <iostream>
+
+namespace py = pybind11;
+
+/*
+ * This wrapper program runs python scripts using the python interpreter which
+ * will be built into gem5. Its first argument is the script to run, and then
+ * all subsequent arguments are passed to the python script as its argv.
+ */
+
+int
+main(int argc, const char **argv)
+{
+    py::scoped_interpreter guard;
+
+    // Embedded python doesn't set up sys.argv, so we'll do that ourselves.
+    py::list py_argv;
+    auto sys = py::module::import("sys");
+    if (py::hasattr(sys, "argv")) {
+        // sys.argv already exists, so grab that.
+        py_argv = sys.attr("argv");
+    } else {
+        // sys.argv doesn't exist, so create it.
+        sys.add_object("argv", py_argv);
+    }
+
+    auto importer = py::module_::import("importer");
+    importer.attr("install")();
+
+    // Clear out argv just in case it has something in it.
+    py_argv.attr("clear")();
+
+    if (argc < 2) {
+        std::cerr << "Usage: gem5py SCRIPT [arg] ..." << std::endl;
+        std::exit(1);
+    }
+
+    // Fill it with our argvs.
+    for (int i = 1; i < argc; i++)
+        py_argv.append(argv[i]);
+
+    // Actually call the script.
+    py::eval_file(argv[1]);
+
+    return 0;
+}
diff --git a/src/python/importer.py b/src/python/importer.py
index d3bdd593ef..6b64f1cb5c 100644
--- a/src/python/importer.py
+++ b/src/python/importer.py
@@ -38,11 +38,14 @@ def __init__(self, code):
     def exec_module(self, module):
         exec(self.code, module.__dict__)
 
+    def get_code(self, _):
+        return self.code
+
 
 # Simple importer that allows python to import data from a dict of
 # code objects.  The keys are the module path, and the items are the
 # filename and bytecode of the file.
-class CodeImporter(object):
+class CodeImporter:
     def __init__(self):
         self.modules = {}
         override_var = os.environ.get("M5_OVERRIDE_PY_SOURCE", "false")
@@ -61,7 +64,7 @@ def find_spec(self, fullname, path, target=None):
         abspath, code = self.modules[fullname]
 
         if self.override and os.path.exists(abspath):
-            src = open(abspath, "r").read()
+            src = open(abspath).read()
             code = compile(src, abspath, "exec")
 
         is_package = os.path.basename(abspath) == "__init__.py"
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 08105d8833..31b0f5d180 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -47,6 +47,8 @@
 from m5.util import *
 from m5.util.pybind import *
 
+from m5.citations import gem5_citations
+
 # Use the pyfdt and not the helper class, because the fdthelper
 # relies on the SimObject definition
 from m5.ext.pyfdt import pyfdt
@@ -215,6 +217,8 @@ def __init__(cls, name, bases, dict):
         cls._instantiated = False  # really instantiated, cloned, or subclassed
         cls._init_called = False  # Used to check if __init__ overridden
 
+        cls._citations = gem5_citations  # Default to gem5's citations
+
         # We don't support multiple inheritance of sim objects.  If you want
         # to, you must fix multidict to deal with it properly. Non sim-objects
         # are ok, though
@@ -227,7 +231,10 @@ def __init__(cls, name, bases, dict):
                     "SimObjects do not support multiple inheritance"
                 )
 
-        base = bases[0]
+        # If the base class is not set, we assume type `object`. This ensures
+        # `class Foo(object): pass` is considered equivalent to
+        # `class Foo: pass`.
+        base = bases[0] if len(bases) > 0 else object
 
         # Set up general inheritance via multidicts.  A subclass will
         # inherit all its settings from the base class.  The only time
@@ -517,7 +524,7 @@ def py_call(self, *args, **kwargs):
 # This class holds information about each simobject parameter
 # that should be displayed on the command line for use in the
 # configuration system.
-class ParamInfo(object):
+class ParamInfo:
     def __init__(self, type, desc, type_str, example, default_val, access_str):
         self.type = type
         self.desc = desc
@@ -542,7 +549,7 @@ def __init__(self, message):
         super().__init__(message)
 
 
-class SimObjectCliWrapper(object):
+class SimObjectCliWrapper:
     """
     Wrapper class to restrict operations that may be done
     from the command line on SimObjects.
@@ -605,7 +612,7 @@ def __iter__(self):
 # The SimObject class is the root of the special hierarchy.  Most of
 # the code in this class deals with the configuration hierarchy itself
 # (parent/child node relationships).
-class SimObject(object, metaclass=MetaSimObject):
+class SimObject(metaclass=MetaSimObject):
     # Specify metaclass.  Any class inheriting from SimObject will
     # get this metaclass.
     type = "SimObject"
@@ -870,7 +877,7 @@ def __setattr__(self, attr, value):
                 hr_value = value
                 value = param.convert(value)
             except Exception as e:
-                msg = "%s\nError setting param %s.%s to %s\n" % (
+                msg = "{}\nError setting param {}.{} to {}\n".format(
                     e,
                     self.__class__.__name__,
                     attr,
@@ -1248,9 +1255,8 @@ def descendants(self):
         # The order of the dict is implementation dependent, so sort
         # it based on the key (name) to ensure the order is the same
         # on all hosts
-        for (name, child) in sorted(self._children.items()):
-            for obj in child.descendants():
-                yield obj
+        for name, child in sorted(self._children.items()):
+            yield from child.descendants()
 
     # Call C++ to create C++ object corresponding to this object
     def createCCObject(self):
@@ -1271,7 +1277,7 @@ def getPort(self, if_name, idx):
     def connectPorts(self):
         # Sort the ports based on their attribute name to ensure the
         # order is the same on all hosts
-        for (attr, portRef) in sorted(self._port_refs.items()):
+        for attr, portRef in sorted(self._port_refs.items()):
             portRef.ccConnect()
 
     # Default function for generating the device structure.
@@ -1283,8 +1289,7 @@ def generateDeviceTree(self, state):
     def recurseDeviceTree(self, state):
         for child in self._children.values():
             for item in child:  # For looping over SimObjectVectors
-                for dt in item.generateDeviceTree(state):
-                    yield dt
+                yield from item.generateDeviceTree(state)
 
     # On a separate method otherwise certain buggy Python versions
     # would fail with: SyntaxError: unqualified exec is not allowed
diff --git a/src/python/m5/citations.py b/src/python/m5/citations.py
new file mode 100644
index 0000000000..3df9de22a1
--- /dev/null
+++ b/src/python/m5/citations.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2023 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pathlib import Path
+from typing import Type
+
+import m5.options
+
+
+def add_citation(sim_obj_cls: Type["SimObject"], citation: str):
+    """Add a citation to a SimObject class.
+
+    :param sim_obj_cls: The SimObject class to add the citation to.
+    :param citation: The citation to add. Should be a bibtex-compatible
+                     entry or entries.
+
+    This function will encode the citation into the SimObject class and it
+    will be included in the citations in the output directory when the
+    SimObject is used. If you have multiple citations, then you should include
+    one multiline string with all of the citations.
+    """
+
+    sim_obj_cls._citations += citation
+
+
+def gather_citations(root: "SimObject"):
+    """Based on the root SimObject, walk the object hierarchy and gather all
+    of the citations together and then print them to citations.bib in the
+    output directory.
+    """
+
+    citations = {}
+    for obj in root.descendants():
+        loc = 0
+        while loc >= 0:
+            key, cite, loc = _get_next_key_entry(obj._citations, loc)
+            # If a key repeats, then just overwrite it
+            citations[key] = cite
+
+    with open(Path(m5.options.outdir) / "citations.bib", "w") as output:
+        output.writelines(citations.values())
+
+
+def _get_next_key_entry(citations: str, loc: int = 0):
+    """Return the key, the citation, and the next citation's start (or -1)"""
+
+    start = citations.find("@", loc)
+    key_start = citations.find("{", start)
+    key_end = citations.find(",", key_start)
+    end = citations.find("@", start + 1)
+    if end == -1:
+        end = len(citations)
+        next = -1
+    else:
+        next = end
+
+    return citations[key_start:key_end], citations[start:end], next
+
+
+gem5_citations = """@article{Binkert:2011:gem5,
+  author       = {Nathan L. Binkert and
+                  Bradford M. Beckmann and
+                  Gabriel Black and
+                  Steven K. Reinhardt and
+                  Ali G. Saidi and
+                  Arkaprava Basu and
+                  Joel Hestness and
+                  Derek Hower and
+                  Tushar Krishna and
+                  Somayeh Sardashti and
+                  Rathijit Sen and
+                  Korey Sewell and
+                  Muhammad Shoaib Bin Altaf and
+                  Nilay Vaish and
+                  Mark D. Hill and
+                  David A. Wood},
+  title        = {The gem5 simulator},
+  journal      = {{SIGARCH} Comput. Archit. News},
+  volume       = {39},
+  number       = {2},
+  pages        = {1--7},
+  year         = {2011},
+  url          = {https://doi.org/10.1145/2024716.2024718},
+  doi          = {10.1145/2024716.2024718}
+}
+@article{Lowe-Power:2020:gem5-20,
+  author       = {Jason Lowe{-}Power and
+                  Abdul Mutaal Ahmad and
+                  Ayaz Akram and
+                  Mohammad Alian and
+                  Rico Amslinger and
+                  Matteo Andreozzi and
+                  Adri{\\`{a}} Armejach and
+                  Nils Asmussen and
+                  Srikant Bharadwaj and
+                  Gabe Black and
+                  Gedare Bloom and
+                  Bobby R. Bruce and
+                  Daniel Rodrigues Carvalho and
+                  Jer{\'{o}}nimo Castrill{\'{o}}n and
+                  Lizhong Chen and
+                  Nicolas Derumigny and
+                  Stephan Diestelhorst and
+                  Wendy Elsasser and
+                  Marjan Fariborz and
+                  Amin Farmahini Farahani and
+                  Pouya Fotouhi and
+                  Ryan Gambord and
+                  Jayneel Gandhi and
+                  Dibakar Gope and
+                  Thomas Grass and
+                  Bagus Hanindhito and
+                  Andreas Hansson and
+                  Swapnil Haria and
+                  Austin Harris and
+                  Timothy Hayes and
+                  Adrian Herrera and
+                  Matthew Horsnell and
+                  Syed Ali Raza Jafri and
+                  Radhika Jagtap and
+                  Hanhwi Jang and
+                  Reiley Jeyapaul and
+                  Timothy M. Jones and
+                  Matthias Jung and
+                  Subash Kannoth and
+                  Hamidreza Khaleghzadeh and
+                  Yuetsu Kodama and
+                  Tushar Krishna and
+                  Tommaso Marinelli and
+                  Christian Menard and
+                  Andrea Mondelli and
+                  Tiago M{\"{u}}ck and
+                  Omar Naji and
+                  Krishnendra Nathella and
+                  Hoa Nguyen and
+                  Nikos Nikoleris and
+                  Lena E. Olson and
+                  Marc S. Orr and
+                  Binh Pham and
+                  Pablo Prieto and
+                  Trivikram Reddy and
+                  Alec Roelke and
+                  Mahyar Samani and
+                  Andreas Sandberg and
+                  Javier Setoain and
+                  Boris Shingarov and
+                  Matthew D. Sinclair and
+                  Tuan Ta and
+                  Rahul Thakur and
+                  Giacomo Travaglini and
+                  Michael Upton and
+                  Nilay Vaish and
+                  Ilias Vougioukas and
+                  Zhengrong Wang and
+                  Norbert Wehn and
+                  Christian Weis and
+                  David A. Wood and
+                  Hongil Yoon and
+                  {\'{E}}der F. Zulian},
+  title        = {The gem5 Simulator: Version 20.0+},
+  journal      = {CoRR},
+  volume       = {abs/2007.03152},
+  year         = {2020},
+  url          = {https://arxiv.org/abs/2007.03152},
+  eprinttype    = {arXiv},
+  eprint       = {2007.03152}
+}
+"""
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index ddcb024f8b..31eaf83690 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -39,6 +39,7 @@
 import code
 import datetime
 import os
+import runpy
 import socket
 import sys
 
@@ -126,13 +127,13 @@ def parse_options():
     option(
         "--stdout-file",
         metavar="FILE",
-        default="simout",
+        default="simout.txt",
         help="Filename for -r redirection [Default: %default]",
     )
     option(
         "--stderr-file",
         metavar="FILE",
-        default="simerr",
+        default="simerr.txt",
         help="Filename for -e redirection [Default: %default]",
     )
     option(
@@ -183,6 +184,16 @@ def collect_args(option, opt_str, value, parser):
         del parser.rargs[:]
         setattr(parser.values, option.dest, (value, extra_args))
 
+    option(
+        "-m",
+        type=str,
+        help="run library module as a script (terminates option list)",
+        default="",
+        metavar="mod",
+        action="callback",
+        callback=collect_args,
+    )
+
     option(
         "-c",
         type=str,
@@ -193,6 +204,14 @@ def collect_args(option, opt_str, value, parser):
         callback=collect_args,
     )
 
+    option(
+        "-P",
+        action="store_true",
+        default=False,
+        help="Don't prepend the script directory to the system path. "
+        "Mimics Python 3's `-P` option.",
+    )
+
     option(
         "-s",
         action="store_true",
@@ -348,13 +367,14 @@ def interact(scope):
 
 
 def _check_tracing():
-    import m5
     import _m5.core
 
+    from .util import fatal
+
     if _m5.core.TRACING_ON:
         return
 
-    m5.fatal("Tracing is not enabled.  Compile with TRACING_ON")
+    fatal("Tracing is not enabled.  Compile with TRACING_ON")
 
 
 def main():
@@ -369,7 +389,7 @@ def main():
     from . import stats
     from . import trace
 
-    from .util import inform, fatal, panic, isInteractive
+    from .util import inform, panic, isInteractive
     from m5.util.terminal_formatter import TerminalFormatter
 
     options, arguments = parse_options()
@@ -504,7 +524,7 @@ def quote(arg: str) -> str:
             if os.name == "nt" and os.sep == "\\":
                 # If a Windows machine, we manually quote the string.
                 arg = arg.replace('"', '\\"')
-                if re.search("\s", args):
+                if re.search(r"\s", args):
                     # We quote args which have whitespace.
                     arg = '"' + arg + '"'
                 return arg
@@ -516,7 +536,9 @@ def quote(arg: str) -> str:
         print()
 
     # check to make sure we can find the listed script
-    if not options.c and (not arguments or not os.path.isfile(arguments[0])):
+    if not (options.c or options.m) and (
+        not arguments or not os.path.isfile(arguments[0])
+    ):
         if arguments and not os.path.isfile(arguments[0]):
             print(f"Script {arguments[0]} not found")
 
@@ -594,39 +616,51 @@ def quote(arg: str) -> str:
 
     sys.argv = arguments
 
-    if options.c:
-        filedata = options.c[0]
-        filecode = compile(filedata, "<string>", "exec")
-        sys.argv = ["-c"] + options.c[1]
-        scope = {"__name__": "__m5_main__"}
-    else:
-        sys.path = [os.path.dirname(sys.argv[0])] + sys.path
-        filename = sys.argv[0]
-        filedata = open(filename, "r").read()
-        filecode = compile(filedata, filename, "exec")
-        scope = {"__file__": filename, "__name__": "__m5_main__"}
-
-    # if pdb was requested, execfile the thing under pdb, otherwise,
-    # just do the execfile normally
-    if options.pdb:
-        import pdb
-        import traceback
-
-        pdb = pdb.Pdb()
-        try:
-            pdb.run(filecode, scope)
-        except SystemExit:
-            print("The program exited via sys.exit(). Exit status: ", end=" ")
-            print(sys.exc_info()[1])
-        except:
-            traceback.print_exc()
-            print("Uncaught exception. Entering post mortem debugging")
-            t = sys.exc_info()[2]
-            while t.tb_next is not None:
-                t = t.tb_next
-                pdb.interaction(t.tb_frame, t)
+    if options.m:
+        sys.argv = [options.m[0]] + options.m[1]
+        runpy.run_module(options.m[0], run_name="__m5_main__")
     else:
-        exec(filecode, scope)
+        if options.c:
+            filedata = options.c[0]
+            filecode = compile(filedata, "<string>", "exec")
+            sys.argv = ["-c"] + options.c[1]
+            scope = {"__name__": "__m5_main__"}
+        else:
+            # If `-P` was used (`options.P == True`), don't prepend the script
+            # directory to the `sys.path`. This mimics Python 3's `-P` option
+            # (https://docs.python.org/3/using/cmdline.html#cmdoption-P).
+            if not options.P:
+                sys.path = [os.path.dirname(sys.argv[0])] + sys.path
+            filename = sys.argv[0]
+            with open(filename, "rb") as fd:
+                # Handle config files with unicode characters
+                filedata = fd.read().decode("utf-8")
+            filecode = compile(filedata, filename, "exec")
+            scope = {"__file__": filename, "__name__": "__m5_main__"}
+
+        # if pdb was requested, execfile the thing under pdb, otherwise,
+        # just do the execfile normally
+        if options.pdb:
+            import pdb
+            import traceback
+
+            pdb = pdb.Pdb()
+            try:
+                pdb.run(filecode, scope)
+            except SystemExit:
+                print(
+                    "The program exited via sys.exit(). Exit status: ", end=" "
+                )
+                print(sys.exc_info()[1])
+            except:
+                traceback.print_exc()
+                print("Uncaught exception. Entering post mortem debugging")
+                t = sys.exc_info()[2]
+                while t.tb_next is not None:
+                    t = t.tb_next
+                    pdb.interaction(t.tb_frame, t)
+        else:
+            exec(filecode, scope)
 
     # once the script is done
     if options.interactive:
diff --git a/src/python/m5/options.py b/src/python/m5/options.py
index ed0dcddc97..aac7d1ac74 100644
--- a/src/python/m5/options.py
+++ b/src/python/m5/options.py
@@ -30,11 +30,11 @@
 from optparse import *
 
 
-class nodefault(object):
+class nodefault:
     pass
 
 
-class splitter(object):
+class splitter:
     def __init__(self, split):
         self.split = split
 
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index 2ca6dfcc14..86a33c739d 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -101,7 +101,7 @@ def __new__(mcls, name, bases, dct):
 
 # Dummy base class to identify types that are legitimate for SimObject
 # parameters.
-class ParamValue(object, metaclass=MetaParamValue):
+class ParamValue(metaclass=MetaParamValue):
     cmd_line_settable = False
 
     # Generate the code needed as a prerequisite for declaring a C++
@@ -149,7 +149,7 @@ def pretty_print(self, value):
 
 
 # Regular parameter description.
-class ParamDesc(object):
+class ParamDesc:
     def __init__(self, ptype_str, ptype, *args, **kwargs):
         self.ptype_str = ptype_str
         # remember ptype only if it is provided
@@ -298,8 +298,7 @@ def get_name(self):
     # SimObjectVector directly.
     def descendants(self):
         for v in self:
-            for obj in v.descendants():
-                yield obj
+            yield from v.descendants()
 
     def get_config_as_dict(self):
         a = []
@@ -415,7 +414,7 @@ def cxx_decl(self, code):
         code("std::vector< ${{self.ptype.cxx_type}} > ${{self.name}};")
 
 
-class ParamFactory(object):
+class ParamFactory:
     def __init__(self, param_desc_class, ptype_str=None):
         self.param_desc_class = param_desc_class
         self.ptype_str = ptype_str
@@ -453,6 +452,7 @@ def __call__(self, *args, **kwargs):
 #
 #####################################################################
 
+
 # String-valued parameter.  Just mixin the ParamValue class with the
 # built-in str class.
 class String(ParamValue, str):
@@ -965,7 +965,7 @@ def __str__(self):
         if len(self.masks) == 0:
             return f"{self.start}:{self.end}"
         else:
-            return "%s:%s:%s:%s" % (
+            return "{}:{}:{}:{}".format(
                 self.start,
                 self.end,
                 self.intlvMatch,
@@ -1524,10 +1524,11 @@ def cxx_ini_parse(cls, code, src, dest, ret):
 # derive the new type from the appropriate base class on the fly.
 
 allEnums = {}
+
+
 # Metaclass for Enum types
 class MetaEnum(MetaParamValue):
     def __new__(mcls, name, bases, dict):
-
         cls = super().__new__(mcls, name, bases, dict)
         allEnums[name] = cls
         return cls
@@ -1560,8 +1561,8 @@ def __init__(cls, name, bases, init_dict):
         if cls.is_class:
             cls.cxx_type = f"{name}"
         else:
-            cls.cxx_type = f"enums::{name}"
-
+            scope = init_dict.get("wrapper_name", "enums")
+            cls.cxx_type = f"{scope}::{name}"
         super().__init__(name, bases, init_dict)
 
 
@@ -1600,7 +1601,7 @@ def cxx_predecls(cls, code):
     def cxx_ini_parse(cls, code, src, dest, ret):
         code("if (false) {")
         for elem_name in cls.map.keys():
-            code('} else if (%s == "%s") {' % (src, elem_name))
+            code(f'}} else if ({src} == "{elem_name}") {{')
             code.indent()
             name = cls.__name__ if cls.enum_name is None else cls.enum_name
             code(f"{dest} = {name if cls.is_class else 'enums'}::{elem_name};")
@@ -1963,11 +1964,12 @@ def cxx_ini_parse(self, code, src, dest, ret):
 # "Constants"... handy aliases for various values.
 #
 
+
 # Special class for NULL pointers.  Note the special check in
 # make_param_value() above that lets these be assigned where a
 # SimObject is required.
 # only one copy of a particular node
-class NullSimObject(object, metaclass=Singleton):
+class NullSimObject(metaclass=Singleton):
     _name = "Null"
 
     def __call__(cls):
@@ -2030,9 +2032,10 @@ def isNullPointer(value):
 #
 #####################################################################
 
+
 # Port reference: encapsulates a reference to a particular port on a
 # particular SimObject.
-class PortRef(object):
+class PortRef:
     def __init__(self, simobj, name, role, is_source):
         assert isSimObject(simobj) or isSimObjectClass(simobj)
         self.simobj = simobj
@@ -2202,7 +2205,7 @@ def __str__(self):
 
 # A reference to a complete vector-valued port (not just a single element).
 # Can be indexed to retrieve individual VectorPortElementRef instances.
-class VectorPortRef(object):
+class VectorPortRef:
     def __init__(self, simobj, name, role, is_source):
         assert isSimObject(simobj) or isSimObjectClass(simobj)
         self.simobj = simobj
@@ -2284,7 +2287,7 @@ def ccConnect(self):
 # Port description object.  Like a ParamDesc object, this represents a
 # logical port in the SimObject class, not a particular port on a
 # SimObject instance.  The latter are represented by PortRef objects.
-class Port(object):
+class Port:
     # Port("role", "description")
 
     _compat_dict = {}
@@ -2371,15 +2374,16 @@ def __init__(self, desc):
 VectorMasterPort = VectorRequestPort
 VectorSlavePort = VectorResponsePort
 
+
 # 'Fake' ParamDesc for Port references to assign to the _pdesc slot of
 # proxy objects (via set_param_desc()) so that proxy error messages
 # make sense.
-class PortParamDesc(object, metaclass=Singleton):
+class PortParamDesc(metaclass=Singleton):
     ptype_str = "Port"
     ptype = Port
 
 
-class DeprecatedParam(object):
+class DeprecatedParam:
     """A special type for deprecated parameter variable names.
 
     There are times when we need to change the name of parameter, but this
diff --git a/src/python/m5/proxy.py b/src/python/m5/proxy.py
index 78862346b4..1ae579190e 100644
--- a/src/python/m5/proxy.py
+++ b/src/python/m5/proxy.py
@@ -45,7 +45,7 @@
 import copy
 
 
-class BaseProxy(object):
+class BaseProxy:
     def __init__(self, search_self, search_up):
         self._search_self = search_self
         self._search_up = search_up
@@ -272,7 +272,7 @@ def isproxy(obj):
     return False
 
 
-class ProxyFactory(object):
+class ProxyFactory:
     def __init__(self, search_self, search_up):
         self.search_self = search_self
         self.search_up = search_up
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 587bfa0202..d619697247 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -51,6 +51,7 @@
 from . import ticks
 from . import objects
 from . import params
+from .citations import gather_citations
 from m5.util.dot_writer import do_dot, do_dvfs_dot
 from m5.util.dot_writer_ruby import do_ruby_dot
 
@@ -64,6 +65,7 @@
 
 _instantiated = False  # Has m5.instantiate() been called?
 
+
 # The final call to instantiate the SimObject graph and initialize the
 # system.
 def instantiate(ckpt_dir=None):
@@ -164,6 +166,8 @@ def instantiate(ckpt_dir=None):
     # a checkpoint, If so, this call will shift them to be at a valid time.
     updateStatEvents()
 
+    gather_citations(root)
+
 
 need_startup = True
 
@@ -315,6 +319,10 @@ def checkpoint(dir):
 
     drain()
     memWriteback(root)
+
+    # Recursively create the checkpoint directory if it does not exist.
+    os.makedirs(dir, exist_ok=True)
+
     print("Writing checkpoint")
     _m5.core.serializeAll(dir)
 
diff --git a/src/python/m5/ticks.py b/src/python/m5/ticks.py
index 47b033cfb4..7ec84c3709 100644
--- a/src/python/m5/ticks.py
+++ b/src/python/m5/ticks.py
@@ -29,6 +29,7 @@
 import sys
 from m5.util import warn
 
+
 # fix the global frequency
 def fixGlobalFrequency():
     import _m5.core
diff --git a/src/python/m5/util/__init__.py b/src/python/m5/util/__init__.py
index 34c5ee8a49..e1c9bd226a 100644
--- a/src/python/m5/util/__init__.py
+++ b/src/python/m5/util/__init__.py
@@ -48,6 +48,7 @@
 from .attrdict import attrdict, multiattrdict, optiondict
 from .multidict import multidict
 
+
 # panic() should be called when something happens that should never
 # ever happen regardless of what the user does (i.e., an acutal m5
 # bug).
diff --git a/src/python/m5/util/dot_writer.py b/src/python/m5/util/dot_writer.py
index b491a98448..59886480a5 100644
--- a/src/python/m5/util/dot_writer.py
+++ b/src/python/m5/util/dot_writer.py
@@ -82,7 +82,7 @@ def dot_create_nodes(simNode, callgraph):
         label = "root"
     else:
         label = simNode._name
-    full_path = re.sub("\.", "_", simNode.path())
+    full_path = re.sub(r"\.", "_", simNode.path())
     # add class name under the label
     label = '"' + label + " \\n: " + simNode.__class__.__name__ + '"'
 
@@ -109,7 +109,7 @@ def dot_create_edges(simNode, callgraph):
     for port_name in simNode._ports.keys():
         port = simNode._port_refs.get(port_name, None)
         if port != None:
-            full_path = re.sub("\.", "_", simNode.path())
+            full_path = re.sub(r"\.", "_", simNode.path())
             full_port_name = full_path + "_" + port_name
             port_node = dot_create_node(simNode, full_port_name, port_name)
             # create edges
@@ -128,7 +128,7 @@ def dot_create_edges(simNode, callgraph):
 
 def dot_add_edge(simNode, callgraph, full_port_name, port):
     peer = port.peer
-    full_peer_path = re.sub("\.", "_", peer.simobj.path())
+    full_peer_path = re.sub(r"\.", "_", peer.simobj.path())
     full_peer_port_name = full_peer_path + "_" + peer.name
 
     # Each edge is encountered twice, once for each peer. We only want one
@@ -290,9 +290,9 @@ def dot_rgb_to_html(r, g, b):
 # We need to create all of the clock domains. We abuse the alpha channel to get
 # the correct domain colouring.
 def dot_add_clk_domain(c_dom, v_dom):
-    label = '"' + str(c_dom) + "\ :\ " + str(v_dom) + '"'
-    label = re.sub("\.", "_", str(label))
-    full_path = re.sub("\.", "_", str(c_dom))
+    label = '"' + str(c_dom) + r"\ :\ " + str(v_dom) + '"'
+    label = re.sub(r"\.", "_", str(label))
+    full_path = re.sub(r"\.", "_", str(c_dom))
     return pydot.Cluster(
         full_path,
         shape="box",
@@ -311,7 +311,7 @@ def dot_create_dvfs_nodes(simNode, callgraph, domain=None):
         label = "root"
     else:
         label = simNode._name
-    full_path = re.sub("\.", "_", simNode.path())
+    full_path = re.sub(r"\.", "_", simNode.path())
     # add class name under the label
     label = '"' + label + " \\n: " + simNode.__class__.__name__ + '"'
 
diff --git a/src/python/m5/util/fdthelper.py b/src/python/m5/util/fdthelper.py
index 136936c512..1f565df270 100644
--- a/src/python/m5/util/fdthelper.py
+++ b/src/python/m5/util/fdthelper.py
@@ -86,7 +86,7 @@ def __init__(self, name, values):
         super().__init__(name, values)
 
 
-class FdtState(object):
+class FdtState:
     """Class for maintaining state while recursively generating a flattened
     device tree. The state tracks address, size and CPU address cell sizes, and
     maintains a dictionary of allocated phandles."""
@@ -270,7 +270,7 @@ def writeDtbFile(self, filename):
             with open(filename, "wb") as f:
                 f.write(self.to_dtb())
             return filename
-        except IOError:
+        except OSError:
             raise RuntimeError("Failed to open DTB output file")
 
     def writeDtsFile(self, filename):
@@ -280,5 +280,5 @@ def writeDtsFile(self, filename):
             with open(filename, "w") as f:
                 f.write(self.to_dts())
             return filename
-        except IOError:
+        except OSError:
             raise RuntimeError("Failed to open DTS output file")
diff --git a/src/python/m5/util/multidict.py b/src/python/m5/util/multidict.py
index f6ca6ba90a..cff47aeda1 100644
--- a/src/python/m5/util/multidict.py
+++ b/src/python/m5/util/multidict.py
@@ -27,7 +27,7 @@
 __all__ = ["multidict"]
 
 
-class multidict(object):
+class multidict:
     def __init__(self, parent={}, **kwargs):
         self.local = dict(**kwargs)
         self.parent = parent
@@ -80,8 +80,7 @@ def has_key(self, key):
         return key in self
 
     def items(self):
-        for item in self.next():
-            yield item
+        yield from self.next()
 
     def keys(self):
         for key, value in self.next():
diff --git a/src/python/m5/util/pybind.py b/src/python/m5/util/pybind.py
index 54fd111f38..1be9bb604b 100644
--- a/src/python/m5/util/pybind.py
+++ b/src/python/m5/util/pybind.py
@@ -36,7 +36,7 @@
 from abc import *
 
 
-class PyBindExport(object, metaclass=ABCMeta):
+class PyBindExport(metaclass=ABCMeta):
     @abstractmethod
     def export(self, code, cname):
         pass
diff --git a/src/python/m5/util/terminal.py b/src/python/m5/util/terminal.py
index f3d53ac460..0486b14378 100644
--- a/src/python/m5/util/terminal.py
+++ b/src/python/m5/util/terminal.py
@@ -84,7 +84,7 @@ def cap_string(s, *args):
     cap_string = null_cap_string
 
 
-class ColorStrings(object):
+class ColorStrings:
     def __init__(self, cap_string):
         for i, c in enumerate(color_names):
             setattr(self, c, cap_string("setaf", i))
diff --git a/src/python/m5/util/terminal_formatter.py b/src/python/m5/util/terminal_formatter.py
index 8d533f8bb7..da441b1188 100644
--- a/src/python/m5/util/terminal_formatter.py
+++ b/src/python/m5/util/terminal_formatter.py
@@ -45,7 +45,6 @@ def __terminal_size(self):
         return w, h
 
     def __get_paragraphs(self, text, flatten=False):
-
         """
         This function takes a text and returns a list of constituent
         paragraphs, defining a paragraph as a block of text separated from
diff --git a/src/sim/ClockDomain.py b/src/sim/ClockDomain.py
index d71252e1bc..34380f916b 100644
--- a/src/sim/ClockDomain.py
+++ b/src/sim/ClockDomain.py
@@ -37,6 +37,7 @@
 from m5.SimObject import SimObject
 from m5.proxy import *
 
+
 # Abstract clock domain
 class ClockDomain(SimObject):
     type = "ClockDomain"
diff --git a/src/sim/DVFSHandler.py b/src/sim/DVFSHandler.py
index f7064221e1..13c649dbb0 100644
--- a/src/sim/DVFSHandler.py
+++ b/src/sim/DVFSHandler.py
@@ -37,6 +37,7 @@
 from m5.SimObject import SimObject
 from m5.proxy import *
 
+
 # The handler in its current form is design to be centeralized, one per system
 # and manages all the source clock domains (SrcClockDomain) it is configured to
 # handle.  The specific voltage and frequency points are configured per clock
diff --git a/src/sim/InstTracer.py b/src/sim/InstTracer.py
index 34c97dd43e..c8b3673d47 100644
--- a/src/sim/InstTracer.py
+++ b/src/sim/InstTracer.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -28,8 +40,18 @@
 from m5.params import *
 
 
+class InstDisassembler(SimObject):
+    type = "InstDisassembler"
+    cxx_header = "sim/insttracer.hh"
+    cxx_class = "gem5::trace::InstDisassembler"
+
+
 class InstTracer(SimObject):
     type = "InstTracer"
     cxx_header = "sim/insttracer.hh"
     cxx_class = "gem5::trace::InstTracer"
     abstract = True
+
+    disassembler = Param.InstDisassembler(
+        InstDisassembler(), "Instruction Disassembler"
+    )
diff --git a/src/sim/PowerDomain.py b/src/sim/PowerDomain.py
index 2f42343870..64018e6d15 100644
--- a/src/sim/PowerDomain.py
+++ b/src/sim/PowerDomain.py
@@ -39,6 +39,7 @@
 from m5.params import *
 from m5.objects.PowerState import PowerState
 
+
 # A power domain groups multiple ClockedObjects and creates a
 # hierarchy in which follower ClockedObjects (caches for example) can
 # change power state depeding on what the leader objects (CPUs for
diff --git a/src/sim/PowerState.py b/src/sim/PowerState.py
index ca285fc68b..9c9fe03a5a 100644
--- a/src/sim/PowerState.py
+++ b/src/sim/PowerState.py
@@ -38,6 +38,7 @@
 from m5.params import *
 from m5.proxy import *
 
+
 # Enumerate set of allowed power states that can be used by a clocked object.
 # The list is kept generic to express a base minimal set.
 # State definition :-
diff --git a/src/sim/Root.py b/src/sim/Root.py
index 5002cdcf81..5ad42da668 100644
--- a/src/sim/Root.py
+++ b/src/sim/Root.py
@@ -32,7 +32,6 @@
 
 
 class Root(SimObject):
-
     _the_instance = None
 
     def __new__(cls, **kwargs):
diff --git a/src/sim/SConscript b/src/sim/SConscript
index e26676c00a..78b06c5b1d 100644
--- a/src/sim/SConscript
+++ b/src/sim/SConscript
@@ -105,7 +105,7 @@ GTest('proxy_ptr.test', 'proxy_ptr.test.cc')
 GTest('serialize.test', 'serialize.test.cc', with_tag('gem5 serialize'))
 GTest('serialize_handlers.test', 'serialize_handlers.test.cc')
 
-SimObject('InstTracer.py', sim_objects=['InstTracer'])
+SimObject('InstTracer.py', sim_objects=['InstTracer', 'InstDisassembler'])
 SimObject('Process.py', sim_objects=['Process', 'EmulatedDriver'])
 Source('faults.cc')
 Source('process.cc')
diff --git a/src/sim/SubSystem.py b/src/sim/SubSystem.py
index fa0063ba1f..49f70d5b8a 100644
--- a/src/sim/SubSystem.py
+++ b/src/sim/SubSystem.py
@@ -36,6 +36,7 @@
 from m5.SimObject import SimObject
 from m5.params import *
 
+
 # An empty simobject. Used for organizing simobjects
 # into logical groups as subsystems of a larger
 # system. For example, if we wanted to build a cpu cluster
diff --git a/src/sim/fd_array.cc b/src/sim/fd_array.cc
index 5ca9370054..ea58299587 100644
--- a/src/sim/fd_array.cc
+++ b/src/sim/fd_array.cc
@@ -42,6 +42,7 @@
 #include "base/output.hh"
 #include "params/Process.hh"
 #include "sim/fd_entry.hh"
+#include "sim/process.hh"
 
 namespace gem5
 {
@@ -367,7 +368,7 @@ FDArray::serialize(CheckpointOut &cp) const {
 }
 
 void
-FDArray::unserialize(CheckpointIn &cp) {
+FDArray::unserialize(CheckpointIn &cp, Process* process_ptr) {
     ScopedCheckpointSection sec(cp, "fdarray");
     uint64_t size;
     paramIn(cp, "size", size);
@@ -418,11 +419,24 @@ FDArray::unserialize(CheckpointIn &cp) {
         setFDEntry(tgt_fd, fdep);
 
         mode_t mode = this_ffd->getFileMode();
-        std::string const& path = this_ffd->getFileName();
+
+        std::string path;
+
+        if (process_ptr) {
+            // Check whether the app path needs to be redirected to another
+            // host path
+            path = process_ptr->checkPathRedirect(this_ffd->getFileName());
+        }
+        else {
+            path = this_ffd->getFileName();
+        }
+
         int flags = this_ffd->getFlags();
 
         // Re-open the file and assign a new sim_fd
-        int sim_fd = openFile(path, flags, mode);
+        int sim_fd;
+        sim_fd = openFile(path, flags, mode);
+
         this_ffd->setSimFD(sim_fd);
 
         // Restore the file offset to the proper value
diff --git a/src/sim/fd_array.hh b/src/sim/fd_array.hh
index d6d1b3cfbe..c2a6b64dea 100644
--- a/src/sim/fd_array.hh
+++ b/src/sim/fd_array.hh
@@ -43,6 +43,8 @@
 namespace gem5
 {
 
+class Process;
+
 class FDArray : public Serializable
 {
   public:
@@ -117,7 +119,11 @@ class FDArray : public Serializable
      * Serialization methods for file descriptors
      */
     void serialize(CheckpointOut &cp) const override;
-    void unserialize(CheckpointIn &cp) override;
+    void unserialize(CheckpointIn &cp, Process* process_ptr );
+    void unserialize(CheckpointIn &cp) override {
+      unserialize(cp, nullptr);
+    };
+
 
   private:
     /**
diff --git a/src/sim/insttracer.hh b/src/sim/insttracer.hh
index 9c9bca7692..37e29756a2 100644
--- a/src/sim/insttracer.hh
+++ b/src/sim/insttracer.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2017, 2020 ARM Limited
+ * Copyright (c) 2014, 2017, 2020, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -48,6 +48,7 @@
 #include "cpu/inst_res.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/static_inst.hh"
+#include "params/InstTracer.hh"
 #include "sim/sim_object.hh"
 
 namespace gem5
@@ -286,10 +287,37 @@ class InstRecord
     bool getFaulting() const { return faulting; }
 };
 
+/**
+ * The base InstDisassembler class provides a single-method interface
+ * to disassemble the instruction passed as its first argument.
+ * It also provides a base implementation which
+ * simply calls the StaticInst::disassemble method, which
+ * is the usual interface for disassembling
+ * a gem5 instruction.
+ */
+class InstDisassembler : public SimObject
+{
+  public:
+    InstDisassembler(const SimObjectParams &params)
+      : SimObject(params)
+    {}
+
+    virtual std::string
+    disassemble(StaticInstPtr inst,
+                const PCStateBase &pc,
+                const loader::SymbolTable *symtab) const
+    {
+        return inst->disassemble(pc.instAddr(), symtab);
+    }
+};
+
 class InstTracer : public SimObject
 {
   public:
-    InstTracer(const Params &p) : SimObject(p) {}
+    PARAMS(InstTracer);
+    InstTracer(const Params &p)
+      : SimObject(p), disassembler(p.disassembler)
+    {}
 
     virtual ~InstTracer() {}
 
@@ -297,6 +325,17 @@ class InstTracer : public SimObject
         getInstRecord(Tick when, ThreadContext *tc,
                 const StaticInstPtr staticInst, const PCStateBase &pc,
                 const StaticInstPtr macroStaticInst=nullptr) = 0;
+
+    std::string
+    disassemble(StaticInstPtr inst,
+                const PCStateBase &pc,
+                const loader::SymbolTable *symtab=nullptr) const
+    {
+        return disassembler->disassemble(inst, pc, symtab);
+    }
+
+  private:
+    InstDisassembler *disassembler;
 };
 
 } // namespace trace
diff --git a/src/sim/main.cc b/src/sim/main.cc
index 81a691d15d..1c42891816 100644
--- a/src/sim/main.cc
+++ b/src/sim/main.cc
@@ -50,6 +50,7 @@ main(int argc, char **argv)
     // Initialize gem5 special signal handling.
     initSignals();
 
+#if PY_VERSION_HEX < 0x03080000
     // Convert argv[0] to a wchar_t string, using python's locale and cleanup
     // functions.
     std::unique_ptr<wchar_t[], decltype(&PyMem_RawFree)> program(
@@ -59,6 +60,23 @@ main(int argc, char **argv)
     // This can help python find libraries at run time relative to this binary.
     // It's probably not necessary, but is mostly harmless and might be useful.
     Py_SetProgramName(program.get());
+#else
+    // Preinitialize Python for Python 3.8+
+    // This ensures that the locale configuration takes effect
+    PyStatus status;
+
+    PyConfig config;
+    PyConfig_InitPythonConfig(&config);
+
+    /* Set the program name. Implicitly preinitialize Python. */
+    status = PyConfig_SetBytesString(&config, &config.program_name,
+                                     argv[0]);
+    if (PyStatus_Exception(status)) {
+        PyConfig_Clear(&config);
+        Py_ExitStatusException(status);
+        return 1;
+    }
+#endif
 
     py::scoped_interpreter guard(true, argc, argv);
 
diff --git a/src/sim/mem_state.cc b/src/sim/mem_state.cc
index 801226cfb1..93e7ca3773 100644
--- a/src/sim/mem_state.cc
+++ b/src/sim/mem_state.cc
@@ -49,7 +49,7 @@ MemState::MemState(Process *owner, Addr brk_point, Addr stack_base,
       _stackBase(stack_base), _stackSize(max_stack_size),
       _maxStackSize(max_stack_size), _stackMin(stack_base - max_stack_size),
       _nextThreadStackBase(next_thread_stack_base),
-      _mmapEnd(mmap_end), _endBrkPoint(brk_point)
+      _mmapEnd(mmap_end)
 {
 }
 
@@ -67,7 +67,6 @@ MemState::operator=(const MemState &in)
     _stackMin = in._stackMin;
     _nextThreadStackBase = in._nextThreadStackBase;
     _mmapEnd = in._mmapEnd;
-    _endBrkPoint = in._endBrkPoint;
     _vmaList = in._vmaList; /* This assignment does a deep copy. */
 
     return *this;
@@ -107,26 +106,34 @@ MemState::isUnmapped(Addr start_addr, Addr length)
 void
 MemState::updateBrkRegion(Addr old_brk, Addr new_brk)
 {
-    /**
-     * To make this simple, avoid reducing the heap memory area if the
-     * new_brk point is less than the old_brk; this occurs when the heap is
-     * receding because the application has given back memory. The brk point
-     * is still tracked in the MemState class as an independent field so that
-     * it can be returned to the application; we just do not update the
-     * region unless we expand it out.
-     */
-    if (new_brk < old_brk) {
-        _brkPoint = new_brk;
-        return;
-    }
-
     /**
      * The regions must be page aligned but the break point can be set on
      * byte boundaries. Ensure that the restriction is maintained here by
      * extending the request out to the end of the page. (The roundUp
      * function will not round up an already aligned page.)
      */
-    auto page_aligned_brk = roundUp(new_brk, _pageBytes);
+    auto page_aligned_new_brk = roundUp(new_brk, _pageBytes);
+    auto page_aligned_old_brk = roundUp(old_brk, _pageBytes);
+
+    /**
+     * Reduce the heap memory area if the new_brk point is less than
+     * the old_brk; this occurs when the heap is receding because the
+     * application has given back memory. This may involve unmapping
+     * heap pages, if new_brk rounds to a lower-address page. The
+     * previous behavior was to leave such pages mapped for simplicity;
+     * however, that was not what Linux does in practice and may
+     * violate the assumptions of applications like glibc malloc,
+     * whose default configuration for Linux requires all pages
+     * allocated via brk(2) to be zero-filled (specifically,
+     * by setting MORECORE_CLEARS to 2).
+     */
+    if (new_brk < old_brk) {
+        const auto length = page_aligned_old_brk - page_aligned_new_brk;
+        if (length > 0)
+            unmapRegion(page_aligned_new_brk, length);
+        _brkPoint = new_brk;
+        return;
+    }
 
     /**
      * Create a new mapping for the heap region. We only create a mapping
@@ -135,17 +142,16 @@ MemState::updateBrkRegion(Addr old_brk, Addr new_brk)
      *
      * Since we do not track the type of the region and we also do not
      * coalesce the regions together, we can create a fragmented set of
-     * heap regions. To resolve this, we keep the furthest point ever mapped
-     * by the _endBrkPoint field.
+     * heap regions.
      */
-    if (page_aligned_brk > _endBrkPoint) {
-        auto length = page_aligned_brk - _endBrkPoint;
+    if (page_aligned_new_brk > page_aligned_old_brk) {
+        auto length = page_aligned_new_brk - page_aligned_old_brk;
         /**
          * Check if existing mappings impede the expansion of brk expansion.
          * If brk cannot expand, it must return the original, unmodified brk
          * address and should not modify the mappings here.
          */
-        if (!isUnmapped(_endBrkPoint, length)) {
+        if (!isUnmapped(page_aligned_old_brk, length)) {
             return;
         }
 
@@ -156,8 +162,7 @@ MemState::updateBrkRegion(Addr old_brk, Addr new_brk)
          * implemented if it actually becomes necessary; probably only
          * necessary if the list becomes too long to walk.
          */
-        mapRegion(_endBrkPoint, length, "heap");
-        _endBrkPoint = page_aligned_brk;
+        mapRegion(page_aligned_old_brk, length, "heap");
     }
 
     _brkPoint = new_brk;
diff --git a/src/sim/mem_state.hh b/src/sim/mem_state.hh
index b2b50d0760..8c01f7a765 100644
--- a/src/sim/mem_state.hh
+++ b/src/sim/mem_state.hh
@@ -277,11 +277,6 @@ class MemState : public Serializable
     Addr _nextThreadStackBase;
     Addr _mmapEnd;
 
-    /**
-     * Keeps record of the furthest mapped heap location.
-     */
-    Addr _endBrkPoint;
-
     /**
      * The _vmaList member is a list of virtual memory areas in the target
      * application space that have been allocated by the target. In most
diff --git a/src/sim/power/MathExprPowerModel.py b/src/sim/power/MathExprPowerModel.py
index 755b3953ee..6f335980a4 100644
--- a/src/sim/power/MathExprPowerModel.py
+++ b/src/sim/power/MathExprPowerModel.py
@@ -37,6 +37,7 @@
 from m5.params import *
 from m5.objects.PowerModelState import PowerModelState
 
+
 # Represents a power model for a simobj
 class MathExprPowerModel(PowerModelState):
     type = "MathExprPowerModel"
diff --git a/src/sim/power/PowerModel.py b/src/sim/power/PowerModel.py
index 8dba29795d..f45f24a3cc 100644
--- a/src/sim/power/PowerModel.py
+++ b/src/sim/power/PowerModel.py
@@ -37,6 +37,7 @@
 from m5.params import *
 from m5.proxy import Parent
 
+
 # Enum for a type of  power model
 class PMType(Enum):
     vals = ["All", "Static", "Dynamic"]
diff --git a/src/sim/power/PowerModelState.py b/src/sim/power/PowerModelState.py
index 081cd652d2..ba7bd44915 100644
--- a/src/sim/power/PowerModelState.py
+++ b/src/sim/power/PowerModelState.py
@@ -36,6 +36,7 @@
 from m5.SimObject import *
 from m5.params import *
 
+
 # Represents a power model for a simobj
 class PowerModelState(SimObject):
     type = "PowerModelState"
diff --git a/src/sim/power/ThermalDomain.py b/src/sim/power/ThermalDomain.py
index ddb8d4455b..ff5fdaff3f 100644
--- a/src/sim/power/ThermalDomain.py
+++ b/src/sim/power/ThermalDomain.py
@@ -36,6 +36,7 @@
 from m5.SimObject import *
 from m5.params import *
 
+
 # Represents a group of simobj which produce heat
 class ThermalDomain(SimObject):
     type = "ThermalDomain"
diff --git a/src/sim/power/ThermalModel.py b/src/sim/power/ThermalModel.py
index a3d4a804cc..0d45a384c6 100644
--- a/src/sim/power/ThermalModel.py
+++ b/src/sim/power/ThermalModel.py
@@ -74,7 +74,7 @@ class ThermalCapacitor(SimObject):
 
 
 # Represents a fixed temperature node (ie. air)
-class ThermalReference(SimObject, object):
+class ThermalReference(SimObject):
     type = "ThermalReference"
     cxx_header = "sim/power/thermal_model.hh"
     cxx_class = "gem5::ThermalReference"
diff --git a/src/sim/probe/probe.hh b/src/sim/probe/probe.hh
index 3dd428effd..8d5366670a 100644
--- a/src/sim/probe/probe.hh
+++ b/src/sim/probe/probe.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2013 ARM Limited
+ * Copyright (c) 2022-2023 The University of Edinburgh
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -317,6 +318,47 @@ class ProbePointArg : public ProbePoint
     }
 };
 
+
+/**
+ * ProbeListenerArgFunc is a listener for arguments of type Arg that
+ * forwards every notification to a callback function.
+ *
+ * The callback is passed, typically as a lambda, on construction.
+ * Example:
+ * ProbeListenerArgFunc<MyArg>(myobj->getProbeManager(),
+ *                "MyProbePointName", [this](const MyArg &arg)
+ *                { my_own_func(arg, xyz...); // do something with arg
+ *  });
+ */
+template <class Arg>
+class ProbeListenerArgFunc : public ProbeListenerArgBase<Arg>
+{
+  typedef std::function<void(const Arg &)> NotifyFunction;
+  private:
+    NotifyFunction function;
+
+  public:
+    /**
+     * Build a listener that forwards each notification to func.
+     * @param pm the probe manager handed to ProbeListenerArgBase.
+     * @param name the name of the ProbePoint to add this listener to.
+     * @param func the callable invoked with the argument on notify.
+     */
+    ProbeListenerArgFunc(ProbeManager *pm, const std::string &name,
+                       const NotifyFunction &func)
+      : ProbeListenerArgBase<Arg>(pm, name),
+        function(func)
+    {}
+
+    /**
+     * @brief called when the ProbePoint calls notify. This is a shim through
+     *        to the function passed during construction.
+     * @param val the argument value to pass.
+     */
+    void notify(const Arg &val) override { function(val); }
+};
+
+
 } // namespace gem5
 
 #endif//__SIM_PROBE_PROBE_HH__
diff --git a/src/sim/process.cc b/src/sim/process.cc
index a348b450b0..f47dbd59c6 100644
--- a/src/sim/process.cc
+++ b/src/sim/process.cc
@@ -387,7 +387,7 @@ Process::unserialize(CheckpointIn &cp)
 {
     memState->unserialize(cp);
     pTable->unserialize(cp);
-    fds->unserialize(cp);
+    fds->unserialize(cp, this);
 
     /**
      * Checkpoints for pipes, device drivers or sockets currently
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 55e44c7adc..29caba6661 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -244,9 +244,10 @@ loadsymbol(ThreadContext *tc)
             continue;
 
         if (!tc->getSystemPtr()->workload->insertSymbol(
-                    { loader::Symbol::Binding::Global, symbol, addr })) {
-            continue;
-        }
+            { loader::Symbol::Binding::Global,
+              loader::Symbol::SymbolType::Function, symbol, addr })) {
+                continue;
+              }
 
 
         DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
@@ -270,9 +271,13 @@ addsymbol(ThreadContext *tc, Addr addr, Addr symbolAddr)
     DPRINTF(Loader, "Loaded symbol: %s @ %#llx\n", symbol, addr);
 
     tc->getSystemPtr()->workload->insertSymbol(
-            { loader::Symbol::Binding::Global, symbol, addr });
+        { loader::Symbol::Binding::Global,
+          loader::Symbol::SymbolType::Function, symbol, addr }
+    );
     loader::debugSymbolTable.insert(
-            { loader::Symbol::Binding::Global, symbol, addr });
+        { loader::Symbol::Binding::Global,
+          loader::Symbol::SymbolType::Function, symbol, addr }
+    );
 }
 
 uint64_t
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc
index 2b1bd35f16..0f722a017e 100644
--- a/src/sim/serialize.cc
+++ b/src/sim/serialize.cc
@@ -145,8 +145,11 @@ CheckpointIn::setDir(const std::string &name)
     // appears to have a format placeholder in it.
     currentDirectory = (name.find("%") != std::string::npos) ?
         csprintf(name, curTick()) : name;
-    if (currentDirectory[currentDirectory.size() - 1] != '/')
+    auto isEmptyPath = currentDirectory.empty();
+    auto endsWithSlash = !isEmptyPath && currentDirectory.back() == '/';
+    if (!endsWithSlash) {
         currentDirectory += "/";
+    }
     return currentDirectory;
 }
 
diff --git a/src/sim/signal.hh b/src/sim/signal.hh
index 3cb3f62c0d..233de07658 100644
--- a/src/sim/signal.hh
+++ b/src/sim/signal.hh
@@ -54,14 +54,16 @@ class SignalSinkPort : public Port
     OnChangeFunc _onChange;
 
   protected:
+    // If bypass_on_change is true, the _onChange callback is not called;
+    // only _state is updated, and only when new_state differs from it.
     void
-    set(const State &new_state)
+    set(const State &new_state, const bool bypass_on_change = false)
     {
         if (new_state == _state)
             return;
 
         _state = new_state;
-        if (_onChange)
+        if (!bypass_on_change && _onChange)
             _onChange(_state);
     }
 
@@ -79,6 +81,8 @@ class SignalSinkPort : public Port
         _source = dynamic_cast<SignalSourcePort<State> *>(&peer);
         fatal_if(!_source, "Attempt to bind signal pin %s to "
                 "incompatible pin %s", name(), peer.name());
+        // The state of sink has to match the state of source.
+        _state = _source->state();
         Port::bind(peer);
     }
     void
@@ -94,18 +98,30 @@ class SignalSourcePort : public Port
 {
   private:
     SignalSinkPort<State> *sink = nullptr;
-    State _state = {};
+    State _state;
 
   public:
-    SignalSourcePort(const std::string &_name, PortID _id=InvalidPortID) :
-        Port(_name, _id)
-    {}
+    SignalSourcePort(const std::string &_name, PortID _id = InvalidPortID)
+        : Port(_name, _id)
+    {
+        _state = {};
+    }
+
+    // Give an initial value to the _state instead of using a default value.
+    SignalSourcePort(const std::string &_name, PortID _id,
+                     const State &init_state)
+        : SignalSourcePort(_name, _id)
+    {
+        _state = init_state;
+    }
 
+    // If bypass_on_change is true, the sink's _onChange callback is not
+    // called; the new state is still stored here and in the sink.
     void
-    set(const State &new_state)
+    set(const State &new_state, const bool bypass_on_change = false)
     {
         _state = new_state;
-        sink->set(new_state);
+        sink->set(new_state, bypass_on_change);
     }
 
     const State &state() const { return _state; }
@@ -126,6 +142,6 @@ class SignalSourcePort : public Port
     }
 };
 
-} // namespace gem5
+}  // namespace gem5
 
-#endif //__SIM_SIGNAL_HH__
+#endif  //__SIM_SIGNAL_HH__
diff --git a/src/sim/syscall_emul.cc b/src/sim/syscall_emul.cc
index c212d242fb..9794a4835e 100644
--- a/src/sim/syscall_emul.cc
+++ b/src/sim/syscall_emul.cc
@@ -959,7 +959,9 @@ chdirFunc(SyscallDesc *desc, ThreadContext *tc, VPtr<> pathname)
         tgt_cwd = path;
     } else {
         char buf[PATH_MAX];
-        tgt_cwd = realpath((p->tgtCwd + "/" + path).c_str(), buf);
+        if (!realpath((p->tgtCwd + "/" + path).c_str(), buf))
+            return -errno;
+        tgt_cwd = buf;
     }
     std::string host_cwd = p->checkPathRedirect(tgt_cwd);
 
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index b4550dd86b..c23521aed9 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -1375,7 +1375,7 @@ statFunc(SyscallDesc *desc, ThreadContext *tc,
 template <class OS>
 SyscallReturn
 newfstatatFunc(SyscallDesc *desc, ThreadContext *tc, int dirfd,
-               VPtr<> pathname, VPtr<typename OS::tgt_stat> tgt_stat,
+               VPtr<> pathname, VPtr<typename OS::tgt_stat64> tgt_stat,
                int flags)
 {
     std::string path;
@@ -1405,7 +1405,7 @@ newfstatatFunc(SyscallDesc *desc, ThreadContext *tc, int dirfd,
     if (result < 0)
         return -errno;
 
-    copyOutStatBuf<OS>(tgt_stat, &host_buf);
+    copyOutStat64Buf<OS>(tgt_stat, &host_buf);
 
     return 0;
 }
@@ -2409,7 +2409,7 @@ tgkillFunc(SyscallDesc *desc, ThreadContext *tc, int tgid, int tid, int sig)
         }
     }
 
-    if (sig != 0 || sig != OS::TGT_SIGABRT)
+    if (sig != 0 && sig != OS::TGT_SIGABRT)
         return -EINVAL;
 
     if (tgt_proc == nullptr)
diff --git a/src/sim/system.hh b/src/sim/system.hh
index d2725c32a9..bb64f639b5 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope
     /**
      * Get the cache line size of the system.
      */
-    unsigned int cacheLineSize() const { return _cacheLineSize; }
+    Addr cacheLineSize() const { return _cacheLineSize; }
 
     Threads threads;
 
@@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope
 
     enums::MemoryMode memoryMode;
 
-    const unsigned int _cacheLineSize;
+    const Addr _cacheLineSize;
 
     uint64_t workItemsBegin = 0;
     uint64_t workItemsEnd = 0;
diff --git a/src/systemc/core/SystemC.py b/src/systemc/core/SystemC.py
index f1f87a0583..a51c33f86f 100644
--- a/src/systemc/core/SystemC.py
+++ b/src/systemc/core/SystemC.py
@@ -25,6 +25,7 @@
 
 from m5.SimObject import SimObject, cxxMethod
 
+
 # This class represents the systemc kernel. There should be exactly one in the
 # simulation. It receives gem5 SimObject lifecycle callbacks (init, regStats,
 # etc.) and manages the lifecycle of the systemc simulation accordingly.
diff --git a/src/systemc/ext/core/sc_port.hh b/src/systemc/ext/core/sc_port.hh
index 796950e29b..bd57553559 100644
--- a/src/systemc/ext/core/sc_port.hh
+++ b/src/systemc/ext/core/sc_port.hh
@@ -114,6 +114,10 @@ class sc_port_base : public sc_object
     virtual sc_port_policy _portPolicy() const = 0;
 };
 
+// The overloaded virtual is intended in SystemC, so we'll disable the warning.
+// Please check section 9.3 of SystemC 2.3.1 release note for more details.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Woverloaded-virtual"
 template <class IF>
 class sc_port_b : public sc_port_base
 {
@@ -244,6 +248,7 @@ class sc_port_b : public sc_port_base
     sc_port_b(const sc_port_b<IF> &) {}
     sc_port_b<IF> &operator = (const sc_port_b<IF> &) { return *this; }
 };
+#pragma GCC diagnostic pop
 
 template <class IF, int N=1, sc_port_policy P=SC_ONE_OR_MORE_BOUND>
 class sc_port : public sc_port_b<IF>
diff --git a/src/systemc/python/systemc.py b/src/systemc/python/systemc.py
index da189ffb29..f5a364a006 100644
--- a/src/systemc/python/systemc.py
+++ b/src/systemc/python/systemc.py
@@ -30,7 +30,7 @@
 from _m5.systemc import sc_main_result_code, sc_main_result_str
 
 
-class ScMainResult(object):
+class ScMainResult:
     def __init__(self, code, message):
         self.code = code
         self.message = message
diff --git a/src/systemc/tests/verify.py b/src/systemc/tests/verify.py
index c0e072e3c2..acd9cb15e8 100755
--- a/src/systemc/tests/verify.py
+++ b/src/systemc/tests/verify.py
@@ -508,7 +508,7 @@ def run(self, tests):
             missing = []
             log_file = ".".join([test.name, "log"])
             log_path = gd.entry(log_file)
-            simout_path = os.path.join(out_dir, "simout")
+            simout_path = os.path.join(out_dir, "simout.txt")
             if not os.path.exists(simout_path):
                 missing.append("log output")
             elif log_path:
diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.cc b/src/systemc/tlm_bridge/gem5_to_tlm.cc
index 515975224e..461be11051 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.cc
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.cc
@@ -152,6 +152,7 @@ packet2payload(PacketPtr packet)
     trans->acquire();
 
     trans->set_address(packet->getAddr());
+    trans->set_response_status(tlm::TLM_INCOMPLETE_RESPONSE);
 
     /* Check if this transaction was allocated by mm */
     sc_assert(trans->has_mm());
@@ -480,7 +481,8 @@ Gem5ToTlmBridge<BITWIDTH>::recvRespRetry()
 
     tlm::tlm_generic_payload *trans = blockingResponse;
     blockingResponse = nullptr;
-    PacketPtr packet = packetMap[blockingResponse];
+
+    PacketPtr packet = packetMap[trans];
     sc_assert(packet);
 
     bool need_retry = !bridgeResponsePort.sendTimingResp(packet);
diff --git a/tests/configs/dram-lowp.py b/tests/configs/dram-lowp.py
deleted file mode 100644
index 25e7cc3087..0000000000
--- a/tests/configs/dram-lowp.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2017 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2015 Jason Lowe-Power
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# A wrapper around configs/dram/low_power_sweep.py
-
-# For some reason, this is implicitly needed by run.py
-root = None
-
-import m5
-
-
-def run_test(root):
-    # Called from tests/run.py
-
-    import sys
-
-    argv = [
-        sys.argv[0],
-        # Add a specific page policy and specify the number of ranks
-        f"-p{page_policy}",
-        "-r 2",
-    ]
-
-    # Execute the script we are wrapping
-    run_config("configs/dram/low_power_sweep.py", argv=argv)
diff --git a/tests/configs/gpu-randomtest-ruby.py b/tests/configs/gpu-randomtest-ruby.py
deleted file mode 100644
index cfc65526e5..0000000000
--- a/tests/configs/gpu-randomtest-ruby.py
+++ /dev/null
@@ -1,164 +0,0 @@
-#
-#  Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice,
-#  this list of conditions and the following disclaimer.
-#
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#  this list of conditions and the following disclaimer in the documentation
-#  and/or other materials provided with the distribution.
-#
-#  3. Neither the name of the copyright holder nor the names of its contributors
-#  may be used to endorse or promote products derived from this software
-#  without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys
-
-m5.util.addToPath("../configs/")
-
-from ruby import Ruby
-from common import Options
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-
-# add the gpu specific options expected by the the gpu and gpu_RfO
-parser.add_argument(
-    "-u",
-    "--num-compute-units",
-    type=int,
-    default=8,
-    help="number of compute units in the GPU",
-)
-parser.add_argument(
-    "--num-cp",
-    type=int,
-    default=0,
-    help="Number of GPU Command Processors (CP)",
-)
-parser.add_argument(
-    "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU"
-)
-parser.add_argument(
-    "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
-)
-parser.add_argument(
-    "--wfs-per-simd",
-    type=int,
-    default=10,
-    help="Number of WF slots per SIMD",
-)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-#
-args.l1d_size = "256B"
-args.l1i_size = "256B"
-args.l2_size = "512B"
-args.l3_size = "1kB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.l3_assoc = 2
-args.num_compute_units = 8
-args.num_sqc = 2
-
-# Check to for the GPU_RfO protocol.  Other GPU protocols are non-SC and will
-# not work with the Ruby random tester.
-assert buildEnv["PROTOCOL"] == "GPU_RfO"
-
-#
-# create the tester and system, including ruby
-#
-tester = RubyTester(
-    check_flush=False,
-    checks_to_complete=100,
-    wakeup_frequency=10,
-    num_cpus=args.num_cpus,
-)
-
-# We set the testers as cpu for ruby to find the correct clock domains
-# for the L1 Objects.
-system = System(cpu=tester)
-
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-system.mem_ranges = AddrRange("256MB")
-
-# the ruby tester reuses num_cpus to specify the
-# number of cpu ports connected to the tester object, which
-# is stored in system.cpu. because there is only ever one
-# tester object, num_cpus is not necessarily equal to the
-# size of system.cpu
-cpu_list = [system.cpu] * args.num_cpus
-Ruby.create_system(args, False, system, cpus=cpu_list)
-
-# Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-tester.num_cpus = len(system.ruby._cpu_ports)
-
-#
-# The tester is most effective when randomization is turned on and
-# artifical delay is randomly inserted on messages
-#
-system.ruby.randomization = True
-
-for ruby_port in system.ruby._cpu_ports:
-    #
-    # Tie the ruby tester ports to the ruby cpu read and write ports
-    #
-    if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
-        tester.cpuInstDataPort = ruby_port.in_ports
-    elif ruby_port.support_data_reqs:
-        tester.cpuDataPort = ruby_port.in_ports
-    elif ruby_port.support_inst_reqs:
-        tester.cpuInstPort = ruby_port.in_ports
-
-    # Do not automatically retry stalled Ruby requests
-    ruby_port.no_retry_on_stall = True
-
-    #
-    # Tell the sequencer this is the ruby tester so that it
-    # copies the subblock back to the checker
-    #
-    ruby_port.using_ruby_tester = True
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/gpu-ruby.py b/tests/configs/gpu-ruby.py
deleted file mode 100644
index 7606168a98..0000000000
--- a/tests/configs/gpu-ruby.py
+++ /dev/null
@@ -1,433 +0,0 @@
-#
-#  Copyright (c) 2015 Advanced Micro Devices, Inc.
-#  All rights reserved.
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice,
-#  this list of conditions and the following disclaimer.
-#
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#  this list of conditions and the following disclaimer in the documentation
-#  and/or other materials provided with the distribution.
-#
-#  3. Neither the name of the copyright holder nor the names of its contributors
-#  may be used to endorse or promote products derived from this software
-#  without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys, math, glob
-
-m5.util.addToPath("../configs/")
-
-from ruby import Ruby
-from common import Options
-from common import GPUTLBOptions, GPUTLBConfig
-
-
-def run_test(root):
-    """gpu test requires a specialized run_test implementation to set up the
-    mmio space."""
-
-    # instantiate configuration
-    m5.instantiate()
-
-    # Now that the system has been constructed, setup the mmio space
-    root.system.cpu[0].workload[0].map(0x10000000, 0x200000000, 4096)
-
-    # simulate until program terminates
-    exit_event = m5.simulate(maxtick)
-    print("Exiting @ tick", m5.curTick(), "because", exit_event.getCause())
-
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-Options.addSEOptions(parser)
-
-parser.add_argument(
-    "-k",
-    "--kernel-files",
-    help="file(s) containing GPU kernel code (colon separated)",
-)
-parser.add_argument(
-    "-u",
-    "--num-compute-units",
-    type=int,
-    default=2,
-    help="number of GPU compute units",
-),
-parser.add_argument(
-    "--num-cp",
-    type=int,
-    default=0,
-    help="Number of GPU Command Processors (CP)",
-)
-parser.add_argument(
-    "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU"
-)
-parser.add_argument(
-    "--cu-per-sqc",
-    type=int,
-    default=4,
-    help="number of CUssharing an SQC (icache, and thus icache TLB)",
-)
-parser.add_argument(
-    "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
-)
-parser.add_argument(
-    "--wfs-per-simd",
-    type=int,
-    default=8,
-    help="Number of WF slots per SIMD",
-)
-parser.add_argument(
-    "--sp-bypass-path-length",
-    type=int,
-    default=4,
-    help="Number of stages of bypass path in vector ALU for Single "
-    "Precision ops",
-)
-parser.add_argument(
-    "--dp-bypass-path-length",
-    type=int,
-    default=4,
-    help="Number of stages of bypass path in vector ALU for Double "
-    "Precision ops",
-)
-parser.add_argument(
-    "--issue-period",
-    type=int,
-    default=4,
-    help="Number of cycles per vector instruction issue period",
-)
-parser.add_argument(
-    "--glbmem-wr-bus-width",
-    type=int,
-    default=32,
-    help="VGPR to Coalescer (Global Memory) data bus width in bytes",
-)
-parser.add_argument(
-    "--glbmem-rd-bus-width",
-    type=int,
-    default=32,
-    help="Coalescer to VGPR (Global Memory) data bus width in bytes",
-)
-parser.add_argument(
-    "--shr-mem-pipes-per-cu",
-    type=int,
-    default=1,
-    help="Number of Shared Memory pipelines per CU",
-)
-parser.add_argument(
-    "--glb-mem-pipes-per-cu",
-    type=int,
-    default=1,
-    help="Number of Global Memory pipelines per CU",
-)
-parser.add_argument(
-    "--vreg-file-size",
-    type=int,
-    default=2048,
-    help="number of physical vector registers per SIMD",
-)
-parser.add_argument(
-    "--bw-scalor",
-    type=int,
-    default=0,
-    help="bandwidth scalor for scalability analysis",
-)
-parser.add_argument("--CPUClock", type=str, default="2GHz", help="CPU clock")
-parser.add_argument("--GPUClock", type=str, default="1GHz", help="GPU clock")
-parser.add_argument(
-    "--cpu-voltage",
-    action="store",
-    type=str,
-    default="1.0V",
-    help="""CPU  voltage domain""",
-)
-parser.add_argument(
-    "--gpu-voltage",
-    action="store",
-    type=str,
-    default="1.0V",
-    help="""CPU  voltage domain""",
-)
-parser.add_argument(
-    "--CUExecPolicy",
-    type=str,
-    default="OLDEST-FIRST",
-    help="WF exec policy (OLDEST-FIRST, ROUND-ROBIN)",
-)
-parser.add_argument(
-    "--xact-cas-mode",
-    action="store_true",
-    help="enable load_compare mode (transactional CAS)",
-)
-parser.add_argument(
-    "--SegFaultDebug",
-    action="store_true",
-    help="checks for GPU seg fault before TLB access",
-)
-parser.add_argument(
-    "--LocalMemBarrier",
-    action="store_true",
-    help="Barrier does not wait for writethroughs to complete",
-)
-parser.add_argument(
-    "--countPages",
-    action="store_true",
-    help="Count Page Accesses and output in per-CU output files",
-)
-parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth forTLBs")
-parser.add_argument(
-    "--pf-type",
-    type=str,
-    help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
-)
-parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
-parser.add_argument(
-    "--numLdsBanks",
-    type=int,
-    default=32,
-    help="number of physical banks per LDS module",
-)
-parser.add_argument(
-    "--ldsBankConflictPenalty",
-    type=int,
-    default=1,
-    help="number of cycles per LDS bank conflict",
-)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-GPUTLBOptions.tlb_options(parser)
-
-args = parser.parse_args()
-
-# The GPU cache coherence protocols only work with the backing store
-args.access_backing_store = True
-
-# Currently, the sqc (I-Cache of GPU) is shared by
-# multiple compute units(CUs). The protocol works just fine
-# even if sqc is not shared. Overriding this option here
-# so that the user need not explicitly set this (assuming
-# sharing sqc is the common usage)
-n_cu = args.num_compute_units
-num_sqc = int(math.ceil(float(n_cu) / args.cu_per_sqc))
-args.num_sqc = num_sqc  # pass this to Ruby
-
-########################## Creating the GPU system ########################
-# shader is the GPU
-shader = Shader(
-    n_wf=args.wfs_per_simd,
-    clk_domain=SrcClockDomain(
-        clock=args.GPUClock,
-        voltage_domain=VoltageDomain(voltage=args.gpu_voltage),
-    ),
-    timing=True,
-)
-
-# GPU_RfO(Read For Ownership) implements SC/TSO memory model.
-# Other GPU protocols implement release consistency at GPU side.
-# So, all GPU protocols other than GPU_RfO should make their writes
-# visible to the global memory and should read from global memory
-# during kernal boundary. The pipeline initiates(or do not initiate)
-# the acquire/release operation depending on this impl_kern_boundary_sync
-# flag. This flag=true means pipeline initiates a acquire/release operation
-# at kernel boundary.
-if buildEnv["PROTOCOL"] == "GPU_RfO":
-    shader.impl_kern_boundary_sync = False
-else:
-    shader.impl_kern_boundary_sync = True
-
-# Switching off per-lane TLB by default
-per_lane = False
-if args.TLB_config == "perLane":
-    per_lane = True
-
-# List of compute units; one GPU can have multiple compute units
-compute_units = []
-for i in range(n_cu):
-    compute_units.append(
-        ComputeUnit(
-            cu_id=i,
-            perLaneTLB=per_lane,
-            num_SIMDs=args.simds_per_cu,
-            wfSize=args.wf_size,
-            spbypass_pipe_length=args.sp_bypass_path_length,
-            dpbypass_pipe_length=args.dp_bypass_path_length,
-            issue_period=args.issue_period,
-            coalescer_to_vrf_bus_width=args.glbmem_rd_bus_width,
-            vrf_to_coalescer_bus_width=args.glbmem_wr_bus_width,
-            num_global_mem_pipes=args.glb_mem_pipes_per_cu,
-            num_shared_mem_pipes=args.shr_mem_pipes_per_cu,
-            n_wf=args.wfs_per_simd,
-            execPolicy=args.CUExecPolicy,
-            xactCasMode=args.xact_cas_mode,
-            debugSegFault=args.SegFaultDebug,
-            functionalTLB=True,
-            localMemBarrier=args.LocalMemBarrier,
-            countPages=args.countPages,
-            localDataStore=LdsState(
-                banks=args.numLdsBanks,
-                bankConflictPenalty=args.ldsBankConflictPenalty,
-            ),
-        )
-    )
-    wavefronts = []
-    vrfs = []
-    for j in range(args.simds_per_cu):
-        for k in range(int(shader.n_wf)):
-            wavefronts.append(Wavefront(simdId=j, wf_slot_id=k))
-        vrfs.append(
-            VectorRegisterFile(
-                simd_id=j, num_regs_per_simd=args.vreg_file_size
-            )
-        )
-    compute_units[-1].wavefronts = wavefronts
-    compute_units[-1].vector_register_file = vrfs
-    if args.TLB_prefetch:
-        compute_units[-1].prefetch_depth = args.TLB_prefetch
-        compute_units[-1].prefetch_prev_type = args.pf_type
-
-    # attach the LDS and the CU to the bus (actually a Bridge)
-    compute_units[-1].ldsPort = compute_units[-1].ldsBus.slave
-    compute_units[-1].ldsBus.master = compute_units[-1].localDataStore.cuPort
-
-# Attach compute units to GPU
-shader.CUs = compute_units
-
-# this is a uniprocessor only test, thus the shader is the second index in the
-# list of "system.cpus"
-args.num_cpus = 1
-shader_idx = 1
-cpu = TimingSimpleCPU(cpu_id=0)
-
-########################## Creating the GPU dispatcher ########################
-# Dispatcher dispatches work from host CPU to GPU
-host_cpu = cpu
-dispatcher = GpuDispatcher()
-
-# Currently does not test for command processors
-cpu_list = [cpu] + [shader] + [dispatcher]
-
-system = System(
-    cpu=cpu_list,
-    mem_ranges=[AddrRange(args.mem_size)],
-    mem_mode="timing",
-    workload=SEWorkload(),
-)
-
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu[0].clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-
-# configure the TLB hierarchy
-GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx)
-
-# create Ruby system
-system.piobus = IOXBar(
-    width=32, response_latency=0, frontend_latency=0, forward_latency=0
-)
-Ruby.create_system(args, None, system)
-
-# Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock=args.ruby_clock, voltage_domain=system.voltage_domain
-)
-
-# create the interrupt controller
-cpu.createInterruptController()
-
-#
-# Tie the cpu cache ports to the ruby cpu ports and
-# physmem, respectively
-#
-cpu.connectAllPorts(
-    system.ruby._cpu_ports[0].in_ports,
-    system.ruby._cpu_ports[0].in_ports,
-    system.ruby._cpu_ports[0].interrupt_out_port,
-)
-system.ruby._cpu_ports[0].mem_request_port = system.piobus.cpu_side_ports
-
-# attach CU ports to Ruby
-# Because of the peculiarities of the CP core, you may have 1 CPU but 2
-# sequencers and thus 2 _cpu_ports created. Your GPUs shouldn't be
-# hooked up until after the CP. To make this script generic, figure out
-# the index as below, but note that this assumes there is one sequencer
-# per compute unit and one sequencer per SQC for the math to work out
-# correctly.
-gpu_port_idx = (
-    len(system.ruby._cpu_ports) - args.num_compute_units - args.num_sqc
-)
-gpu_port_idx = gpu_port_idx - args.num_cp * 2
-
-wavefront_size = args.wf_size
-for i in range(n_cu):
-    # The pipeline issues wavefront_size number of uncoalesced requests
-    # in one GPU issue cycle. Hence wavefront_size mem ports.
-    for j in range(wavefront_size):
-        system.cpu[shader_idx].CUs[i].memory_port[j] = system.ruby._cpu_ports[
-            gpu_port_idx
-        ].slave[j]
-    gpu_port_idx += 1
-
-for i in range(n_cu):
-    if i > 0 and not i % args.cu_per_sqc:
-        gpu_port_idx += 1
-    system.cpu[shader_idx].CUs[i].sqc_port = system.ruby._cpu_ports[
-        gpu_port_idx
-    ].slave
-gpu_port_idx = gpu_port_idx + 1
-
-# Current regression tests do not support the command processor
-assert args.num_cp == 0
-
-# connect dispatcher to the system.piobus
-dispatcher.pio = system.piobus.mem_side_ports
-dispatcher.dma = system.piobus.cpu_side_ports
-
-################# Connect the CPU and GPU via GPU Dispatcher ###################
-# CPU rings the GPU doorbell to notify a pending task
-# using this interface.
-# And GPU uses this interface to notify the CPU of task completion
-# The communcation happens through emulated driver.
-
-# Note this implicit setting of the cpu_pointer, shader_pointer and tlb array
-# parameters must be after the explicit setting of the System cpu list
-shader.cpu_pointer = host_cpu
-dispatcher.cpu = host_cpu
-dispatcher.shader_pointer = shader
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-m5.ticks.setGlobalFrequency("1THz")
-root.system.mem_mode = "timing"
diff --git a/tests/configs/memtest-filter.py b/tests/configs/memtest-filter.py
deleted file mode 100644
index 1080853f7b..0000000000
--- a/tests/configs/memtest-filter.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-
-m5.util.addToPath("../configs/")
-from common.Caches import *
-
-# MAX CORES IS 8 with the fals sharing method
-nb_cores = 8
-cpus = [MemTest() for i in range(nb_cores)]
-
-# system simulated
-system = System(
-    cpu=cpus,
-    physmem=SimpleMemory(),
-    membus=SystemXBar(width=16, snoop_filter=SnoopFilter()),
-)
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain()
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu_clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-
-system.toL2Bus = L2XBar(
-    clk_domain=system.cpu_clk_domain, snoop_filter=SnoopFilter()
-)
-system.l2c = L2Cache(clk_domain=system.cpu_clk_domain, size="64kB", assoc=8)
-system.l2c.cpu_side = system.toL2Bus.mem_side_ports
-
-# connect l2c to membus
-system.l2c.mem_side = system.membus.cpu_side_ports
-
-# add L1 caches
-for cpu in cpus:
-    # All cpus are associated with cpu_clk_domain
-    cpu.clk_domain = system.cpu_clk_domain
-    cpu.l1c = L1Cache(size="32kB", assoc=4)
-    cpu.l1c.cpu_side = cpu.port
-    cpu.l1c.mem_side = system.toL2Bus.cpu_side_ports
-
-system.system_port = system.membus.cpu_side_ports
-
-# connect memory to membus
-system.physmem.port = system.membus.mem_side_ports
-
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/memtest-ruby.py b/tests/configs/memtest-ruby.py
deleted file mode 100644
index dac165e288..0000000000
--- a/tests/configs/memtest-ruby.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# Copyright (c) 2010 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys
-
-m5.util.addToPath("../configs/")
-
-from ruby import Ruby
-from common import Options
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-#
-args.l1d_size = "256B"
-args.l1i_size = "256B"
-args.l2_size = "512B"
-args.l3_size = "1kB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.l3_assoc = 2
-args.ports = 32
-
-# MAX CORES IS 8 with the fals sharing method
-nb_cores = 8
-
-# ruby does not support atomic, functional, or uncacheable accesses
-cpus = [
-    MemTest(
-        percent_functional=50, percent_uncacheable=0, suppress_func_errors=True
-    )
-    for i in range(nb_cores)
-]
-
-# overwrite args.num_cpus with the nb_cores value
-args.num_cpus = nb_cores
-
-# system simulated
-system = System(cpu=cpus)
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain()
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu_clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-
-# All cpus are associated with cpu_clk_domain
-for cpu in cpus:
-    cpu.clk_domain = system.cpu_clk_domain
-
-system.mem_ranges = AddrRange("256MB")
-
-Ruby.create_system(args, False, system)
-
-# Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock=args.ruby_clock, voltage_domain=system.voltage_domain
-)
-
-assert len(cpus) == len(system.ruby._cpu_ports)
-
-for (i, ruby_port) in enumerate(system.ruby._cpu_ports):
-    #
-    # Tie the cpu port to the ruby cpu ports and
-    # physmem, respectively
-    #
-    cpus[i].port = ruby_port.in_ports
-
-    #
-    # Since the memtester is incredibly bursty, increase the deadlock
-    # threshold to 1 million cycles
-    #
-    ruby_port.deadlock_threshold = 1000000
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/memtest.py b/tests/configs/memtest.py
deleted file mode 100644
index 10f3fbe50d..0000000000
--- a/tests/configs/memtest.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-
-m5.util.addToPath("../configs/")
-from common.Caches import *
-
-# MAX CORES IS 8 with the fals sharing method
-nb_cores = 8
-cpus = [MemTest() for i in range(nb_cores)]
-
-# system simulated
-system = System(cpu=cpus, physmem=SimpleMemory(), membus=SystemXBar())
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain()
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu_clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-
-system.toL2Bus = L2XBar(clk_domain=system.cpu_clk_domain)
-system.l2c = L2Cache(clk_domain=system.cpu_clk_domain, size="64kB", assoc=8)
-system.l2c.cpu_side = system.toL2Bus.mem_side_ports
-
-# connect l2c to membus
-system.l2c.mem_side = system.membus.cpu_side_ports
-
-# add L1 caches
-for cpu in cpus:
-    # All cpus are associated with cpu_clk_domain
-    cpu.clk_domain = system.cpu_clk_domain
-    cpu.l1c = L1Cache(size="32kB", assoc=4)
-    cpu.l1c.cpu_side = cpu.port
-    cpu.l1c.mem_side = system.toL2Bus.cpu_side_ports
-
-system.system_port = system.membus.cpu_side_ports
-
-# connect memory to membus
-system.physmem.port = system.membus.mem_side_ports
-
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/minor-timing-mp.py b/tests/configs/minor-timing-mp.py
deleted file mode 100644
index b6c56de512..0000000000
--- a/tests/configs/minor-timing-mp.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-nb_cores = 4
-root = BaseSESystem(
-    mem_mode="timing",
-    mem_class=DDR3_1600_8x8,
-    cpu_class=MinorCPU,
-    num_cpus=nb_cores,
-).create_root()
diff --git a/tests/configs/o3-timing-checker.py b/tests/configs/o3-timing-checker.py
deleted file mode 100644
index 9b328ce9e8..0000000000
--- a/tests/configs/o3-timing-checker.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-root = BaseSESystemUniprocessor(
-    mem_mode="timing",
-    mem_class=DDR3_1600_8x8,
-    cpu_class=DerivO3CPU,
-    checker=True,
-).create_root()
diff --git a/tests/configs/o3-timing-mp-ruby.py b/tests/configs/o3-timing-mp-ruby.py
deleted file mode 100644
index 10725e36ad..0000000000
--- a/tests/configs/o3-timing-mp-ruby.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-
-nb_cores = 4
-cpus = [DerivO3CPU(cpu_id=i) for i in range(nb_cores)]
-
-import ruby_config
-
-ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
-
-# system simulated
-system = System(
-    cpu=cpus,
-    physmem=ruby_memory,
-    membus=SystemXBar(),
-    mem_mode="timing",
-    clk_domain=SrcClockDomain(clock="1GHz"),
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu_clk_domain = SrcClockDomain(clock="2GHz")
-
-for cpu in cpus:
-    # create the interrupt controller
-    cpu.createInterruptController()
-    cpu.connectBus(system.membus)
-    # All cpus are associated with cpu_clk_domain
-    cpu.clk_domain = system.cpu_clk_domain
-
-# connect memory to membus
-system.physmem.port = system.membus.mem_side_ports
-
-# Connect the system port for loading of binaries etc
-system.system_port = system.membus.cpu_side_ports
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/o3-timing-mt.py b/tests/configs/o3-timing-mt.py
deleted file mode 100644
index 9fda80de12..0000000000
--- a/tests/configs/o3-timing-mt.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) 2013, 2015 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from m5.defines import buildEnv
-from base_config import *
-from arm_generic import *
-from common.cores.arm.O3_ARM_v7a import O3_ARM_v7a_3
-from gem5.isas import ISA
-from gem5.runtime import get_runtime_isa
-
-# If we are running ARM regressions, use a more sensible CPU
-# configuration. This makes the results more meaningful, and also
-# increases the coverage of the regressions.
-if get_runtime_isa() == ISA.ARM:
-    root = ArmSESystemUniprocessor(
-        mem_mode="timing",
-        mem_class=DDR3_1600_8x8,
-        cpu_class=O3_ARM_v7a_3,
-        num_threads=2,
-    ).create_root()
-else:
-    root = BaseSESystemUniprocessor(
-        mem_mode="timing",
-        mem_class=DDR3_1600_8x8,
-        cpu_class=DerivO3CPU,
-        num_threads=2,
-    ).create_root()
diff --git a/tests/configs/o3-timing.py b/tests/configs/o3-timing.py
deleted file mode 100644
index 26efe466d4..0000000000
--- a/tests/configs/o3-timing.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from m5.defines import buildEnv
-from base_config import *
-from arm_generic import *
-from common.cores.arm.O3_ARM_v7a import O3_ARM_v7a_3
-from gem5.isas import ISA
-from gem5.runtime import get_runtime_isa
-
-# If we are running ARM regressions, use a more sensible CPU
-# configuration. This makes the results more meaningful, and also
-# increases the coverage of the regressions.
-if get_runtime_isa() == ISA.ARM:
-    root = ArmSESystemUniprocessor(
-        mem_mode="timing", mem_class=DDR3_1600_8x8, cpu_class=O3_ARM_v7a_3
-    ).create_root()
-else:
-    root = BaseSESystemUniprocessor(
-        mem_mode="timing", mem_class=DDR3_1600_8x8, cpu_class=DerivO3CPU
-    ).create_root()
diff --git a/tests/configs/pc-o3-timing.py b/tests/configs/pc-o3-timing.py
deleted file mode 100644
index 24abcd2de6..0000000000
--- a/tests/configs/pc-o3-timing.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright (c) 2012 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from x86_generic import *
-
-root = LinuxX86FSSystemUniprocessor(
-    mem_mode="timing", mem_class=DDR3_1600_8x8, cpu_class=DerivO3CPU
-).create_root()
diff --git a/tests/configs/pc-simple-timing-ruby.py b/tests/configs/pc-simple-timing-ruby.py
deleted file mode 100644
index d0458b49cd..0000000000
--- a/tests/configs/pc-simple-timing-ruby.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# Copyright (c) 2012 Mark D. Hill and David A. Wood
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5, os, argparse, sys
-from m5.objects import *
-
-m5.util.addToPath("../configs/")
-from common.Benchmarks import SysConfig
-from common import FSConfig, SysPaths
-from ruby import Ruby
-from common import Options
-
-# Add the ruby specific and protocol specific options
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-Ruby.define_options(parser)
-args = parser.parse_args()
-
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-args.l1d_size = "32kB"
-args.l1i_size = "32kB"
-args.l2_size = "4MB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.num_cpus = 2
-
-# the system
-mdesc = SysConfig(disks=["linux-x86.img"])
-system = FSConfig.makeLinuxX86System(
-    "timing", args.num_cpus, mdesc=mdesc, Ruby=True
-)
-system.kernel = SysPaths.binary("x86_64-vmlinux-2.6.22.9")
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-
-system.kernel = FSConfig.binary("x86_64-vmlinux-2.6.22.9.smp")
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-system.cpu_clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-system.cpu = [
-    TimingSimpleCPU(cpu_id=i, clk_domain=system.cpu_clk_domain)
-    for i in range(args.num_cpus)
-]
-
-Ruby.create_system(args, True, system, system.iobus, system._dma_ports)
-
-# Create a seperate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock=args.ruby_clock, voltage_domain=system.voltage_domain
-)
-
-# Connect the ruby io port to the PIO bus,
-# assuming that there is just one such port.
-system.iobus.mem_side_ports = system.ruby._io_port.in_ports
-
-for (i, cpu) in enumerate(system.cpu):
-    # create the interrupt controller
-    cpu.createInterruptController()
-    # Tie the cpu ports to the correct ruby system ports
-    system.ruby._cpu_ports[i].connectCpuPorts(cpu)
-
-root = Root(full_system=True, system=system)
-m5.ticks.setGlobalFrequency("1THz")
diff --git a/tests/configs/rubytest-ruby.py b/tests/configs/rubytest-ruby.py
deleted file mode 100644
index 9a382cce6e..0000000000
--- a/tests/configs/rubytest-ruby.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# Copyright (c) 2009 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys
-
-m5.util.addToPath("../configs/")
-
-from ruby import Ruby
-from common import Options
-
-parser = argparse.ArgumentParser()
-Options.addNoISAOptions(parser)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-#
-args.l1d_size = "256B"
-args.l1i_size = "256B"
-args.l2_size = "512B"
-args.l3_size = "1kB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.l3_assoc = 2
-args.ports = 32
-
-# Turn on flush check for the hammer protocol
-check_flush = False
-if buildEnv["PROTOCOL"] == "MOESI_hammer":
-    check_flush = True
-
-#
-# create the tester and system, including ruby
-#
-tester = RubyTester(
-    check_flush=check_flush,
-    checks_to_complete=100,
-    wakeup_frequency=10,
-    num_cpus=args.num_cpus,
-)
-
-# We set the testers as cpu for ruby to find the correct clock domains
-# for the L1 Objects.
-system = System(cpu=tester)
-
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-system.mem_ranges = AddrRange("256MB")
-
-# the ruby tester reuses num_cpus to specify the
-# number of cpu ports connected to the tester object, which
-# is stored in system.cpu. because there is only ever one
-# tester object, num_cpus is not necessarily equal to the
-# size of system.cpu
-cpu_list = [system.cpu] * args.num_cpus
-Ruby.create_system(args, False, system, cpus=cpu_list)
-
-# Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-assert args.num_cpus == len(system.ruby._cpu_ports)
-
-tester.num_cpus = len(system.ruby._cpu_ports)
-
-#
-# The tester is most effective when randomization is turned on and
-# artifical delay is randomly inserted on messages
-#
-system.ruby.randomization = True
-
-for ruby_port in system.ruby._cpu_ports:
-    #
-    # Tie the ruby tester ports to the ruby cpu read and write ports
-    #
-    if ruby_port.support_data_reqs and ruby_port.support_inst_reqs:
-        tester.cpuInstDataPort = ruby_port.in_ports
-    elif ruby_port.support_data_reqs:
-        tester.cpuDataPort = ruby_port.in_ports
-    elif ruby_port.support_inst_reqs:
-        tester.cpuInstPort = ruby_port.in_ports
-
-    # Do not automatically retry stalled Ruby requests
-    ruby_port.no_retry_on_stall = True
-
-    #
-    # Tell the sequencer this is the ruby tester so that it
-    # copies the subblock back to the checker
-    #
-    ruby_port.using_ruby_tester = True
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/simple-atomic-mp-ruby.py b/tests/configs/simple-atomic-mp-ruby.py
deleted file mode 100644
index e3ac279022..0000000000
--- a/tests/configs/simple-atomic-mp-ruby.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-
-nb_cores = 4
-cpus = [AtomicSimpleCPU(cpu_id=i) for i in range(nb_cores)]
-
-import ruby_config
-
-ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
-
-# system simulated
-system = System(
-    cpu=cpus,
-    physmem=ruby_memory,
-    membus=SystemXBar(),
-    clk_domain=SrcClockDomain(clock="1GHz"),
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu.clk_domain = SrcClockDomain(clock="2GHz")
-
-# add L1 caches
-for cpu in cpus:
-    cpu.connectBus(system.membus)
-    # All cpus are associated with cpu_clk_domain
-    cpu.clk_domain = system.cpu_clk_domain
-
-# connect memory to membus
-system.physmem.port = system.membus.mem_side_ports
-
-# Connect the system port for loading of binaries etc
-system.system_port = system.membus.cpu_side_ports
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "atomic"
diff --git a/tests/configs/simple-atomic-mp.py b/tests/configs/simple-atomic-mp.py
deleted file mode 100644
index 0d85b5af36..0000000000
--- a/tests/configs/simple-atomic-mp.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-nb_cores = 4
-root = BaseSESystem(
-    mem_mode="atomic", cpu_class=AtomicSimpleCPU, num_cpus=nb_cores
-).create_root()
diff --git a/tests/configs/simple-atomic.py b/tests/configs/simple-atomic.py
deleted file mode 100644
index 6dd86ccf39..0000000000
--- a/tests/configs/simple-atomic.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-root = BaseSESystemUniprocessor(
-    mem_mode="atomic", cpu_class=AtomicSimpleCPU
-).create_root()
diff --git a/tests/configs/simple-timing-mp-ruby.py b/tests/configs/simple-timing-mp-ruby.py
deleted file mode 100644
index 38488c409d..0000000000
--- a/tests/configs/simple-timing-mp-ruby.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys
-
-m5.util.addToPath("../configs/")
-
-from common import Options
-from ruby import Ruby
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-#
-args.l1d_size = "256B"
-args.l1i_size = "256B"
-args.l2_size = "512B"
-args.l3_size = "1kB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.l3_assoc = 2
-
-nb_cores = 4
-cpus = [TimingSimpleCPU(cpu_id=i) for i in range(nb_cores)]
-
-# overwrite the num_cpus to equal nb_cores
-args.num_cpus = nb_cores
-
-# system simulated
-system = System(cpu=cpus, clk_domain=SrcClockDomain(clock="1GHz"))
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu.clk_domain = SrcClockDomain(clock="2GHz")
-
-Ruby.create_system(args, False, system)
-
-# Create a separate clock domain for Ruby
-system.ruby.clk_domain = SrcClockDomain(clock=args.ruby_clock)
-
-assert args.num_cpus == len(system.ruby._cpu_ports)
-
-for (i, cpu) in enumerate(system.cpu):
-    # create the interrupt controller
-    cpu.createInterruptController()
-
-    #
-    # Tie the cpu ports to the ruby cpu ports
-    #
-    cpu.connectAllPorts(
-        system.ruby._cpu_ports[i].in_ports,
-        system.ruby._cpu_ports[i].in_ports,
-        system.ruby._cpu_ports[i].interrupt_out_port,
-    )
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/simple-timing-mp.py b/tests/configs/simple-timing-mp.py
deleted file mode 100644
index 3988f4c2b9..0000000000
--- a/tests/configs/simple-timing-mp.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-nb_cores = 4
-root = BaseSESystem(
-    mem_mode="timing", cpu_class=TimingSimpleCPU, num_cpus=nb_cores
-).create_root()
diff --git a/tests/configs/simple-timing-ruby.py b/tests/configs/simple-timing-ruby.py
deleted file mode 100644
index eb0f4e9ac7..0000000000
--- a/tests/configs/simple-timing-ruby.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import m5
-from m5.objects import *
-from m5.defines import buildEnv
-from m5.util import addToPath
-import os, argparse, sys
-
-m5.util.addToPath("../configs/")
-
-from ruby import Ruby
-from common import Options
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-
-# Add the ruby specific and protocol specific options
-Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-#
-# Set the default cache size and associativity to be very small to encourage
-# races between requests and writebacks.
-#
-args.l1d_size = "256B"
-args.l1i_size = "256B"
-args.l2_size = "512B"
-args.l3_size = "1kB"
-args.l1d_assoc = 2
-args.l1i_assoc = 2
-args.l2_assoc = 2
-args.l3_assoc = 2
-
-# this is a uniprocessor only test
-args.num_cpus = 1
-cpu = TimingSimpleCPU(cpu_id=0)
-system = System(cpu=cpu)
-
-# Dummy voltage domain for all our clock domains
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu.clk_domain = SrcClockDomain(
-    clock="2GHz", voltage_domain=system.voltage_domain
-)
-
-system.mem_ranges = AddrRange("256MB")
-Ruby.create_system(args, False, system)
-
-# Create a separate clock for Ruby
-system.ruby.clk_domain = SrcClockDomain(
-    clock=args.ruby_clock, voltage_domain=system.voltage_domain
-)
-
-assert len(system.ruby._cpu_ports) == 1
-
-# create the interrupt controller
-cpu.createInterruptController()
-
-#
-# Tie the cpu cache ports to the ruby cpu ports and
-# physmem, respectively
-#
-cpu.connectAllPorts(
-    system.ruby._cpu_ports[0].in_ports,
-    system.ruby._cpu_ports[0].in_ports,
-    system.ruby._cpu_ports[0].interrupt_out_port,
-)
-
-# -----------------------
-# run simulation
-# -----------------------
-
-root = Root(full_system=False, system=system)
-root.system.mem_mode = "timing"
diff --git a/tests/configs/simple-timing.py b/tests/configs/simple-timing.py
deleted file mode 100644
index bf3ced4463..0000000000
--- a/tests/configs/simple-timing.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.objects import *
-from base_config import *
-
-root = BaseSESystemUniprocessor(
-    mem_mode="timing", cpu_class=TimingSimpleCPU
-).create_root()
diff --git a/tests/configs/x86_generic.py b/tests/configs/x86_generic.py
deleted file mode 100644
index 3c590860de..0000000000
--- a/tests/configs/x86_generic.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright (c) 2012 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from abc import ABCMeta, abstractmethod
-import m5
-from m5.objects import *
-from m5.proxy import *
-
-m5.util.addToPath("../configs/")
-from common.Benchmarks import SysConfig
-from common import FSConfig, SysPaths
-from common.Caches import *
-from base_config import *
-
-
-class LinuxX86SystemBuilder(object):
-    """Mix-in that implements create_system.
-
-    This mix-in is intended as a convenient way of adding an
-    X86-specific create_system method to a class deriving from one of
-    the generic base systems.
-    """
-
-    def __init__(self):
-        pass
-
-    def create_system(self):
-        mdesc = SysConfig(disks=["linux-x86.img"])
-        system = FSConfig.makeLinuxX86System(
-            self.mem_mode, numCPUs=self.num_cpus, mdesc=mdesc
-        )
-        system.kernel = SysPaths.binary("x86_64-vmlinux-2.6.22.9")
-
-        self.init_system(system)
-        return system
-
-
-class LinuxX86FSSystem(LinuxX86SystemBuilder, BaseFSSystem):
-    """Basic X86 full system builder."""
-
-    def __init__(self, **kwargs):
-        """Initialize an X86 system that supports full system simulation.
-
-        Note: Keyword arguments that are not listed below will be
-        passed to the BaseFSSystem.
-
-        Keyword Arguments:
-          machine_type -- String describing the platform to simulate
-        """
-        BaseSystem.__init__(self, **kwargs)
-        LinuxX86SystemBuilder.__init__(self)
-
-    def create_caches_private(self, cpu):
-        cpu.addPrivateSplitL1Caches(
-            L1_ICache(size="32kB", assoc=1),
-            L1_DCache(size="32kB", assoc=4),
-            PageTableWalkerCache(),
-            PageTableWalkerCache(),
-        )
-
-
-class LinuxX86FSSystemUniprocessor(
-    LinuxX86SystemBuilder, BaseFSSystemUniprocessor
-):
-    """Basic X86 full system builder for uniprocessor systems.
-
-    Note: This class is a specialization of the X86FSSystem and is
-    only really needed to provide backwards compatibility for existing
-    test cases.
-    """
-
-    def __init__(self, **kwargs):
-        BaseFSSystemUniprocessor.__init__(self, **kwargs)
-        LinuxX86SystemBuilder.__init__(self)
-
-    def create_caches_private(self, cpu):
-        cpu.addTwoLevelCacheHierarchy(
-            L1_ICache(size="32kB", assoc=1),
-            L1_DCache(size="32kB", assoc=4),
-            L2Cache(size="4MB", assoc=8),
-            PageTableWalkerCache(),
-            PageTableWalkerCache(),
-        )
-
-
-class LinuxX86FSSwitcheroo(LinuxX86SystemBuilder, BaseFSSwitcheroo):
-    """Uniprocessor X86 system prepared for CPU switching"""
-
-    def __init__(self, **kwargs):
-        BaseFSSwitcheroo.__init__(self, **kwargs)
-        LinuxX86SystemBuilder.__init__(self)
diff --git a/tests/compiler-tests.sh b/tests/deprecated/compiler-tests.sh
similarity index 98%
rename from tests/compiler-tests.sh
rename to tests/deprecated/compiler-tests.sh
index f5d4bb189f..e8da335de4 100755
--- a/tests/compiler-tests.sh
+++ b/tests/deprecated/compiler-tests.sh
@@ -18,7 +18,6 @@ images=("gcc-version-12"
         "gcc-version-10"
         "gcc-version-9"
         "gcc-version-8"
-        "gcc-version-7"
         "clang-version-14"
         "clang-version-13"
         "clang-version-12"
@@ -27,7 +26,6 @@ images=("gcc-version-12"
         "clang-version-9"
         "clang-version-8"
         "clang-version-7"
-        "clang-version-6.0"
         # The following checks our support for Ubuntu 18.04, 20.04, and 22.04.
         "ubuntu-18.04_all-dependencies"
         "ubuntu-20.04_all-dependencies"
@@ -58,6 +56,8 @@ builds=("ALL"
         "POWER"
         "RISCV"
         "SPARC"
+        "GCN3_X86"
+        "VEGA_X86"
         "X86"
         "X86_MI_example"
         "X86_MOESI_AMD_Base")
@@ -73,7 +73,7 @@ opts=(".opt"
 builds_per_compiler=1
 
 # Base URL of the gem5 testing images.
-base_url="gcr.io/gem5-test"
+base_url="ghcr.io/gem5"
 
 # Arguments passed into scons on every build target test.
 if [ $# -eq 0 ];then
diff --git a/tests/jenkins/gem5art-tests.sh b/tests/deprecated/jenkins/gem5art-tests.sh
similarity index 100%
rename from tests/jenkins/gem5art-tests.sh
rename to tests/deprecated/jenkins/gem5art-tests.sh
diff --git a/tests/jenkins/presubmit-stage2.sh b/tests/deprecated/jenkins/presubmit-stage2.sh
similarity index 100%
rename from tests/jenkins/presubmit-stage2.sh
rename to tests/deprecated/jenkins/presubmit-stage2.sh
diff --git a/tests/jenkins/presubmit.cfg b/tests/deprecated/jenkins/presubmit.cfg
similarity index 100%
rename from tests/jenkins/presubmit.cfg
rename to tests/deprecated/jenkins/presubmit.cfg
diff --git a/tests/jenkins/presubmit.sh b/tests/deprecated/jenkins/presubmit.sh
similarity index 95%
rename from tests/jenkins/presubmit.sh
rename to tests/deprecated/jenkins/presubmit.sh
index 91eb95f81b..becf499d28 100755
--- a/tests/jenkins/presubmit.sh
+++ b/tests/deprecated/jenkins/presubmit.sh
@@ -37,8 +37,8 @@
 
 set -e
 
-DOCKER_IMAGE_ALL_DEP=gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
-DOCKER_IMAGE_CLANG_COMPILE=gcr.io/gem5-test/clang-version-14:latest
+DOCKER_IMAGE_ALL_DEP=ghcr.io/gem5/ubuntu-22.04_all-dependencies:latest
+DOCKER_IMAGE_CLANG_COMPILE=ghcr.io/gem5/clang-version-14:latest
 PRESUBMIT_STAGE2=tests/jenkins/presubmit-stage2.sh
 GEM5ART_TESTS=tests/jenkins/gem5art-tests.sh
 
diff --git a/tests/nightly.sh b/tests/deprecated/nightly.sh
similarity index 91%
rename from tests/nightly.sh
rename to tests/deprecated/nightly.sh
index cea1ad0be9..53ad374c3c 100755
--- a/tests/nightly.sh
+++ b/tests/deprecated/nightly.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -70,13 +70,13 @@ unit_test () {
 
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-        gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+        ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
             scons build/ALL/unittests.${build} -j${compile_threads} \
             --ignore-style
 }
 
 # Ensure we have the latest docker images.
-docker pull gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag}
+docker pull ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag}
 
 # Run the unit tests.
 unit_test opt
@@ -85,7 +85,7 @@ unit_test debug
 # Run the gem5 long tests.
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}"/tests --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
         ./main.py run --length long -j${compile_threads} -t${run_threads} -vv
 
 # Unfortunately, due docker being unable run KVM, we do so separately.
@@ -105,10 +105,10 @@ cd "${gem5_root}/tests"
 cd "${gem5_root}"
 
 # For the GPU tests we compile and run the GPU ISA inside a gcn-gpu container.
-docker pull gcr.io/gem5-test/gcn-gpu:${tag}
+docker pull ghcr.io/gem5/gcn-gpu:${tag}
 docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" \
-    gcr.io/gem5-test/gcn-gpu:${tag}  bash -c \
+    ghcr.io/gem5/gcn-gpu:${tag}  bash -c \
     "scons build/${gpu_isa}/gem5.opt -j${compile_threads} --ignore-style \
         || (rm -rf build && scons build/${gpu_isa}/gem5.opt \
         -j${compile_threads} --ignore-style)"
@@ -123,7 +123,7 @@ mkdir -p tests/testing-results
 # basic GPU functionality is working.
 docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" \
-    gcr.io/gem5-test/gcn-gpu:${tag}  build/${gpu_isa}/gem5.opt \
+    ghcr.io/gem5/gcn-gpu:${tag}  build/${gpu_isa}/gem5.opt \
     configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c square
 
 # get HeteroSync
@@ -135,7 +135,7 @@ wget -qN http://dist.gem5.org/dist/develop/test-progs/heterosync/gcn3/allSyncPri
 # atomics are tested.
 docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}"  --memory="${docker_mem_limit}" \
-    gcr.io/gem5-test/gcn-gpu:${tag} build/${gpu_isa}/gem5.opt \
+    ghcr.io/gem5/gcn-gpu:${tag} build/${gpu_isa}/gem5.opt \
     configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c \
     allSyncPrims-1kernel --options="sleepMutex 10 16 4"
 
@@ -146,7 +146,7 @@ docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
 # atomics are tested.
 docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}"  --memory="${docker_mem_limit}" \
-    gcr.io/gem5-test/gcn-gpu:${tag}  build/${gpu_isa}/gem5.opt \
+    ghcr.io/gem5/gcn-gpu:${tag}  build/${gpu_isa}/gem5.opt \
     configs/example/apu_se.py --reg-alloc-policy=dynamic -n3 -c \
     allSyncPrims-1kernel --options="lfTreeBarrUniq 10 16 4"
 
@@ -157,10 +157,11 @@ build_and_run_SST () {
 
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --rm  --memory="${docker_mem_limit}" \
-        gcr.io/gem5-test/sst-env:${tag} bash -c "\
+        ghcr.io/gem5/sst-env:${tag} bash -c "\
 scons build/${isa}/libgem5_${variant}.so -j${compile_threads} \
 --without-tcmalloc --duplicate-sources --ignore-style && \
 cd ext/sst && \
+mv Makefile.linux Makefile && \
 make clean; make -j ${compile_threads} && \
 sst --add-lib-path=./ sst/example.py && \
 cd -;
@@ -172,7 +173,7 @@ build_and_run_systemc () {
     rm -rf "${gem5_root}/build/ARM"
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-        gcr.io/gem5-test/ubuntu-22.04_min-dependencies:${tag} bash -c "\
+        ghcr.io/gem5/ubuntu-22.04_min-dependencies:${tag} bash -c "\
 scons -j${compile_threads} --ignore-style --duplicate-sources \
 build/ARM/gem5.opt && \
 scons --with-cxx-config --without-python --without-tcmalloc \
@@ -182,7 +183,7 @@ scons --with-cxx-config --without-python --without-tcmalloc \
 
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-        gcr.io/gem5-test/systemc-env:${tag} bash -c "\
+        ghcr.io/gem5/systemc-env:${tag} bash -c "\
 cd util/systemc/gem5_within_systemc && \
 make -j${compile_threads} && \
 ../../../build/ARM/gem5.opt ../../../configs/deprecated/example/se.py -c \
diff --git a/tests/weekly.sh b/tests/deprecated/weekly.sh
similarity index 95%
rename from tests/weekly.sh
rename to tests/deprecated/weekly.sh
index 9c7ebdf76d..17d68426a6 100755
--- a/tests/weekly.sh
+++ b/tests/deprecated/weekly.sh
@@ -72,7 +72,7 @@ fi
 # Run the gem5 very-long tests.
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}"/tests --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
         ./main.py run --length very-long -j${threads} -t${run_threads} -vv
 
 mkdir -p tests/testing-results
@@ -81,7 +81,7 @@ mkdir -p tests/testing-results
 # before pulling gem5 resources, make sure it doesn't exist already
 docker run -u $UID:$GID --rm --volume "${gem5_root}":"${gem5_root}" -w \
        "${gem5_root}" --memory="${docker_mem_limit}" \
-       gcr.io/gem5-test/gcn-gpu:${tag} bash -c \
+       ghcr.io/gem5/gcn-gpu:${tag} bash -c \
        "rm -rf ${gem5_root}/gem5-resources"
 
 # delete m5out, Pannotia datasets, and output files in case a failed regression
@@ -94,7 +94,7 @@ rm -rf ${gem5_root}/m5out coAuthorsDBLP.graph 1k_128k.gr result.out
 # Moreover, DNNMark builds a library and thus doesn't have a binary, so we
 # need to build it before we run it.
 # Need to pull this first because HACC's docker requires this path to exist
-git clone https://gem5.googlesource.com/public/gem5-resources \
+git clone https://github.com/gem5/gem5-resources \
     "${gem5_root}/gem5-resources"
 
 
@@ -127,7 +127,7 @@ cd "${gem5_root}"
 # avoid needing to set all of these, we instead build a docker for it, which
 # has all these variables pre-set in its Dockerfile
 # To avoid compiling gem5 multiple times, all GPU benchmarks will use this
-docker pull gcr.io/gem5-test/gcn-gpu:${tag}
+docker pull ghcr.io/gem5/gcn-gpu:${tag}
 docker build -t hacc-test-weekly ${gem5_root}/gem5-resources/src/gpu/halo-finder
 
 docker run --rm -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
@@ -296,10 +296,15 @@ docker run --rm -v ${gem5_root}:${gem5_root} -w ${gem5_root} -u $UID:$GID \
        -c color_maxmin.gem5 --options="1k_128k.gr 0"
 
 # build FW
-docker run --rm -v ${gem5_root}:${gem5_root} -w \
-       ${gem5_root}/gem5-resources/src/gpu/pannotia/fw -u $UID:$GID \
+docker run --rm -v ${gem5_root}:${gem5_root} -w ${gem5_root} -u $UID:$GID \
+       ${gem5_root}/gem5-resources/src/gpu/pannotia/fw \
        --memory="${docker_mem_limit}" hacc-test-weekly bash -c \
-       "export GEM5_PATH=${gem5_root} ; make gem5-fusion"
+       "export GEM5_PATH=${gem5_root} ; make default; make gem5-fusion"
+
+# create input mmap file for FW
+docker run --rm -v ${gem5_root}:${gem5_root} -w ${gem5_root} -u $UID:$GID \
+       --memory="${docker_mem_limit}" hacc-test-weekly bash -c\
+       "./gem5-resources/src/gpu/pannotia/fw/bin/fw_hip ./gem5-resources/src/gpu/pannotia/fw/1k_128k.gr 1"
 
 # run FW (use same input dataset as BC for faster testing)
 docker run --rm -v ${gem5_root}:${gem5_root} -w ${gem5_root} -u $UID:$GID \
@@ -308,7 +313,7 @@ docker run --rm -v ${gem5_root}:${gem5_root} -w ${gem5_root} -u $UID:$GID \
        ${gem5_root}/configs/example/apu_se.py -n3 --mem-size=8GB \
        --reg-alloc-policy=dynamic \
        --benchmark-root=${gem5_root}/gem5-resources/src/gpu/pannotia/fw/bin \
-       -c fw_hip.gem5 --options="1k_128k.gr"
+       -c fw_hip.gem5 --options="1k_128k.gr 2"
 
 # build MIS
 docker run --rm -v ${gem5_root}:${gem5_root} -w \
@@ -414,23 +419,23 @@ rm -rf "${gem5_root}/build/ALL"
 
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
        scons build/ALL/gem5.opt -j${threads}
 
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
        ./build/ALL/gem5.opt \
        configs/example/gem5_library/dramsys/arm-hello-dramsys.py
 
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
        ./build/ALL/gem5.opt \
        configs/example/gem5_library/dramsys/dramsys-traffic.py
 
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+    ghcr.io/gem5/ubuntu-22.04_all-dependencies:${tag} \
        ./build/ALL/gem5.opt \
        configs/example/dramsys.py
diff --git a/tests/gem5/arm_boot_tests/README.md b/tests/gem5/arm_boot_tests/README.md
new file mode 100644
index 0000000000..68d5de97f1
--- /dev/null
+++ b/tests/gem5/arm_boot_tests/README.md
@@ -0,0 +1,9 @@
+# Arm Boot Tests
+
+These tests run a series of Linux boots on the ARMBoard.
+They vary the CPU type, number of CPUs, and memory used for each run.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/arm_boot_tests --length=[length]
+```
diff --git a/tests/gem5/configs/arm_boot_exit_run.py b/tests/gem5/arm_boot_tests/configs/arm_boot_exit_run.py
similarity index 97%
rename from tests/gem5/configs/arm_boot_exit_run.py
rename to tests/gem5/arm_boot_tests/configs/arm_boot_exit_run.py
index a8ea6eeea7..ffb41459f5 100644
--- a/tests/gem5/configs/arm_boot_exit_run.py
+++ b/tests/gem5/arm_boot_tests/configs/arm_boot_exit_run.py
@@ -36,7 +36,7 @@
 from gem5.isas import ISA
 from m5.objects import ArmDefaultRelease
 from gem5.utils.requires import requires
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.simulate.simulator import Simulator
 from m5.objects import VExpress_GEM5_Foundation
 from gem5.coherence_protocol import CoherenceProtocol
@@ -199,15 +199,15 @@
 
 # Set the Full System workload.
 board.set_kernel_disk_workload(
-    kernel=Resource(
+    kernel=obtain_resource(
         "arm64-linux-kernel-5.4.49",
         resource_directory=args.resource_directory,
     ),
-    bootloader=Resource(
+    bootloader=obtain_resource(
         "arm64-bootloader-foundation",
         resource_directory=args.resource_directory,
     ),
-    disk_image=Resource(
+    disk_image=obtain_resource(
         "arm64-ubuntu-20.04-img",
         resource_directory=args.resource_directory,
     ),
diff --git a/tests/gem5/arm-boot-tests/test_linux_boot.py b/tests/gem5/arm_boot_tests/test_linux_boot.py
similarity index 99%
rename from tests/gem5/arm-boot-tests/test_linux_boot.py
rename to tests/gem5/arm_boot_tests/test_linux_boot.py
index 9e04e24d62..23921ef403 100644
--- a/tests/gem5/arm-boot-tests/test_linux_boot.py
+++ b/tests/gem5/arm_boot_tests/test_linux_boot.py
@@ -44,9 +44,8 @@ def test_boot(
     length: str,
     to_tick: Optional[int] = None,
 ):
-
     name = f"{cpu}-cpu_{num_cpus}-cores_{mem_system}_{memory_class}_\
-arm-boot-test"
+arm_boot_test"
 
     verifiers = []
 
@@ -90,6 +89,7 @@ def test_boot(
             config.base_dir,
             "tests",
             "gem5",
+            "arm_boot_tests",
             "configs",
             "arm_boot_exit_run.py",
         ),
diff --git a/tests/gem5/asmtest/README.md b/tests/gem5/asmtest/README.md
new file mode 100644
index 0000000000..fcbe8ea65c
--- /dev/null
+++ b/tests/gem5/asmtest/README.md
@@ -0,0 +1,9 @@
+# ASM Test
+
+These tests run a set of RISCV binaries on a bare bones syscall execution.
+In addition, these tests run these binaries against different CPU types.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/asmtest --length=[length]
+```
diff --git a/tests/gem5/asmtest/configs/riscv_asmtest.py b/tests/gem5/asmtest/configs/riscv_asmtest.py
new file mode 100644
index 0000000000..e98ec1bd49
--- /dev/null
+++ b/tests/gem5/asmtest/configs/riscv_asmtest.py
@@ -0,0 +1,121 @@
+# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A run script for a very simple Syscall-Execution running simple binaries.
+The system has no cache hierarchy and is as "bare-bones" as you can get in
+gem5 while still being functional.
+"""
+
+from gem5.resources.resource import obtain_resource
+from gem5.components.processors.cpu_types import (
+    get_cpu_types_str_set,
+    get_cpu_type_from_str,
+)
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.isas import ISA
+
+import argparse
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for testing RISC-V instructions"
+)
+
+parser.add_argument(
+    "resource", type=str, help="The gem5 resource binary to run."
+)
+
+parser.add_argument(
+    "cpu", type=str, choices=get_cpu_types_str_set(), help="The CPU type used."
+)
+
+parser.add_argument(
+    "--riscv-32bits",
+    action="store_true",
+    help="Use 32 bits core of Riscv CPU",
+)
+
+parser.add_argument(
+    "-r",
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+parser.add_argument(
+    "-n",
+    "--num-cores",
+    type=int,
+    default=1,
+    required=False,
+    help="The number of CPU cores to run.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+processor = SimpleProcessor(
+    cpu_type=get_cpu_type_from_str(args.cpu),
+    isa=ISA.RISCV,
+    num_cores=args.num_cores,
+)
+
+if args.riscv_32bits:
+    for simple_core in processor.cores:
+        for i in range(len(simple_core.core.isa)):
+            simple_core.core.isa[i].riscv_type = "RV32"
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+binary = obtain_resource(
+    args.resource, resource_directory=args.resource_directory
+)
+motherboard.set_se_binary_workload(binary)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/asmtest/tests.py b/tests/gem5/asmtest/tests.py
index b2a5992da0..62e4ef5859 100644
--- a/tests/gem5/asmtest/tests.py
+++ b/tests/gem5/asmtest/tests.py
@@ -34,7 +34,7 @@
 # The following lists the RISCV binaries. Those commented out presently result
 # in a test failure. This is outlined in the following Jira issue:
 # https://gem5.atlassian.net/browse/GEM5-496
-binaries = (
+rv64_binaries = (
     "rv64samt-ps-sysclone_d",
     "rv64samt-ps-sysfutex1_d",
     #    'rv64samt-ps-sysfutex2_d',
@@ -69,6 +69,50 @@
     "rv64uamt-ps-amoswap_d",
     "rv64uamt-ps-amoxor_d",
     "rv64uamt-ps-lrsc_d",
+    "rv64ub-ps-add_uw",
+    "rv64ub-ps-andn",
+    "rv64ub-ps-bclr",
+    "rv64ub-ps-bclri",
+    "rv64ub-ps-bext",
+    "rv64ub-ps-bexti",
+    "rv64ub-ps-binv",
+    "rv64ub-ps-binvi",
+    "rv64ub-ps-bset",
+    "rv64ub-ps-bseti",
+    "rv64ub-ps-clmul",
+    "rv64ub-ps-clmulh",
+    "rv64ub-ps-clmulr",
+    "rv64ub-ps-clz",
+    "rv64ub-ps-clzw",
+    "rv64ub-ps-cpop",
+    "rv64ub-ps-cpopw",
+    "rv64ub-ps-ctz",
+    "rv64ub-ps-ctzw",
+    "rv64ub-ps-max",
+    "rv64ub-ps-maxu",
+    "rv64ub-ps-min",
+    "rv64ub-ps-minu",
+    "rv64ub-ps-orc_b",
+    "rv64ub-ps-orn",
+    "rv64ub-ps-rev8",
+    "rv64ub-ps-rol",
+    "rv64ub-ps-rolw",
+    "rv64ub-ps-ror",
+    "rv64ub-ps-rori",
+    "rv64ub-ps-roriw",
+    "rv64ub-ps-rorw",
+    "rv64ub-ps-sext_b",
+    "rv64ub-ps-sext_h",
+    "rv64ub-ps-sh1add",
+    "rv64ub-ps-sh1add_uw",
+    "rv64ub-ps-sh2add",
+    "rv64ub-ps-sh2add_uw",
+    "rv64ub-ps-sh3add",
+    "rv64ub-ps-sh3add_uw",
+    "rv64ub-ps-slli_uw",
+    "rv64ub-ps-xnor",
+    "rv64ub-ps-zext_h",
+    "rv64uc-ps-rvc",
     "rv64ud-ps-fadd",
     "rv64ud-ps-fclass",
     "rv64ud-ps-fcmp",
@@ -169,10 +213,168 @@
     "rv64uzfh-ps-recoding",
 )
 
+rv32_binaries = (
+    "rv32ua-ps-amoadd_w",
+    "rv32ua-ps-amoand_w",
+    "rv32ua-ps-amomaxu_w",
+    "rv32ua-ps-amomax_w",
+    "rv32ua-ps-amominu_w",
+    "rv32ua-ps-amomin_w",
+    "rv32ua-ps-amoor_w",
+    "rv32ua-ps-amoswap_w",
+    "rv32ua-ps-amoxor_w",
+    "rv32ua-ps-lrsc",
+    "rv32uamt-ps-amoadd_w",
+    "rv32uamt-ps-amoand_w",
+    "rv32uamt-ps-amomaxu_w",
+    "rv32uamt-ps-amomax_w",
+    "rv32uamt-ps-amominu_w",
+    "rv32uamt-ps-amomin_w",
+    "rv32uamt-ps-amoor_w",
+    "rv32uamt-ps-amoswap_w",
+    "rv32uamt-ps-amoxor_w",
+    "rv32uamt-ps-lrsc_w",
+    "rv32ub-ps-andn",
+    "rv32ub-ps-bclr",
+    "rv32ub-ps-bclri",
+    "rv32ub-ps-bext",
+    "rv32ub-ps-bexti",
+    "rv32ub-ps-binv",
+    "rv32ub-ps-binvi",
+    "rv32ub-ps-bset",
+    "rv32ub-ps-bseti",
+    "rv32ub-ps-clmul",
+    "rv32ub-ps-clmulh",
+    "rv32ub-ps-clmulr",
+    "rv32ub-ps-clz",
+    "rv32ub-ps-cpop",
+    "rv32ub-ps-ctz",
+    "rv32ub-ps-max",
+    "rv32ub-ps-maxu",
+    "rv32ub-ps-min",
+    "rv32ub-ps-minu",
+    "rv32ub-ps-orc_b",
+    "rv32ub-ps-orn",
+    "rv32ub-ps-rev8",
+    "rv32ub-ps-rol",
+    "rv32ub-ps-ror",
+    "rv32ub-ps-rori",
+    "rv32ub-ps-sext_b",
+    "rv32ub-ps-sext_h",
+    "rv32ub-ps-sh1add",
+    "rv32ub-ps-sh2add",
+    "rv32ub-ps-sh3add",
+    "rv32ub-ps-xnor",
+    "rv32ub-ps-zext_h",
+    "rv32uc-ps-rvc",
+    "rv32ud-ps-fadd",
+    "rv32ud-ps-fclass",
+    "rv32ud-ps-fcmp",
+    "rv32ud-ps-fcvt",
+    "rv32ud-ps-fcvt_w",
+    "rv32ud-ps-fdiv",
+    "rv32ud-ps-fmadd",
+    "rv32ud-ps-fmin",
+    "rv32ud-ps-ldst",
+    "rv32ud-ps-recoding",
+    "rv32uf-ps-fadd",
+    "rv32uf-ps-fclass",
+    "rv32uf-ps-fcmp",
+    "rv32uf-ps-fcvt",
+    "rv32uf-ps-fcvt_w",
+    "rv32uf-ps-fdiv",
+    "rv32uf-ps-fmadd",
+    "rv32uf-ps-fmin",
+    "rv32uf-ps-ldst",
+    "rv32uf-ps-move",
+    "rv32uf-ps-recoding",
+    "rv32ui-ps-add",
+    "rv32ui-ps-addi",
+    "rv32ui-ps-and",
+    "rv32ui-ps-andi",
+    "rv32ui-ps-auipc",
+    "rv32ui-ps-beq",
+    "rv32ui-ps-bge",
+    "rv32ui-ps-bgeu",
+    "rv32ui-ps-blt",
+    "rv32ui-ps-bltu",
+    "rv32ui-ps-bne",
+    "rv32ui-ps-fence_i",
+    "rv32ui-ps-jal",
+    "rv32ui-ps-jalr",
+    "rv32ui-ps-lb",
+    "rv32ui-ps-lbu",
+    "rv32ui-ps-lh",
+    "rv32ui-ps-lhu",
+    "rv32ui-ps-lui",
+    "rv32ui-ps-lw",
+    "rv32ui-ps-or",
+    "rv32ui-ps-ori",
+    "rv32ui-ps-sb",
+    "rv32ui-ps-sh",
+    "rv32ui-ps-simple",
+    "rv32ui-ps-sll",
+    "rv32ui-ps-slli",
+    "rv32ui-ps-slt",
+    "rv32ui-ps-slti",
+    "rv32ui-ps-sltiu",
+    "rv32ui-ps-sltu",
+    "rv32ui-ps-sra",
+    "rv32ui-ps-srai",
+    "rv32ui-ps-srl",
+    "rv32ui-ps-srli",
+    "rv32ui-ps-sub",
+    "rv32ui-ps-sw",
+    "rv32ui-ps-xor",
+    "rv32ui-ps-xori",
+    "rv32um-ps-div",
+    "rv32um-ps-divu",
+    "rv32um-ps-mul",
+    "rv32um-ps-mulh",
+    "rv32um-ps-mulhsu",
+    "rv32um-ps-mulhu",
+    "rv32um-ps-rem",
+    "rv32um-ps-remu",
+    "rv32uzfh-ps-fadd",
+    "rv32uzfh-ps-fclass",
+    "rv32uzfh-ps-fcmp",
+    "rv32uzfh-ps-fcvt",
+    "rv32uzfh-ps-fcvt_w",
+    "rv32uzfh-ps-fdiv",
+    "rv32uzfh-ps-fmadd",
+    "rv32uzfh-ps-fmin",
+    "rv32uzfh-ps-ldst",
+    "rv32uzfh-ps-move",
+    "rv32uzfh-ps-recoding",
+)
+
 cpu_types = ("atomic", "timing", "minor", "o3")
 
 for cpu_type in cpu_types:
-    for binary in binaries:
+    for binary in rv64_binaries:
+        gem5_verify_config(
+            name=f"asm-riscv-{binary}-{cpu_type}",
+            verifiers=(),
+            config=joinpath(
+                config.base_dir,
+                "tests",
+                "gem5",
+                "asmtest",
+                "configs",
+                "riscv_asmtest.py",
+            ),
+            config_args=[
+                binary,
+                cpu_type,
+                "--num-cores",
+                "4",
+                "--resource-directory",
+                resource_path,
+            ],
+            valid_isas=(constants.all_compiled_tag,),
+            valid_hosts=constants.supported_hosts,
+        )
+    for binary in rv32_binaries:
         gem5_verify_config(
             name=f"asm-riscv-{binary}-{cpu_type}",
             verifiers=(),
@@ -180,15 +382,16 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "asmtest",
                 "configs",
-                "simple_binary_run.py",
+                "riscv_asmtest.py",
             ),
             config_args=[
                 binary,
                 cpu_type,
-                "riscv",
                 "--num-cores",
                 "4",
+                "--riscv-32bits",
                 "--resource-directory",
                 resource_path,
             ],
diff --git a/tests/gem5/checkpoint_tests/README.md b/tests/gem5/checkpoint_tests/README.md
new file mode 100644
index 0000000000..64767169ba
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/README.md
@@ -0,0 +1,11 @@
+# Checkpoint tests
+
+These tests run a hello world binary for the Arm, x86, and Power ISAs, and an Ubuntu boot workload for the x86 ISA, using checkpoints.
+Each binary is run in two parts:
+- Save checkpoint: A binary is run for a set number of ticks and then a checkpoint is taken. This test checks if the checkpoint is taken.
+
+- Restore checkpoint: The same binary and board as in the respective save test are used with the saved checkpoint (the checkpoint is uploaded to gem5 resources). This test checks if the binary ran properly.
+
+```bash
+./main.py run gem5/checkpoint_tests/
+```
diff --git a/tests/gem5/checkpoint_tests/configs/arm-hello-restore-checkpoint.py b/tests/gem5/checkpoint_tests/configs/arm-hello-restore-checkpoint.py
new file mode 100644
index 0000000000..0ce9a7606a
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/arm-hello-restore-checkpoint.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+"""
+This gem5 configuration script creates a simple board sharing the same
+structure as the one in
+tests/gem5/checkpoint_tests/configs/arm-hello-save-checkpoint.py.
+This script restores the checkpoint generated by the above script, and
+runs the rest of "arm-hello64-static" binary simulation.
+This configuration serves as a test of restoring a checkpoint with ARM ISA.
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource, CheckpointResource
+
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+
+requires(isa_required=ISA.ARM)
+
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
+)
+
+memory = SingleChannelDDR3_1600(size="32MB")
+
+processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=2)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+board.set_se_binary_workload(
+    obtain_resource("arm-hello64-static"),
+    checkpoint=obtain_resource("arm-hello-test-checkpoint"),
+)
+
+sim = Simulator(board=board, full_system=False)
+sim.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/checkpoint_tests/configs/arm-hello-save-checkpoint.py b/tests/gem5/checkpoint_tests/configs/arm-hello-save-checkpoint.py
new file mode 100644
index 0000000000..a731b38a58
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/arm-hello-save-checkpoint.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="arm-hello-test-checkpoint/",
+    help="The directory to store the checkpoint.",
+)
+
+args = parser.parse_args()
+requires(isa_required=ISA.ARM)
+
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="16kB", l1i_size="16kB", l2_size="256kB"
+)
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=2)
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(obtain_resource("arm-hello64-static"))
+
+sim = Simulator(board=board, full_system=False)
+max_ticks = 10**6
+sim.run(max_ticks=max_ticks)
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
+print("Taking checkpoint at", args.checkpoint_path)
+sim.save_checkpoint(args.checkpoint_path)
+print("Done taking checkpoint")
diff --git a/tests/gem5/checkpoint_tests/configs/power-hello-restore-checkpoint.py b/tests/gem5/checkpoint_tests/configs/power-hello-restore-checkpoint.py
new file mode 100644
index 0000000000..05479bcca7
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/power-hello-restore-checkpoint.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuration script creates a simple board sharing the same
+structure as the one in
+tests/gem5/checkpoint_tests/configs/power-hello-save-checkpoint.py.
+This script restores the checkpoint generated by the above script, and
+runs the rest of "power-hello" binary simulation.
+This configuration serves as a test of restoring a checkpoint with POWER ISA.
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource, CheckpointResource
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+
+requires(isa_required=ISA.POWER)
+
+cache_hierarchy = NoCache()
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING, isa=ISA.POWER, num_cores=2
+)
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(
+    obtain_resource("power-hello"),
+    checkpoint=obtain_resource("power-hello-test-checkpoint"),
+)
+
+sim = Simulator(board=board, full_system=False)
+sim.run()
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/checkpoint_tests/configs/power-hello-save-checkpoint.py b/tests/gem5/checkpoint_tests/configs/power-hello-save-checkpoint.py
new file mode 100644
index 0000000000..6fb99a2534
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/power-hello-save-checkpoint.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 test script creates a simple board to run the first
+10^6 ticks of "power-hello" binary simulation and saves a checkpoint.
+This configuration serves as a test to ensure that checkpoints work
+with POWER ISA.
+"""
+
+import argparse
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="power-hello-test-checkpoint/",
+    help="The directory to store the checkpoint.",
+)
+
+args = parser.parse_args()
+requires(isa_required=ISA.POWER)
+
+cache_hierarchy = NoCache()
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING, isa=ISA.POWER, num_cores=2
+)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(obtain_resource("power-hello"))
+
+sim = Simulator(board=board, full_system=False)
+max_ticks = 10**6
+sim.run(max_ticks=max_ticks)
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
+print("Taking checkpoint at", args.checkpoint_path)
+sim.save_checkpoint(args.checkpoint_path)
+print("Done taking checkpoint")
diff --git a/tests/gem5/checkpoint_tests/configs/sparc-hello-restore-checkpoint.py b/tests/gem5/checkpoint_tests/configs/sparc-hello-restore-checkpoint.py
new file mode 100644
index 0000000000..0bc2e122cd
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/sparc-hello-restore-checkpoint.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuration script creates a simple board sharing the same
+structure as the one in
+tests/gem5/checkpoint_tests/configs/sparc-hello-save-checkpoint.py.
+This script restores the checkpoint generated by the above script, and
+runs the rest of "sparc-hello" binary simulation.
+This configuration serves as a test of restoring a checkpoint with SPARC ISA.
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource, CheckpointResource
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+
+requires(isa_required=ISA.SPARC)
+
+cache_hierarchy = NoCache()
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING, isa=ISA.SPARC, num_cores=2
+)
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(
+    obtain_resource("sparc-hello"),
+    checkpoint=CheckpointResource(local_path="./sparc-hello-test-checkpoint"),
+)
+
+sim = Simulator(board=board, full_system=False)
+sim.run()
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/checkpoint_tests/configs/sparc-hello-save-checkpoint.py b/tests/gem5/checkpoint_tests/configs/sparc-hello-save-checkpoint.py
new file mode 100644
index 0000000000..ab216588aa
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/sparc-hello-save-checkpoint.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 test script creates a simple board to run the first
+10^6 ticks of "sparc-hello" binary simulation and saves a checkpoint.
+This configuration serves as a test to ensure that checkpoints work
+with SPARC ISA.
+"""
+
+import argparse
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="sparc-hello-test-checkpoint/",
+    help="The directory to store the checkpoint.",
+)
+
+args = parser.parse_args()
+requires(isa_required=ISA.SPARC)
+
+cache_hierarchy = NoCache()
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING, isa=ISA.SPARC, num_cores=2
+)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(obtain_resource("sparc-hello"))
+
+sim = Simulator(board=board, full_system=False)
+max_ticks = 10**6
+sim.run(max_ticks=max_ticks)
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
+print("Taking checkpoint at", args.checkpoint_path)
+sim.save_checkpoint(args.checkpoint_path)
+print("Done taking checkpoint")
diff --git a/tests/gem5/checkpoint_tests/configs/x86-fs-restore-checkpoint.py b/tests/gem5/checkpoint_tests/configs/x86-fs-restore-checkpoint.py
new file mode 100644
index 0000000000..0a3264d576
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/x86-fs-restore-checkpoint.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuration script creates a simple board sharing the same
+structure as the one in
+tests/gem5/checkpoint_tests/configs/x86-fs-save-checkpoint.py.
+This script restores the checkpoint generated by the above script, and
+runs the rest of full system simulation.
+This configuration serves as a test of restoring a checkpoint with X86 ISA in fs mode.
+"""
+
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource, CheckpointResource
+from gem5.simulate.simulator import Simulator
+
+# Run a check to ensure the right version of gem5 is being used.
+requires(isa_required=ISA.X86)
+
+# Setup the cache hierarchy.
+# For classic, PrivateL1PrivateL2 and NoCache have been tested.
+# For Ruby, MESI_Two_Level and MI_example have been tested.
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="32kB", l1i_size="32kB", l2_size="512kB"
+)
+
+# Setup the system memory.
+memory = SingleChannelDDR3_1600(size="1GB")
+
+# Setup a single core Processor.
+processor = SimpleProcessor(cpu_type=CPUTypes.O3, isa=ISA.X86, num_cores=1)
+
+# Setup the board.
+board = X86Board(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the Full System workload.
+board.set_kernel_disk_workload(
+    kernel=obtain_resource("x86-linux-kernel-5.4.49"),
+    disk_image=obtain_resource("x86-ubuntu-18.04-img"),
+    checkpoint=obtain_resource("x86-fs-test-checkpoint"),
+)
+
+sim = Simulator(board=board, full_system=True)
+print("Beginning simulation!")
+
+sim.run(max_ticks=10**10)
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/checkpoint_tests/configs/x86-fs-save-checkpoint.py b/tests/gem5/checkpoint_tests/configs/x86-fs-save-checkpoint.py
new file mode 100644
index 0000000000..891130af2b
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/x86-fs-save-checkpoint.py
@@ -0,0 +1,101 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 test script creates a simple board to run the first
+10^6 ticks of x86 full system kernel disk workload simulation and saves a checkpoint.
+This configuration serves as a test to ensure that checkpoints work
+with X86 ISA in fs mode.
+"""
+
+import argparse
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource
+from gem5.simulate.simulator import Simulator
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="x86-fs-test-checkpoint/",
+    help="The directory to store the checkpoint.",
+)
+
+args = parser.parse_args()
+
+# Run a check to ensure the right version of gem5 is being used.
+requires(isa_required=ISA.X86)
+
+# Setup the cache hierarchy.
+# For classic, PrivateL1PrivateL2 and NoCache have been tested.
+# For Ruby, MESI_Two_Level and MI_example have been tested.
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="32kB", l1i_size="32kB", l2_size="512kB"
+)
+
+# Setup the system memory.
+memory = SingleChannelDDR3_1600(size="1GB")
+
+# Setup a single core Processor.
+processor = SimpleProcessor(cpu_type=CPUTypes.O3, isa=ISA.X86, num_cores=1)
+
+# Setup the board.
+board = X86Board(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the Full System workload.
+board.set_kernel_disk_workload(
+    kernel=obtain_resource("x86-linux-kernel-5.4.49"),
+    disk_image=obtain_resource("x86-ubuntu-18.04-img"),
+)
+
+sim = Simulator(board=board, full_system=True)
+print("Beginning simulation!")
+
+max_ticks = 10**6
+sim.run(max_ticks=max_ticks)
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
+print("Taking checkpoint at", args.checkpoint_path)
+sim.save_checkpoint(args.checkpoint_path)
+print("Done taking checkpoint")
diff --git a/tests/gem5/checkpoint_tests/configs/x86-hello-restore-checkpoint.py b/tests/gem5/checkpoint_tests/configs/x86-hello-restore-checkpoint.py
new file mode 100644
index 0000000000..c60675eb24
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/x86-hello-restore-checkpoint.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuration script creates a simple board sharing the same
+structure as the one in
+tests/gem5/checkpoint_tests/configs/x86-hello-save-checkpoint.py.
+This script restores the checkpoint generated by the above script, and
+runs the rest of "x86-hello64-static" binary simulation.
+This configuration serves as a test of restoring a checkpoint with X86 ISA.
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource, CheckpointResource
+from gem5.components.cachehierarchies.classic.private_l1_cache_hierarchy import (
+    PrivateL1CacheHierarchy,
+)
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+
+requires(isa_required=ISA.X86)
+
+cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="16kB", l1i_size="16kB")
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=4)
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(
+    obtain_resource("x86-hello64-static"),
+    checkpoint=obtain_resource("x86-hello-test-checkpoint"),
+)
+
+sim = Simulator(board=board, full_system=False)
+sim.run()
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/checkpoint_tests/configs/x86-hello-save-checkpoint.py b/tests/gem5/checkpoint_tests/configs/x86-hello-save-checkpoint.py
new file mode 100644
index 0000000000..5611e795d4
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/configs/x86-hello-save-checkpoint.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 test script creates a simple board to run the first
+10^6 ticks of "x86-hello64-static" binary simulation and saves a checkpoint.
+This configuration serves as a test to ensure that checkpoints work
+with X86 ISA.
+"""
+
+import argparse
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import obtain_resource
+from gem5.components.cachehierarchies.classic.private_l1_cache_hierarchy import (
+    PrivateL1CacheHierarchy,
+)
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="x86-hello-test-checkpoint/",
+    help="The directory to store the checkpoint.",
+)
+
+args = parser.parse_args()
+requires(isa_required=ISA.X86)
+
+cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="16kB", l1i_size="16kB")
+
+memory = SingleChannelDDR3_1600(size="32MB")
+processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=4)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+board.set_se_binary_workload(obtain_resource("x86-hello64-static"))
+
+sim = Simulator(board=board, full_system=False)
+max_ticks = 10**6
+sim.run(max_ticks=max_ticks)
+print(
+    "Exiting @ tick {} because {}.".format(
+        sim.get_current_tick(), sim.get_last_exit_event_cause()
+    )
+)
+print("Taking checkpoint at", args.checkpoint_path)
+sim.save_checkpoint(args.checkpoint_path)
+print("Done taking checkpoint")
diff --git a/tests/gem5/checkpoint_tests/test-checkpoints.py b/tests/gem5/checkpoint_tests/test-checkpoints.py
new file mode 100644
index 0000000000..7a6c18d626
--- /dev/null
+++ b/tests/gem5/checkpoint_tests/test-checkpoints.py
@@ -0,0 +1,241 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This runs the save/restore checkpoint configs found in
+`tests/gem5/checkpoint_tests/configs` and checks their expected output.
+"""
+from testlib import *
+import re
+import os
+
+if config.bin_path:
+    resource_path = config.bin_path
+else:
+    resource_path = joinpath(absdirpath(__file__), "..", "resources")
+
+hello_verifier = verifier.MatchRegex(re.compile(r"Hello world!"))
+save_checkpoint_verifier = verifier.MatchRegex(
+    re.compile(r"Done taking checkpoint")
+)
+
+
+gem5_verify_config(
+    name="test-checkpoint-arm-hello-save-checkpoint",
+    fixtures=(),
+    verifiers=(save_checkpoint_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "arm-hello-save-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "arm-hello-test-checkpoint"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-arm-hello-restore-checkpoint",
+    fixtures=(),
+    verifiers=(hello_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "arm-hello-restore-checkpoint.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-x86-hello-save-checkpoint",
+    fixtures=(),
+    verifiers=(save_checkpoint_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "x86-hello-save-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "x86-hello-test-checkpoint"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-x86-hello-restore-checkpoint",
+    fixtures=(),
+    verifiers=(hello_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "x86-hello-restore-checkpoint.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-x86-fs-save-checkpoint",
+    fixtures=(),
+    verifiers=(save_checkpoint_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "x86-fs-save-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "x86-fs-test-checkpoint"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-x86-fs-restore-checkpoint",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "x86-fs-restore-checkpoint.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-power-hello-save-checkpoint",
+    fixtures=(),
+    verifiers=(save_checkpoint_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "power-hello-save-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "power-hello-test-checkpoint"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="test-checkpoint-power-hello-restore-checkpoint",
+    fixtures=(),
+    verifiers=(hello_verifier,),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "checkpoint_tests",
+        "configs",
+        "power-hello-restore-checkpoint.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+# There is a bug in the SPARC ISA that causes the checkpoints to fail.
+# GitHub issue: https://github.com/gem5/gem5/issues/197
+# gem5_verify_config(
+#     name="test-checkpoint-sparc-hello-save-checkpoint",
+#     fixtures=(),
+#     verifiers=(save_checkpoint_verifier,),
+#     config=joinpath(
+#         config.base_dir,
+#         "tests",
+#         "gem5",
+#         "checkpoint_tests",
+#         "configs",
+#         "sparc-hello-save-checkpoint.py",
+#     ),
+#     config_args=[
+#         # "--checkpoint-path",
+#         # joinpath(resource_path, "sparc-hello-test-checkpoint"),
+#     ],
+#     valid_isas=(constants.all_compiled_tag,),
+#     valid_hosts=constants.supported_hosts,
+#     length=constants.quick_tag,
+# )
+
+# gem5_verify_config(
+#     name="test-checkpoint-sparc-hello-restore-checkpoint",
+#     fixtures=(),
+#     verifiers=(hello_verifier,),
+#     config=joinpath(
+#         config.base_dir,
+#         "tests",
+#         "gem5",
+#         "checkpoint_tests",
+#         "configs",
+#         "sparc-hello-restore-checkpoint.py",
+#     ),
+#     config_args=[],
+#     valid_isas=(constants.all_compiled_tag,),
+#     valid_hosts=constants.supported_hosts,
+#     length=constants.quick_tag,
+# )
diff --git a/tests/gem5/cpu_tests/README.md b/tests/gem5/cpu_tests/README.md
new file mode 100644
index 0000000000..574d12ce0a
--- /dev/null
+++ b/tests/gem5/cpu_tests/README.md
@@ -0,0 +1,8 @@
+# CPU Tests
+
+These tests run the Bubblesort and FloatMM workloads against the different CPU models.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/cpu_tests --length=[length]
+```
diff --git a/tests/gem5/cpu_tests/run.py b/tests/gem5/cpu_tests/run.py
index fb528e5e8c..06790c7ea1 100644
--- a/tests/gem5/cpu_tests/run.py
+++ b/tests/gem5/cpu_tests/run.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright (c) 2018 The Regents of the University of California
 # All Rights Reserved.
 #
diff --git a/tests/gem5/dram_lowp/README.md b/tests/gem5/dram_lowp/README.md
new file mode 100644
index 0000000000..bff3f033fd
--- /dev/null
+++ b/tests/gem5/dram_lowp/README.md
@@ -0,0 +1,8 @@
+# DRAM LowP
+
+These tests run the `configs/dram` scripts that trigger low power state transitions in the DRAM controller.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/dram_lowp --length=[length]
+```
diff --git a/tests/gem5/dram-lowp/ref/simout b/tests/gem5/dram_lowp/ref/simout.txt
similarity index 100%
rename from tests/gem5/dram-lowp/ref/simout
rename to tests/gem5/dram_lowp/ref/simout.txt
diff --git a/tests/gem5/dram-lowp/test_dram_lowp.py b/tests/gem5/dram_lowp/test_dram_lowp.py
similarity index 96%
rename from tests/gem5/dram-lowp/test_dram_lowp.py
rename to tests/gem5/dram_lowp/test_dram_lowp.py
index 2e146bbe46..ec38acacc1 100644
--- a/tests/gem5/dram-lowp/test_dram_lowp.py
+++ b/tests/gem5/dram_lowp/test_dram_lowp.py
@@ -26,7 +26,9 @@
 
 from testlib import *
 
-verifiers = (verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref", "simout")),)
+verifiers = (
+    verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref", "simout.txt")),
+)
 
 gem5_verify_config(
     name="test-low_power-close_adaptive",
diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py
index d3312c9a63..da2cf11be2 100644
--- a/tests/gem5/fixture.py
+++ b/tests/gem5/fixture.py
@@ -53,19 +53,19 @@
 import testlib.log as log
 from testlib.state import Result
 
+from typing import Optional, List
+
 
 class VariableFixture(Fixture):
     def __init__(self, value=None, name=None):
-        super(VariableFixture, self).__init__(name=name)
+        super().__init__(name=name)
         self.value = value
 
 
 class TempdirFixture(Fixture):
     def __init__(self):
         self.path = None
-        super(TempdirFixture, self).__init__(
-            name=constants.tempdir_fixture_name
-        )
+        super().__init__(name=constants.tempdir_fixture_name)
 
     def setup(self, testitem):
         self.path = tempfile.mkdtemp(prefix="gem5out")
@@ -74,7 +74,7 @@ def post_test_procedure(self, testitem):
         suiteUID = testitem.metadata.uid.suite
         testUID = testitem.metadata.name
         testing_result_folder = os.path.join(
-            config.result_path, "SuiteUID:" + suiteUID, "TestUID:" + testUID
+            config.result_path, "SuiteUID-" + suiteUID, "TestUID-" + testUID
         )
 
         # Copy the output files of the run from /tmp to testing-results
@@ -111,7 +111,7 @@ def __new__(cls, target):
         if target in cls.fixtures:
             obj = cls.fixtures[target]
         else:
-            obj = super(UniqueFixture, cls).__new__(cls)
+            obj = super().__new__(cls)
             obj.lock = threading.Lock()
             obj.target = target
             cls.fixtures[target] = obj
@@ -121,7 +121,7 @@ def __init__(self, *args, **kwargs):
         with self.lock:
             if hasattr(self, "_init_done"):
                 return
-            super(UniqueFixture, self).__init__(self, **kwargs)
+            super().__init__(self, **kwargs)
             self._init(*args, **kwargs)
             self._init_done = True
 
@@ -144,9 +144,13 @@ class SConsFixture(UniqueFixture):
     """
 
     def __new__(cls, target):
-        obj = super(SConsFixture, cls).__new__(cls, target)
+        obj = super().__new__(cls, target)
         return obj
 
+    def _setup(self, testitem):
+        if config.skip_build:
+            return
+
     def _setup(self, testitem):
         if config.skip_build:
             return
@@ -189,7 +193,7 @@ def __new__(cls, isa, variant, protocol=None):
         if protocol:
             target_dir += "_" + protocol
         target = joinpath(target_dir, f"gem5.{variant}")
-        obj = super(Gem5Fixture, cls).__new__(cls, target)
+        obj = super().__new__(cls, target)
         return obj
 
     def _init(self, isa, variant, protocol=None):
@@ -204,18 +208,24 @@ def _init(self, isa, variant, protocol=None):
             self.options = ["--default=" + isa.upper(), "PROTOCOL=" + protocol]
         self.set_global()
 
+    def get_get_build_info(self) -> Optional[str]:
+        build_target = self.target
+        if self.options:
+            build_target += " ".join(self.options)
+        return build_target
+
 
 class MakeFixture(Fixture):
     def __init__(self, directory, *args, **kwargs):
         name = f"make -C {directory}"
-        super(MakeFixture, self).__init__(
+        super().__init__(
             build_once=True, lazy_init=False, name=name, *args, **kwargs
         )
         self.targets = []
         self.directory = directory
 
     def setup(self):
-        super(MakeFixture, self).setup()
+        super().setup()
         targets = set(self.required_by)
         command = ["make", "-C", self.directory]
         command.extend([target.target for target in targets])
@@ -230,7 +240,7 @@ def __init__(self, target, make_fixture=None, *args, **kwargs):
         scons we need to know what invocation to attach to. If none given,
         creates its own.
         """
-        super(MakeTarget, self).__init__(name=target, *args, **kwargs)
+        super().__init__(name=target, *args, **kwargs)
         self.target = self.name
 
         if make_fixture is None:
@@ -244,7 +254,7 @@ def __init__(self, target, make_fixture=None, *args, **kwargs):
         self.require(self.make_fixture)
 
     def setup(self, testitem):
-        super(MakeTarget, self).setup()
+        super().setup()
         self.make_fixture.setup()
         return self
 
@@ -254,7 +264,7 @@ def __init__(self, program, isa, os, recompile=False):
         make_dir = joinpath(config.bin_dir, program)
         make_fixture = MakeFixture(make_dir)
         target = joinpath("bin", isa, os, program)
-        super(TestProgram, self).__init__(target, make_fixture)
+        super().__init__(target, make_fixture)
         self.path = joinpath(make_dir, target)
         self.recompile = recompile
 
@@ -274,7 +284,7 @@ class DownloadedProgram(UniqueFixture):
 
     def __new__(cls, url, path, filename, gzip_decompress=False):
         target = joinpath(path, filename)
-        return super(DownloadedProgram, cls).__new__(cls, target)
+        return super().__new__(cls, target)
 
     def _init(self, url, path, filename, gzip_decompress=False, **kwargs):
         """
@@ -358,7 +368,6 @@ def _extract(self):
         with tarfile.open(self.filename) as tf:
 
             def is_within_directory(directory, target):
-
                 abs_directory = os.path.abspath(directory)
                 abs_target = os.path.abspath(target)
 
@@ -369,7 +378,6 @@ def is_within_directory(directory, target):
             def safe_extract(
                 tar, path=".", members=None, *, numeric_owner=False
             ):
-
                 for member in tar.getmembers():
                     member_path = os.path.join(path, member.name)
                     if not is_within_directory(path, member_path):
diff --git a/tests/gem5/fs/linux/arm/README.md b/tests/gem5/fs/linux/arm/README.md
new file mode 100644
index 0000000000..ba4bf07b36
--- /dev/null
+++ b/tests/gem5/fs/linux/arm/README.md
@@ -0,0 +1,8 @@
+# FS
+
+This is a set of full system ARM tests.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/fs/linux/arm --length=[length]
+```
diff --git a/tests/gem5/configs/arm_generic.py b/tests/gem5/fs/linux/arm/configs/arm_generic.py
similarity index 98%
rename from tests/gem5/configs/arm_generic.py
rename to tests/gem5/fs/linux/arm/configs/arm_generic.py
index df118c7583..ad2ea58597 100644
--- a/tests/gem5/configs/arm_generic.py
+++ b/tests/gem5/fs/linux/arm/configs/arm_generic.py
@@ -56,7 +56,7 @@ class ArmSESystemUniprocessor(BaseSESystemUniprocessor):
     """
 
     def __init__(self, **kwargs):
-        super(ArmSESystemUniprocessor, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def create_caches_private(self, cpu):
         # The atomic SE configurations do not use caches
@@ -67,7 +67,7 @@ def create_caches_private(self, cpu):
             )
 
 
-class LinuxArmSystemBuilder(object):
+class LinuxArmSystemBuilder:
     """Mix-in that implements create_system.
 
     This mix-in is intended as a convenient way of adding an
diff --git a/tests/gem5/configs/base_caches.py b/tests/gem5/fs/linux/arm/configs/base_caches.py
similarity index 100%
rename from tests/gem5/configs/base_caches.py
rename to tests/gem5/fs/linux/arm/configs/base_caches.py
diff --git a/tests/gem5/configs/base_config.py b/tests/gem5/fs/linux/arm/configs/base_config.py
similarity index 96%
rename from tests/gem5/configs/base_config.py
rename to tests/gem5/fs/linux/arm/configs/base_config.py
index 22987d5eff..4bf374b01c 100644
--- a/tests/gem5/configs/base_config.py
+++ b/tests/gem5/fs/linux/arm/configs/base_config.py
@@ -46,7 +46,7 @@
 _have_kvm_support = "BaseKvmCPU" in globals()
 
 
-class BaseSystem(object, metaclass=ABCMeta):
+class BaseSystem(metaclass=ABCMeta):
     """Base system builder.
 
     This class provides some basic functionality for creating an ARM
@@ -254,10 +254,10 @@ class BaseSESystem(BaseSystem):
     """Basic syscall-emulation builder."""
 
     def __init__(self, **kwargs):
-        super(BaseSESystem, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def init_system(self, system):
-        super(BaseSESystem, self).init_system(system)
+        super().init_system(system)
 
     def create_system(self):
         if issubclass(self.mem_class, m5.objects.DRAMInterface):
@@ -291,7 +291,7 @@ class BaseSESystemUniprocessor(BaseSESystem):
     """
 
     def __init__(self, **kwargs):
-        super(BaseSESystemUniprocessor, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def create_caches_private(self, cpu):
         # The atomic SE configurations do not use caches
@@ -311,10 +311,10 @@ class BaseFSSystem(BaseSystem):
     """Basic full system builder."""
 
     def __init__(self, **kwargs):
-        super(BaseFSSystem, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def init_system(self, system):
-        super(BaseFSSystem, self).init_system(system)
+        super().init_system(system)
 
         if self.use_ruby:
             # Connect the ruby io port to the PIO bus,
@@ -356,7 +356,7 @@ class BaseFSSystemUniprocessor(BaseFSSystem):
     """
 
     def __init__(self, **kwargs):
-        super(BaseFSSystemUniprocessor, self).__init__(**kwargs)
+        super().__init__(**kwargs)
 
     def create_caches_private(self, cpu):
         cpu.addTwoLevelCacheHierarchy(
@@ -373,7 +373,7 @@ class BaseFSSwitcheroo(BaseFSSystem):
     """Uniprocessor system prepared for CPU switching"""
 
     def __init__(self, cpu_classes, **kwargs):
-        super(BaseFSSwitcheroo, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.cpu_classes = tuple(cpu_classes)
 
     def create_cpus(self, cpu_clk_domain):
diff --git a/tests/gem5/configs/checkpoint.py b/tests/gem5/fs/linux/arm/configs/checkpoint.py
similarity index 100%
rename from tests/gem5/configs/checkpoint.py
rename to tests/gem5/fs/linux/arm/configs/checkpoint.py
diff --git a/tests/gem5/configs/realview-minor-dual.py b/tests/gem5/fs/linux/arm/configs/realview-minor-dual.py
similarity index 100%
rename from tests/gem5/configs/realview-minor-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview-minor-dual.py
diff --git a/tests/gem5/configs/realview-minor.py b/tests/gem5/fs/linux/arm/configs/realview-minor.py
similarity index 100%
rename from tests/gem5/configs/realview-minor.py
rename to tests/gem5/fs/linux/arm/configs/realview-minor.py
diff --git a/tests/gem5/configs/realview-o3-checker.py b/tests/gem5/fs/linux/arm/configs/realview-o3-checker.py
similarity index 100%
rename from tests/gem5/configs/realview-o3-checker.py
rename to tests/gem5/fs/linux/arm/configs/realview-o3-checker.py
diff --git a/tests/gem5/configs/realview-o3-dual.py b/tests/gem5/fs/linux/arm/configs/realview-o3-dual.py
similarity index 100%
rename from tests/gem5/configs/realview-o3-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview-o3-dual.py
diff --git a/tests/gem5/configs/realview-o3.py b/tests/gem5/fs/linux/arm/configs/realview-o3.py
similarity index 100%
rename from tests/gem5/configs/realview-o3.py
rename to tests/gem5/fs/linux/arm/configs/realview-o3.py
diff --git a/tests/gem5/configs/realview-simple-atomic-checkpoint.py b/tests/gem5/fs/linux/arm/configs/realview-simple-atomic-checkpoint.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-atomic-checkpoint.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-atomic-checkpoint.py
diff --git a/tests/gem5/configs/realview-simple-atomic-dual.py b/tests/gem5/fs/linux/arm/configs/realview-simple-atomic-dual.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-atomic-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-atomic-dual.py
diff --git a/tests/gem5/configs/realview-simple-atomic.py b/tests/gem5/fs/linux/arm/configs/realview-simple-atomic.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-atomic.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-atomic.py
diff --git a/tests/gem5/configs/realview-simple-timing-dual-ruby.py b/tests/gem5/fs/linux/arm/configs/realview-simple-timing-dual-ruby.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-timing-dual-ruby.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-timing-dual-ruby.py
diff --git a/tests/gem5/configs/realview-simple-timing-dual.py b/tests/gem5/fs/linux/arm/configs/realview-simple-timing-dual.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-timing-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-timing-dual.py
diff --git a/tests/gem5/configs/realview-simple-timing-ruby.py b/tests/gem5/fs/linux/arm/configs/realview-simple-timing-ruby.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-timing-ruby.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-timing-ruby.py
diff --git a/tests/gem5/configs/realview-simple-timing.py b/tests/gem5/fs/linux/arm/configs/realview-simple-timing.py
similarity index 100%
rename from tests/gem5/configs/realview-simple-timing.py
rename to tests/gem5/fs/linux/arm/configs/realview-simple-timing.py
diff --git a/tests/gem5/configs/realview-switcheroo-atomic.py b/tests/gem5/fs/linux/arm/configs/realview-switcheroo-atomic.py
similarity index 100%
rename from tests/gem5/configs/realview-switcheroo-atomic.py
rename to tests/gem5/fs/linux/arm/configs/realview-switcheroo-atomic.py
diff --git a/tests/gem5/configs/realview-switcheroo-full.py b/tests/gem5/fs/linux/arm/configs/realview-switcheroo-full.py
similarity index 100%
rename from tests/gem5/configs/realview-switcheroo-full.py
rename to tests/gem5/fs/linux/arm/configs/realview-switcheroo-full.py
diff --git a/tests/gem5/configs/realview-switcheroo-noncaching-timing.py b/tests/gem5/fs/linux/arm/configs/realview-switcheroo-noncaching-timing.py
similarity index 100%
rename from tests/gem5/configs/realview-switcheroo-noncaching-timing.py
rename to tests/gem5/fs/linux/arm/configs/realview-switcheroo-noncaching-timing.py
diff --git a/tests/gem5/configs/realview-switcheroo-o3.py b/tests/gem5/fs/linux/arm/configs/realview-switcheroo-o3.py
similarity index 100%
rename from tests/gem5/configs/realview-switcheroo-o3.py
rename to tests/gem5/fs/linux/arm/configs/realview-switcheroo-o3.py
diff --git a/tests/gem5/configs/realview-switcheroo-timing.py b/tests/gem5/fs/linux/arm/configs/realview-switcheroo-timing.py
similarity index 100%
rename from tests/gem5/configs/realview-switcheroo-timing.py
rename to tests/gem5/fs/linux/arm/configs/realview-switcheroo-timing.py
diff --git a/tests/gem5/configs/realview64-kvm-dual.py b/tests/gem5/fs/linux/arm/configs/realview64-kvm-dual.py
similarity index 100%
rename from tests/gem5/configs/realview64-kvm-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview64-kvm-dual.py
diff --git a/tests/gem5/configs/realview64-kvm.py b/tests/gem5/fs/linux/arm/configs/realview64-kvm.py
similarity index 100%
rename from tests/gem5/configs/realview64-kvm.py
rename to tests/gem5/fs/linux/arm/configs/realview64-kvm.py
diff --git a/tests/gem5/configs/realview64-minor-dual.py b/tests/gem5/fs/linux/arm/configs/realview64-minor-dual.py
similarity index 100%
rename from tests/gem5/configs/realview64-minor-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview64-minor-dual.py
diff --git a/tests/gem5/configs/realview64-minor.py b/tests/gem5/fs/linux/arm/configs/realview64-minor.py
similarity index 100%
rename from tests/gem5/configs/realview64-minor.py
rename to tests/gem5/fs/linux/arm/configs/realview64-minor.py
diff --git a/tests/gem5/configs/realview64-o3-checker.py b/tests/gem5/fs/linux/arm/configs/realview64-o3-checker.py
similarity index 100%
rename from tests/gem5/configs/realview64-o3-checker.py
rename to tests/gem5/fs/linux/arm/configs/realview64-o3-checker.py
diff --git a/tests/gem5/configs/realview64-o3-dual-ruby.py b/tests/gem5/fs/linux/arm/configs/realview64-o3-dual-ruby.py
similarity index 100%
rename from tests/gem5/configs/realview64-o3-dual-ruby.py
rename to tests/gem5/fs/linux/arm/configs/realview64-o3-dual-ruby.py
diff --git a/tests/gem5/configs/realview64-o3-dual.py b/tests/gem5/fs/linux/arm/configs/realview64-o3-dual.py
similarity index 100%
rename from tests/gem5/configs/realview64-o3-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview64-o3-dual.py
diff --git a/tests/gem5/configs/realview64-o3.py b/tests/gem5/fs/linux/arm/configs/realview64-o3.py
similarity index 100%
rename from tests/gem5/configs/realview64-o3.py
rename to tests/gem5/fs/linux/arm/configs/realview64-o3.py
diff --git a/tests/gem5/configs/realview64-simple-atomic-checkpoint.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-atomic-checkpoint.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-atomic-checkpoint.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-atomic-checkpoint.py
diff --git a/tests/gem5/configs/realview64-simple-atomic-dual.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-atomic-dual.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-atomic-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-atomic-dual.py
diff --git a/tests/gem5/configs/realview64-simple-atomic.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-atomic.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-atomic.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-atomic.py
diff --git a/tests/gem5/configs/realview64-simple-timing-dual-ruby.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-timing-dual-ruby.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-timing-dual-ruby.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-timing-dual-ruby.py
diff --git a/tests/gem5/configs/realview64-simple-timing-dual.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-timing-dual.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-timing-dual.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-timing-dual.py
diff --git a/tests/gem5/configs/realview64-simple-timing-ruby.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-timing-ruby.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-timing-ruby.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-timing-ruby.py
diff --git a/tests/gem5/configs/realview64-simple-timing.py b/tests/gem5/fs/linux/arm/configs/realview64-simple-timing.py
similarity index 100%
rename from tests/gem5/configs/realview64-simple-timing.py
rename to tests/gem5/fs/linux/arm/configs/realview64-simple-timing.py
diff --git a/tests/gem5/configs/realview64-switcheroo-atomic.py b/tests/gem5/fs/linux/arm/configs/realview64-switcheroo-atomic.py
similarity index 100%
rename from tests/gem5/configs/realview64-switcheroo-atomic.py
rename to tests/gem5/fs/linux/arm/configs/realview64-switcheroo-atomic.py
diff --git a/tests/gem5/configs/realview64-switcheroo-full.py b/tests/gem5/fs/linux/arm/configs/realview64-switcheroo-full.py
similarity index 100%
rename from tests/gem5/configs/realview64-switcheroo-full.py
rename to tests/gem5/fs/linux/arm/configs/realview64-switcheroo-full.py
diff --git a/tests/gem5/configs/realview64-switcheroo-o3.py b/tests/gem5/fs/linux/arm/configs/realview64-switcheroo-o3.py
similarity index 100%
rename from tests/gem5/configs/realview64-switcheroo-o3.py
rename to tests/gem5/fs/linux/arm/configs/realview64-switcheroo-o3.py
diff --git a/tests/gem5/configs/realview64-switcheroo-timing.py b/tests/gem5/fs/linux/arm/configs/realview64-switcheroo-timing.py
similarity index 100%
rename from tests/gem5/configs/realview64-switcheroo-timing.py
rename to tests/gem5/fs/linux/arm/configs/realview64-switcheroo-timing.py
diff --git a/tests/gem5/configs/switcheroo.py b/tests/gem5/fs/linux/arm/configs/switcheroo.py
similarity index 99%
rename from tests/gem5/configs/switcheroo.py
rename to tests/gem5/fs/linux/arm/configs/switcheroo.py
index 72736a9d87..3c39fbf96a 100644
--- a/tests/gem5/configs/switcheroo.py
+++ b/tests/gem5/fs/linux/arm/configs/switcheroo.py
@@ -53,7 +53,7 @@ class Sequential:
 
     def __init__(self, cpus):
         self.first_cpu = None
-        for (cpuno, cpu) in enumerate(cpus):
+        for cpuno, cpu in enumerate(cpus):
             if not cpu.switched_out:
                 if self.first_cpu != None:
                     fatal("More than one CPU is switched in")
@@ -141,7 +141,6 @@ def run_test(root, switcher=None, freq=1000, verbose=False):
             exit_cause == "target called exit()"
             or exit_cause == "m5_exit instruction encountered"
         ):
-
             sys.exit(0)
         else:
             print(f"Test failed: Unknown exit cause: {exit_cause}")
diff --git a/tests/gem5/fs/linux/arm/run.py b/tests/gem5/fs/linux/arm/run.py
index 18a4e5e268..e677297cad 100644
--- a/tests/gem5/fs/linux/arm/run.py
+++ b/tests/gem5/fs/linux/arm/run.py
@@ -62,7 +62,7 @@ def run_test(root):
 # path setup
 sys.path.append(joinpath(gem5_root, "configs"))
 tests_root = joinpath(gem5_root, "tests")
-sys.path.append(joinpath(tests_root, "gem5", "configs"))
+sys.path.append(joinpath(tests_root, "gem5", "fs", "linux", "arm", "configs"))
 
 
 exec(compile(open(config).read(), config, "exec"))
diff --git a/tests/gem5/fs/linux/arm/test.py b/tests/gem5/fs/linux/arm/test.py
index 870024760e..f503f7ae02 100644
--- a/tests/gem5/fs/linux/arm/test.py
+++ b/tests/gem5/fs/linux/arm/test.py
@@ -129,7 +129,16 @@ def verifier_list(name):
         valid_hosts = constants.supported_hosts
 
     args = [
-        joinpath(config.base_dir, "tests", "gem5", "configs", name + ".py"),
+        joinpath(
+            config.base_dir,
+            "tests",
+            "gem5",
+            "fs",
+            "linux",
+            "arm",
+            "configs",
+            name + ".py",
+        ),
         path,
         config.base_dir,
     ]
@@ -147,7 +156,16 @@ def verifier_list(name):
 
 for name in arm_fs_long_tests:
     args = [
-        joinpath(config.base_dir, "tests", "gem5", "configs", name + ".py"),
+        joinpath(
+            config.base_dir,
+            "tests",
+            "gem5",
+            "fs",
+            "linux",
+            "arm",
+            "configs",
+            name + ".py",
+        ),
         path,
         config.base_dir,
     ]
@@ -164,7 +182,16 @@ def verifier_list(name):
 
 for name in arm_fs_long_tests_arm_target:
     args = [
-        joinpath(config.base_dir, "tests", "gem5", "configs", name + ".py"),
+        joinpath(
+            config.base_dir,
+            "tests",
+            "gem5",
+            "fs",
+            "linux",
+            "arm",
+            "configs",
+            name + ".py",
+        ),
         path,
         config.base_dir,
     ]
diff --git a/tests/gem5/gem5_library_example_tests/README.md b/tests/gem5/gem5_library_example_tests/README.md
new file mode 100644
index 0000000000..3abba289f7
--- /dev/null
+++ b/tests/gem5/gem5_library_example_tests/README.md
@@ -0,0 +1,8 @@
+# gem5 Library Example Tests
+
+This set of tests checks the examples in `configs/example/gem5_library`, and makes sure they run to completion.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/gem5_library_example_tests --length=very-long
+```
diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index e43d461b35..512e2c7cc2 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -29,6 +29,7 @@
 still function. They simply check the simulation completed.
 """
 from testlib import *
+from testlib.log import *
 import re
 import os
 
@@ -171,7 +172,14 @@
     length=constants.long_tag,
 )
 
-if os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
+log.test_log.message(
+    "PARSEC tests are disabled. This is due to our GitHub "
+    "Actions self-hosted runners only having 60GB of disk space. The "
+    "PARSEC Disk image is too big to use.",
+    level=LogLevel.Warn,
+)
+# 'False' is used to disable the tests.
+if False:  # os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
     # The x86-parsec-benchmarks uses KVM cores, this test will therefore only
     # be run on systems that support KVM.
     gem5_verify_config(
@@ -325,6 +333,23 @@
     length=constants.very_long_tag,
 )
 
+gem5_verify_config(
+    name="test-gem5-library-example-riscvmatched-microbenchmark-suite",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir,
+        "configs",
+        "example",
+        "gem5_library",
+        "riscvmatched-microbenchmark-suite.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.long_tag,
+)
+
 # The LoopPoint-Checkpointing feature is still under development, therefore
 # these tests are temporarily disabled until this feature is complete.#
 
diff --git a/tests/gem5/gem5_resources/README.md b/tests/gem5/gem5_resources/README.md
new file mode 100644
index 0000000000..8243c01b9f
--- /dev/null
+++ b/tests/gem5/gem5_resources/README.md
@@ -0,0 +1,8 @@
+# gem5 Resources
+
+This test makes sure that resources you download within gem5 work properly, and the downloaded resource matches the input given.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/gem5_resources --length=very-long
+```
diff --git a/tests/gem5/configs/download_check.py b/tests/gem5/gem5_resources/configs/download_check.py
similarity index 89%
rename from tests/gem5/configs/download_check.py
rename to tests/gem5/gem5_resources/configs/download_check.py
index e3b06a578d..4d5f9ac69c 100644
--- a/tests/gem5/configs/download_check.py
+++ b/tests/gem5/gem5_resources/configs/download_check.py
@@ -52,6 +52,14 @@
     "checked",
 )
 
+parser.add_argument(
+    "--skip",
+    nargs="+",  # Accepts 1 or more arguments.
+    type=str,
+    help="The resource IDs to skip. If not set, no resources will be skipped.",
+    required=False,
+)
+
 parser.add_argument(
     "--gem5-version",
     type=str,
@@ -83,9 +91,11 @@
 
 # We log all the errors as they occur then dump them at the end. This means we
 # can be aware of all download errors in a single failure.
-errors = str()
+errors = ""
 
 for id in ids:
+    if args.skip and id in args.skip:
+        continue
     if id not in resource_list:
         errors += (
             f"Resource with ID '{id}' not found in "
@@ -127,9 +137,14 @@
                 + f"({md5(Path(download_path))}) differs to that recorded in "
                 + f" gem5-resources ({resource_json['md5sum']}).{os.linesep}"
             )
+        # Remove the downloaded resource.
+        if os.path.isfile(download_path):
+            os.remove(download_path)
+        elif os.path.isdir(download_path):
+            shutil.rmtree(download_path, ignore_errors=True)
+        else:
+            raise Exception(f"{download_path} is not a file or directory.")
 
-# Remove the downloaded resource.
-shutil.rmtree(args.download_directory, ignore_errors=True)
 
 # If errors exist, raise an exception highlighting them.
 if errors:
diff --git a/tests/gem5/gem5-resources/test_download_resources.py b/tests/gem5/gem5_resources/test_download_resources.py
similarity index 88%
rename from tests/gem5/gem5-resources/test_download_resources.py
rename to tests/gem5/gem5_resources/test_download_resources.py
index c0efc8baad..a15d498717 100644
--- a/tests/gem5/gem5-resources/test_download_resources.py
+++ b/tests/gem5/gem5_resources/test_download_resources.py
@@ -38,9 +38,19 @@
     fixtures=(),
     verifiers=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "download_check.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "gem5_resources",
+        "configs",
+        "download_check.py",
     ),
-    config_args=["--download-directory", resource_path],
+    config_args=[
+        "--download-directory",
+        resource_path,
+        "--skip",
+        "x86-parsec",
+    ],
     valid_isas=(constants.all_compiled_tag,),
     length=constants.very_long_tag,
 )
diff --git a/tests/gem5/gpu/README.md b/tests/gem5/gpu/README.md
new file mode 100644
index 0000000000..9722e30161
--- /dev/null
+++ b/tests/gem5/gpu/README.md
@@ -0,0 +1,8 @@
+# GPU
+
+These tests run random checks on the Ruby GPU protocol within gem5.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/gpu --length=very-long
+```
diff --git a/tests/gem5/insttest_se/README.md b/tests/gem5/insttest_se/README.md
new file mode 100644
index 0000000000..3895316674
--- /dev/null
+++ b/tests/gem5/insttest_se/README.md
@@ -0,0 +1,8 @@
+# Inst Test SE
+
+These test the insttest binary running on the SPARC ISA, checking against different CPU models.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/insttest_se --length=[length]
+```
diff --git a/tests/gem5/insttest_se/configs/simple_binary_run.py b/tests/gem5/insttest_se/configs/simple_binary_run.py
new file mode 100644
index 0000000000..1a0f819a8f
--- /dev/null
+++ b/tests/gem5/insttest_se/configs/simple_binary_run.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A run script for a very simple Syscall-Execution running simple binaries.
+The system has no cache hierarchy and is as "bare-bones" as you can get in
+gem5 while still being functional.
+"""
+
+from gem5.resources.resource import Resource
+from gem5.components.processors.cpu_types import (
+    get_cpu_types_str_set,
+    get_cpu_type_from_str,
+)
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.base_cpu_core import BaseCPUCore
+from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
+from gem5.components.processors.simple_core import SimpleCore
+from gem5.components.boards.mem_mode import MemMode
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
+
+import argparse
+import importlib
+
+cpu_types_string_map = {
+    CPUTypes.ATOMIC: "AtomicSimpleCPU",
+    CPUTypes.O3: "O3CPU",
+    CPUTypes.TIMING: "TimingSimpleCPU",
+    CPUTypes.KVM: "KvmCPU",
+    CPUTypes.MINOR: "MinorCPU",
+}
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for running simple binaries in SE mode."
+)
+
+parser.add_argument(
+    "resource", type=str, help="The gem5 resource binary to run."
+)
+
+parser.add_argument(
+    "cpu", type=str, choices=get_cpu_types_str_set(), help="The CPU type used."
+)
+
+parser.add_argument(
+    "isa", type=str, choices=get_isas_str_set(), help="The ISA used"
+)
+
+parser.add_argument(
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+parser.add_argument(
+    "--arguments",
+    type=str,
+    action="append",
+    default=[],
+    required=False,
+    help="The input arguments for the binary.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+isa_enum = get_isa_from_str(args.isa)
+cpu_enum = get_cpu_type_from_str(args.cpu)
+
+processor = SimpleProcessor(
+    cpu_type=cpu_enum,
+    isa=isa_enum,
+    num_cores=1,
+)
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+binary = Resource(args.resource, resource_directory=args.resource_directory)
+motherboard.set_se_binary_workload(binary, arguments=args.arguments)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/insttest_se/ref/simout b/tests/gem5/insttest_se/ref/simout.txt
similarity index 100%
rename from tests/gem5/insttest_se/ref/simout
rename to tests/gem5/insttest_se/ref/simout.txt
diff --git a/tests/gem5/insttest_se/test.py b/tests/gem5/insttest_se/test.py
index 4dde9d6e94..bf59382b52 100644
--- a/tests/gem5/insttest_se/test.py
+++ b/tests/gem5/insttest_se/test.py
@@ -41,7 +41,9 @@
 for isa in test_progs:
     for binary in test_progs[isa]:
         ref_path = joinpath(getcwd(), "ref")
-        verifiers = (verifier.MatchStdoutNoPerf(joinpath(ref_path, "simout")),)
+        verifiers = (
+            verifier.MatchStdoutNoPerf(joinpath(ref_path, "simout.txt")),
+        )
 
         for cpu in cpu_types[isa]:
             gem5_verify_config(
@@ -52,6 +54,7 @@
                     config.base_dir,
                     "tests",
                     "gem5",
+                    "insttest_se",
                     "configs",
                     "simple_binary_run.py",
                 ),
diff --git a/tests/gem5/kvm_fork_tests/README.md b/tests/gem5/kvm_fork_tests/README.md
new file mode 100644
index 0000000000..5d2d8e8ff8
--- /dev/null
+++ b/tests/gem5/kvm_fork_tests/README.md
@@ -0,0 +1,8 @@
+# KVM Fork Tests
+
+These tests check that gem5 can fork with the KVM cpu, then switch to a different CPU.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/kvm_fork_tests --length=[length]
+```
diff --git a/tests/gem5/configs/boot_kvm_fork_run.py b/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py
similarity index 98%
rename from tests/gem5/configs/boot_kvm_fork_run.py
rename to tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py
index 84e273d842..cb6d1b44e1 100644
--- a/tests/gem5/configs/boot_kvm_fork_run.py
+++ b/tests/gem5/kvm_fork_tests/configs/boot_kvm_fork_run.py
@@ -55,7 +55,7 @@
 from gem5.components.processors.simple_switchable_processor import (
     SimpleSwitchableProcessor,
 )
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.runtime import get_runtime_coherence_protocol
 from gem5.utils.requires import requires
 
@@ -179,10 +179,10 @@
 
 # Set the Full System workload.
 motherboard.set_kernel_disk_workload(
-    kernel=Resource(
+    kernel=obtain_resource(
         "x86-linux-kernel-5.4.49", resource_directory=args.resource_directory
     ),
-    disk_image=Resource(
+    disk_image=obtain_resource(
         "x86-ubuntu-18.04-img", resource_directory=args.resource_directory
     ),
     readfile_contents=dedent(
diff --git a/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py b/tests/gem5/kvm_fork_tests/test_kvm_fork_run.py
similarity index 96%
rename from tests/gem5/kvm-fork-tests/test_kvm_fork_run.py
rename to tests/gem5/kvm_fork_tests/test_kvm_fork_run.py
index 7dcfc8517c..be6821d6ef 100644
--- a/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py
+++ b/tests/gem5/kvm_fork_tests/test_kvm_fork_run.py
@@ -41,7 +41,6 @@
 
 
 def test_kvm_fork_run(cpu: str, num_cpus: int, mem_system: str, length: str):
-
     if not os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
         # Don't run the tests if KVM is unavailable.
         return
@@ -64,7 +63,12 @@ def test_kvm_fork_run(cpu: str, num_cpus: int, mem_system: str, length: str):
         verifiers=verifiers,
         fixtures=(),
         config=joinpath(
-            config.base_dir, "tests", "gem5", "configs", "boot_kvm_fork_run.py"
+            config.base_dir,
+            "tests",
+            "gem5",
+            "kvm_fork_tests",
+            "configs",
+            "boot_kvm_fork_run.py",
         ),
         config_args=[
             "--cpu",
diff --git a/tests/gem5/kvm_switch_tests/README.md b/tests/gem5/kvm_switch_tests/README.md
new file mode 100644
index 0000000000..9a46aa6c8b
--- /dev/null
+++ b/tests/gem5/kvm_switch_tests/README.md
@@ -0,0 +1,8 @@
+# KVM Switch Tests
+
+These tests ensure that gem5 can switch processors during simulation.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/kvm_switch_tests --length=[length]
+```
diff --git a/tests/gem5/configs/boot_kvm_switch_exit.py b/tests/gem5/kvm_switch_tests/configs/boot_kvm_switch_exit.py
similarity index 98%
rename from tests/gem5/configs/boot_kvm_switch_exit.py
rename to tests/gem5/kvm_switch_tests/configs/boot_kvm_switch_exit.py
index 1347e68ba4..2d21261161 100644
--- a/tests/gem5/configs/boot_kvm_switch_exit.py
+++ b/tests/gem5/kvm_switch_tests/configs/boot_kvm_switch_exit.py
@@ -45,7 +45,7 @@
 from gem5.components.processors.simple_switchable_processor import (
     SimpleSwitchableProcessor,
 )
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.runtime import get_runtime_coherence_protocol
 from gem5.simulate.simulator import Simulator
 from gem5.simulate.exit_event import ExitEvent
@@ -165,10 +165,10 @@
 
 # Set the Full System workload.
 motherboard.set_kernel_disk_workload(
-    kernel=Resource(
+    kernel=obtain_resource(
         "x86-linux-kernel-5.4.49", resource_directory=args.resource_directory
     ),
-    disk_image=Resource(
+    disk_image=obtain_resource(
         "x86-ubuntu-18.04-img", resource_directory=args.resource_directory
     ),
     # The first exit signals to switch processors.
diff --git a/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py b/tests/gem5/kvm_switch_tests/test_kvm_cpu_switch.py
similarity index 99%
rename from tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py
rename to tests/gem5/kvm_switch_tests/test_kvm_cpu_switch.py
index 85e9268e2d..6cc53e6134 100644
--- a/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py
+++ b/tests/gem5/kvm_switch_tests/test_kvm_cpu_switch.py
@@ -41,7 +41,6 @@
 
 
 def test_kvm_switch(cpu: str, num_cpus: int, mem_system: str, length: str):
-
     if not os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
         # Don't run the tests if KVM is unavailable.
         return
@@ -67,6 +66,7 @@ def test_kvm_switch(cpu: str, num_cpus: int, mem_system: str, length: str):
             config.base_dir,
             "tests",
             "gem5",
+            "kvm_switch_tests",
             "configs",
             "boot_kvm_switch_exit.py",
         ),
diff --git a/tests/gem5/learning_gem5/README.md b/tests/gem5/learning_gem5/README.md
new file mode 100644
index 0000000000..eda68d9d4c
--- /dev/null
+++ b/tests/gem5/learning_gem5/README.md
@@ -0,0 +1,9 @@
+# Learning gem5
+
+This set of tests ensures that the example scripts for the gem5 tutorial run properly.
+
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/learning_gem5 --length=[length]
+```
diff --git a/tests/gem5/m5_util/README.md b/tests/gem5/m5_util/README.md
new file mode 100644
index 0000000000..133345a904
--- /dev/null
+++ b/tests/gem5/m5_util/README.md
@@ -0,0 +1,8 @@
+# m5 Util
+
+These test the util m5 exit assembly instruction.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/m5_util --length=[length]
+```
diff --git a/tests/gem5/m5_util/configs/simple_binary_run.py b/tests/gem5/m5_util/configs/simple_binary_run.py
new file mode 100644
index 0000000000..ab12156ae2
--- /dev/null
+++ b/tests/gem5/m5_util/configs/simple_binary_run.py
@@ -0,0 +1,103 @@
+# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A run script for a very simple Syscall-Execution running simple binaries.
+The system has no cache hierarchy and is as "bare-bones" as you can get in
+gem5 while still being functional.
+"""
+
+from gem5.resources.resource import Resource
+from gem5.components.processors.cpu_types import (
+    get_cpu_types_str_set,
+    get_cpu_type_from_str,
+)
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.base_cpu_core import BaseCPUCore
+from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
+from gem5.components.processors.simple_core import SimpleCore
+from gem5.components.boards.mem_mode import MemMode
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
+
+import argparse
+import importlib
+
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for running simple binaries in SE mode."
+)
+
+parser.add_argument(
+    "resource", type=str, help="The gem5 resource binary to run."
+)
+
+parser.add_argument(
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.ATOMIC,
+    isa=ISA.X86,
+    num_cores=1,
+)
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+binary = Resource(args.resource, resource_directory=args.resource_directory)
+motherboard.set_se_binary_workload(binary)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/m5_util/test_exit.py b/tests/gem5/m5_util/test_exit.py
index b79a8fadc2..214a20ada9 100644
--- a/tests/gem5/m5_util/test_exit.py
+++ b/tests/gem5/m5_util/test_exit.py
@@ -57,14 +57,17 @@
     verifiers=[a],
     fixtures=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "m5_util",
+        "configs",
+        "simple_binary_run.py",
     ),
     config_args=[
         "x86-m5-exit",
-        "atomic",
         "--resource-directory",
         resource_path,
-        "x86",
     ],
     valid_isas=(constants.all_compiled_tag,),
 )
diff --git a/tests/gem5/m5threads_test_atomic/README.md b/tests/gem5/m5threads_test_atomic/README.md
new file mode 100644
index 0000000000..5e52b91958
--- /dev/null
+++ b/tests/gem5/m5threads_test_atomic/README.md
@@ -0,0 +1,8 @@
+# m5 Threads Test Atomic
+
+These are m5threads atomic tests that run against different CPU types.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/m5threads_test_atomic --length=[length]
+```
diff --git a/tests/gem5/m5threads_test_atomic/caches.py b/tests/gem5/m5threads_test_atomic/caches.py
index fd87d0484d..29de48b065 100755
--- a/tests/gem5/m5threads_test_atomic/caches.py
+++ b/tests/gem5/m5threads_test_atomic/caches.py
@@ -51,7 +51,7 @@ class L1Cache(PrefetchCache):
     writeback_clean = True
 
     def __init__(self, options=None):
-        super(L1Cache, self).__init__(options)
+        super().__init__(options)
         pass
 
     def connectBus(self, bus):
@@ -71,7 +71,7 @@ class L1ICache(L1Cache):
     size = "32kB"
 
     def __init__(self, opts=None):
-        super(L1ICache, self).__init__(opts)
+        super().__init__(opts)
 
     def connectCPU(self, cpu):
         """Connect this cache's port to a CPU icache port"""
@@ -85,7 +85,7 @@ class L1DCache(L1Cache):
     size = "32kB"
 
     def __init__(self, opts=None):
-        super(L1DCache, self).__init__(opts)
+        super().__init__(opts)
 
     def connectCPU(self, cpu):
         """Connect this cache's port to a CPU dcache port"""
@@ -106,7 +106,7 @@ class L2Cache(PrefetchCache):
     writeback_clean = True
 
     def __init__(self, opts=None):
-        super(L2Cache, self).__init__(opts)
+        super().__init__(opts)
 
     def connectCPUSideBus(self, bus):
         self.cpu_side = bus.mem_side_ports
diff --git a/tests/gem5/m5threads_test_atomic/ref/sparc64/simout b/tests/gem5/m5threads_test_atomic/ref/sparc64/simout.txt
similarity index 100%
rename from tests/gem5/m5threads_test_atomic/ref/sparc64/simout
rename to tests/gem5/m5threads_test_atomic/ref/sparc64/simout.txt
diff --git a/tests/gem5/m5threads_test_atomic/test.py b/tests/gem5/m5threads_test_atomic/test.py
index 531de83b2f..0af973bd14 100644
--- a/tests/gem5/m5threads_test_atomic/test.py
+++ b/tests/gem5/m5threads_test_atomic/test.py
@@ -45,7 +45,7 @@
 test_atomic = DownloadedProgram(url, base_path, binary)
 
 verifiers = (
-    verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref/sparc64/simout")),
+    verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref/sparc64/simout.txt")),
 )
 
 for cpu in cpu_types:
diff --git a/tests/gem5/memory/README.md b/tests/gem5/memory/README.md
new file mode 100644
index 0000000000..892593394d
--- /dev/null
+++ b/tests/gem5/memory/README.md
@@ -0,0 +1,9 @@
+# Memory
+
+These run a set of tests on memory within gem5.
+
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/memory --length=[length]
+```
diff --git a/tests/gem5/multi_isa/README.md b/tests/gem5/multi_isa/README.md
new file mode 100644
index 0000000000..94d8c1a3e1
--- /dev/null
+++ b/tests/gem5/multi_isa/README.md
@@ -0,0 +1,9 @@
+# Multi ISA
+
+These tests check that all our ISAs are currently supported within gem5, and that get_runtime_isa() works as expected.
+
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/multi_isa --length=[length]
+```
diff --git a/tests/gem5/configs/runtime_isa_check.py b/tests/gem5/multi_isa/configs/runtime_isa_check.py
similarity index 100%
rename from tests/gem5/configs/runtime_isa_check.py
rename to tests/gem5/multi_isa/configs/runtime_isa_check.py
diff --git a/tests/gem5/configs/supported_isa_check.py b/tests/gem5/multi_isa/configs/supported_isa_check.py
similarity index 100%
rename from tests/gem5/configs/supported_isa_check.py
rename to tests/gem5/multi_isa/configs/supported_isa_check.py
diff --git a/tests/gem5/multi_isa/test_multi_isa.py b/tests/gem5/multi_isa/test_multi_isa.py
index 7d278b75ea..c9726174c0 100644
--- a/tests/gem5/multi_isa/test_multi_isa.py
+++ b/tests/gem5/multi_isa/test_multi_isa.py
@@ -49,6 +49,7 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "multi_isa",
                 "configs",
                 "runtime_isa_check.py",
             ),
@@ -66,6 +67,7 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "multi_isa",
                 "configs",
                 "supported_isa_check.py",
             ),
@@ -86,6 +88,7 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "multi_isa",
                 "configs",
                 "supported_isa_check.py",
             ),
diff --git a/tests/gem5/parsec_benchmarks/README.md b/tests/gem5/parsec_benchmarks/README.md
new file mode 100644
index 0000000000..90dfd5a8e8
--- /dev/null
+++ b/tests/gem5/parsec_benchmarks/README.md
@@ -0,0 +1,9 @@
+# Parsec Benchmarks
+
+These tests run through a subset of the parsec benchmarks within gem5.
+
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/parsec_benchmarks --length=[length]
+```
diff --git a/tests/gem5/configs/parsec_disk_run.py b/tests/gem5/parsec_benchmarks/configs/parsec_disk_run.py
similarity index 98%
rename from tests/gem5/configs/parsec_disk_run.py
rename to tests/gem5/parsec_benchmarks/configs/parsec_disk_run.py
index 5c2fa75f65..341548729b 100644
--- a/tests/gem5/configs/parsec_disk_run.py
+++ b/tests/gem5/parsec_benchmarks/configs/parsec_disk_run.py
@@ -37,7 +37,7 @@
 
 import m5.stats
 
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.boards.x86_board import X86Board
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.processors.simple_switchable_processor import (
@@ -147,7 +147,6 @@
 # Setup the cachie hierarchy.
 
 if args.mem_system == "classic":
-
     from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
         PrivateL1PrivateL2CacheHierarchy,
     )
@@ -205,15 +204,16 @@
 )
 
 board.set_kernel_disk_workload(
-    kernel=Resource(
+    kernel=obtain_resource(
         "x86-linux-kernel-5.4.49", resource_directory=args.resource_directory
     ),
-    disk_image=Resource(
+    disk_image=obtain_resource(
         "x86-parsec", resource_directory=args.resource_directory
     ),
     readfile_contents=command,
 )
 
+
 # Here we define some custom workbegin/workend exit event generators. Here we
 # want to switch to detailed CPUs at the beginning of the ROI, then continue to
 # the end of of the ROI. Then we exit the simulation.
diff --git a/tests/gem5/parsec-benchmarks/test_parsec.py b/tests/gem5/parsec_benchmarks/test_parsec.py
similarity index 93%
rename from tests/gem5/parsec-benchmarks/test_parsec.py
rename to tests/gem5/parsec_benchmarks/test_parsec.py
index 11735ab43f..60aae0736d 100644
--- a/tests/gem5/parsec-benchmarks/test_parsec.py
+++ b/tests/gem5/parsec_benchmarks/test_parsec.py
@@ -43,13 +43,19 @@ def test_parsec(
     size: str,
     length: str,
 ):
-
     if (boot_cpu == "kvm" or detailed_cpu == "kvm") and not os.access(
         "/dev/kvm", mode=os.R_OK | os.W_OK
     ):
         # Don't run the tests if KVM is unavailable.
         return
 
+    print(
+        "WARNING: PARSEC tests are disabled. This is due to our GitHub "
+        "Actions self-hosted runners only having 60GB of disk space. The "
+        "PARSEC Disk image is too big to use."
+    )
+    return  # Remove this line to re-enable PARSEC tests.
+
     gem5_verify_config(
         name="{}-boot-cpu_{}-detailed-cpu_{}-cores_{}_{}_{}_parsec-test".format(
             boot_cpu, detailed_cpu, str(num_cpus), mem_system, benchmark, size
@@ -57,7 +63,12 @@ def test_parsec(
         verifiers=(),
         fixtures=(),
         config=joinpath(
-            config.base_dir, "tests", "gem5", "configs", "parsec_disk_run.py"
+            config.base_dir,
+            "tests",
+            "gem5",
+            "parsec_benchmarks",
+            "configs",
+            "parsec_disk_run.py",
         ),
         config_args=[
             "--cpu",
diff --git a/tests/gem5/replacement-policies/README b/tests/gem5/replacement_policies/README
similarity index 100%
rename from tests/gem5/replacement-policies/README
rename to tests/gem5/replacement_policies/README
diff --git a/tests/gem5/replacement-policies/cache_hierarchies.py b/tests/gem5/replacement_policies/configs/cache_hierarchies.py
similarity index 100%
rename from tests/gem5/replacement-policies/cache_hierarchies.py
rename to tests/gem5/replacement_policies/configs/cache_hierarchies.py
diff --git a/tests/gem5/replacement-policies/run_replacement_policy.py b/tests/gem5/replacement_policies/configs/run_replacement_policy.py
similarity index 100%
rename from tests/gem5/replacement-policies/run_replacement_policy.py
rename to tests/gem5/replacement_policies/configs/run_replacement_policy.py
diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_ld b/tests/gem5/replacement_policies/ref/fifo_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/fifo_test1_ld
rename to tests/gem5/replacement_policies/ref/fifo_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_st b/tests/gem5/replacement_policies/ref/fifo_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/fifo_test1_st
rename to tests/gem5/replacement_policies/ref/fifo_test1_st
diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_ld b/tests/gem5/replacement_policies/ref/fifo_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/fifo_test2_ld
rename to tests/gem5/replacement_policies/ref/fifo_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_st b/tests/gem5/replacement_policies/ref/fifo_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/fifo_test2_st
rename to tests/gem5/replacement_policies/ref/fifo_test2_st
diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_ld b/tests/gem5/replacement_policies/ref/lfu_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test1_ld
rename to tests/gem5/replacement_policies/ref/lfu_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_st b/tests/gem5/replacement_policies/ref/lfu_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test1_st
rename to tests/gem5/replacement_policies/ref/lfu_test1_st
diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_ld b/tests/gem5/replacement_policies/ref/lfu_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test2_ld
rename to tests/gem5/replacement_policies/ref/lfu_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_st b/tests/gem5/replacement_policies/ref/lfu_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test2_st
rename to tests/gem5/replacement_policies/ref/lfu_test2_st
diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_ld b/tests/gem5/replacement_policies/ref/lfu_test3_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test3_ld
rename to tests/gem5/replacement_policies/ref/lfu_test3_ld
diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_st b/tests/gem5/replacement_policies/ref/lfu_test3_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lfu_test3_st
rename to tests/gem5/replacement_policies/ref/lfu_test3_st
diff --git a/tests/gem5/replacement-policies/ref/lip_test1_ld b/tests/gem5/replacement_policies/ref/lip_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lip_test1_ld
rename to tests/gem5/replacement_policies/ref/lip_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/lip_test1_st b/tests/gem5/replacement_policies/ref/lip_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lip_test1_st
rename to tests/gem5/replacement_policies/ref/lip_test1_st
diff --git a/tests/gem5/replacement-policies/ref/lru_test1_ld b/tests/gem5/replacement_policies/ref/lru_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test1_ld
rename to tests/gem5/replacement_policies/ref/lru_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/lru_test1_st b/tests/gem5/replacement_policies/ref/lru_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test1_st
rename to tests/gem5/replacement_policies/ref/lru_test1_st
diff --git a/tests/gem5/replacement-policies/ref/lru_test2_ld b/tests/gem5/replacement_policies/ref/lru_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test2_ld
rename to tests/gem5/replacement_policies/ref/lru_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/lru_test2_st b/tests/gem5/replacement_policies/ref/lru_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test2_st
rename to tests/gem5/replacement_policies/ref/lru_test2_st
diff --git a/tests/gem5/replacement-policies/ref/lru_test3_ld b/tests/gem5/replacement_policies/ref/lru_test3_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test3_ld
rename to tests/gem5/replacement_policies/ref/lru_test3_ld
diff --git a/tests/gem5/replacement-policies/ref/lru_test3_st b/tests/gem5/replacement_policies/ref/lru_test3_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test3_st
rename to tests/gem5/replacement_policies/ref/lru_test3_st
diff --git a/tests/gem5/replacement-policies/ref/lru_test4_ld b/tests/gem5/replacement_policies/ref/lru_test4_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test4_ld
rename to tests/gem5/replacement_policies/ref/lru_test4_ld
diff --git a/tests/gem5/replacement-policies/ref/lru_test4_st b/tests/gem5/replacement_policies/ref/lru_test4_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/lru_test4_st
rename to tests/gem5/replacement_policies/ref/lru_test4_st
diff --git a/tests/gem5/replacement-policies/ref/mru_test1_ld b/tests/gem5/replacement_policies/ref/mru_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/mru_test1_ld
rename to tests/gem5/replacement_policies/ref/mru_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/mru_test1_st b/tests/gem5/replacement_policies/ref/mru_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/mru_test1_st
rename to tests/gem5/replacement_policies/ref/mru_test1_st
diff --git a/tests/gem5/replacement-policies/ref/mru_test2_ld b/tests/gem5/replacement_policies/ref/mru_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/mru_test2_ld
rename to tests/gem5/replacement_policies/ref/mru_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/mru_test2_st b/tests/gem5/replacement_policies/ref/mru_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/mru_test2_st
rename to tests/gem5/replacement_policies/ref/mru_test2_st
diff --git a/tests/gem5/replacement-policies/ref/nru_test1_ld b/tests/gem5/replacement_policies/ref/nru_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/nru_test1_ld
rename to tests/gem5/replacement_policies/ref/nru_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/nru_test1_st b/tests/gem5/replacement_policies/ref/nru_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/nru_test1_st
rename to tests/gem5/replacement_policies/ref/nru_test1_st
diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_ld b/tests/gem5/replacement_policies/ref/rrip_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/rrip_test1_ld
rename to tests/gem5/replacement_policies/ref/rrip_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_st b/tests/gem5/replacement_policies/ref/rrip_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/rrip_test1_st
rename to tests/gem5/replacement_policies/ref/rrip_test1_st
diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_ld b/tests/gem5/replacement_policies/ref/rrip_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/rrip_test2_ld
rename to tests/gem5/replacement_policies/ref/rrip_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_st b/tests/gem5/replacement_policies/ref/rrip_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/rrip_test2_st
rename to tests/gem5/replacement_policies/ref/rrip_test2_st
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_ld b/tests/gem5/replacement_policies/ref/second_chance_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test1_ld
rename to tests/gem5/replacement_policies/ref/second_chance_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_st b/tests/gem5/replacement_policies/ref/second_chance_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test1_st
rename to tests/gem5/replacement_policies/ref/second_chance_test1_st
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_ld b/tests/gem5/replacement_policies/ref/second_chance_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test2_ld
rename to tests/gem5/replacement_policies/ref/second_chance_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_st b/tests/gem5/replacement_policies/ref/second_chance_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test2_st
rename to tests/gem5/replacement_policies/ref/second_chance_test2_st
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_ld b/tests/gem5/replacement_policies/ref/second_chance_test3_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test3_ld
rename to tests/gem5/replacement_policies/ref/second_chance_test3_ld
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_st b/tests/gem5/replacement_policies/ref/second_chance_test3_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/second_chance_test3_st
rename to tests/gem5/replacement_policies/ref/second_chance_test3_st
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_ld b/tests/gem5/replacement_policies/ref/tree_plru_test1_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test1_ld
rename to tests/gem5/replacement_policies/ref/tree_plru_test1_ld
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_st b/tests/gem5/replacement_policies/ref/tree_plru_test1_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test1_st
rename to tests/gem5/replacement_policies/ref/tree_plru_test1_st
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_ld b/tests/gem5/replacement_policies/ref/tree_plru_test2_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test2_ld
rename to tests/gem5/replacement_policies/ref/tree_plru_test2_ld
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_st b/tests/gem5/replacement_policies/ref/tree_plru_test2_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test2_st
rename to tests/gem5/replacement_policies/ref/tree_plru_test2_st
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_ld b/tests/gem5/replacement_policies/ref/tree_plru_test3_ld
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test3_ld
rename to tests/gem5/replacement_policies/ref/tree_plru_test3_ld
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_st b/tests/gem5/replacement_policies/ref/tree_plru_test3_st
similarity index 100%
rename from tests/gem5/replacement-policies/ref/tree_plru_test3_st
rename to tests/gem5/replacement_policies/ref/tree_plru_test3_st
diff --git a/tests/gem5/replacement_policies/run_replacement_policy.py b/tests/gem5/replacement_policies/run_replacement_policy.py
new file mode 100644
index 0000000000..ec38bf382f
--- /dev/null
+++ b/tests/gem5/replacement_policies/run_replacement_policy.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+from importlib.machinery import SourceFileLoader
+
+from cache_hierarchies import ModMIExampleCacheHierarchy
+
+import m5
+
+from m5.debug import flags
+from m5.objects import Root
+from gem5.components.boards.test_board import TestBoard
+from gem5.components.memory.simple import SingleChannelSimpleMemory
+from gem5.components.processors.complex_generator import ComplexGenerator
+
+argparser = argparse.ArgumentParser()
+
+argparser.add_argument(
+    "config_name",
+    type=str,
+    help="Name of the python file "
+    "including the definition of a python generator and "
+    "importing the right replacement policy. The python "
+    "generator should only assume one positional argument "
+    "and be named python_generator. The replacement policy"
+    " should be imported as rp.",
+)
+argparser.add_argument(
+    "config_path",
+    type=str,
+    help="Path to the python file specified by config_name.",
+)
+
+args = argparser.parse_args()
+
+module = SourceFileLoader(args.config_name, args.config_path).load_module()
+python_generator = module.python_generator
+rp_class = module.rp
+
+flags["RubyHitMiss"].enable()
+
+cache_hierarchy = ModMIExampleCacheHierarchy(rp_class)
+
+memory = SingleChannelSimpleMemory(
+    latency="30ns",
+    latency_var="0ns",
+    bandwidth="12.8GiB/s",
+    size="512MiB",
+)
+
+generator = ComplexGenerator()
+generator.set_traffic_from_python_generator(python_generator)
+
+# We use the Test Board. This is a special board to run traffic generation
+# tasks
+motherboard = TestBoard(
+    clk_freq="1GHz",
+    generator=generator,  # We pass the traffic generator as the processor.
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+root = Root(full_system=False, system=motherboard)
+
+motherboard._pre_instantiate()
+m5.instantiate()
+
+generator.start_traffic()
+print("Beginning simulation!")
+exit_event = m5.simulate()
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}.")
diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement_policies/test_replacement_policies.py
similarity index 98%
rename from tests/gem5/replacement-policies/test_replacement_policies.py
rename to tests/gem5/replacement_policies/test_replacement_policies.py
index 4c74f72a2a..dd95c9c851 100644
--- a/tests/gem5/replacement-policies/test_replacement_policies.py
+++ b/tests/gem5/replacement_policies/test_replacement_policies.py
@@ -44,7 +44,8 @@ def test_replacement_policy(config_name: str, config_path: str) -> None:
             config.base_dir,
             "tests",
             "gem5",
-            "replacement-policies",
+            "replacement_policies",
+            "configs",
             "run_replacement_policy.py",
         ),
         config_args=[config_name, config_path],
diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_ld.py b/tests/gem5/replacement_policies/traces/fifo_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/fifo_test1_ld.py
rename to tests/gem5/replacement_policies/traces/fifo_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_st.py b/tests/gem5/replacement_policies/traces/fifo_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/fifo_test1_st.py
rename to tests/gem5/replacement_policies/traces/fifo_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_ld.py b/tests/gem5/replacement_policies/traces/fifo_test2_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/fifo_test2_ld.py
rename to tests/gem5/replacement_policies/traces/fifo_test2_ld.py
diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_st.py b/tests/gem5/replacement_policies/traces/fifo_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/fifo_test2_st.py
rename to tests/gem5/replacement_policies/traces/fifo_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_ld.py b/tests/gem5/replacement_policies/traces/lfu_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test1_ld.py
rename to tests/gem5/replacement_policies/traces/lfu_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_st.py b/tests/gem5/replacement_policies/traces/lfu_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test1_st.py
rename to tests/gem5/replacement_policies/traces/lfu_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_ld.py b/tests/gem5/replacement_policies/traces/lfu_test2_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test2_ld.py
rename to tests/gem5/replacement_policies/traces/lfu_test2_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_st.py b/tests/gem5/replacement_policies/traces/lfu_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test2_st.py
rename to tests/gem5/replacement_policies/traces/lfu_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_ld.py b/tests/gem5/replacement_policies/traces/lfu_test3_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test3_ld.py
rename to tests/gem5/replacement_policies/traces/lfu_test3_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_st.py b/tests/gem5/replacement_policies/traces/lfu_test3_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lfu_test3_st.py
rename to tests/gem5/replacement_policies/traces/lfu_test3_st.py
diff --git a/tests/gem5/replacement-policies/traces/lip_test1_ld.py b/tests/gem5/replacement_policies/traces/lip_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lip_test1_ld.py
rename to tests/gem5/replacement_policies/traces/lip_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lip_test1_st.py b/tests/gem5/replacement_policies/traces/lip_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lip_test1_st.py
rename to tests/gem5/replacement_policies/traces/lip_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test1_ld.py b/tests/gem5/replacement_policies/traces/lru_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test1_ld.py
rename to tests/gem5/replacement_policies/traces/lru_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test1_st.py b/tests/gem5/replacement_policies/traces/lru_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test1_st.py
rename to tests/gem5/replacement_policies/traces/lru_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test2_ld.py b/tests/gem5/replacement_policies/traces/lru_test2_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test2_ld.py
rename to tests/gem5/replacement_policies/traces/lru_test2_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test2_st.py b/tests/gem5/replacement_policies/traces/lru_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test2_st.py
rename to tests/gem5/replacement_policies/traces/lru_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test3_ld.py b/tests/gem5/replacement_policies/traces/lru_test3_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test3_ld.py
rename to tests/gem5/replacement_policies/traces/lru_test3_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test3_st.py b/tests/gem5/replacement_policies/traces/lru_test3_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test3_st.py
rename to tests/gem5/replacement_policies/traces/lru_test3_st.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test4_ld.py b/tests/gem5/replacement_policies/traces/lru_test4_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test4_ld.py
rename to tests/gem5/replacement_policies/traces/lru_test4_ld.py
diff --git a/tests/gem5/replacement-policies/traces/lru_test4_st.py b/tests/gem5/replacement_policies/traces/lru_test4_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/lru_test4_st.py
rename to tests/gem5/replacement_policies/traces/lru_test4_st.py
diff --git a/tests/gem5/replacement-policies/traces/mru_test1_ld.py b/tests/gem5/replacement_policies/traces/mru_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/mru_test1_ld.py
rename to tests/gem5/replacement_policies/traces/mru_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/mru_test1_st.py b/tests/gem5/replacement_policies/traces/mru_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/mru_test1_st.py
rename to tests/gem5/replacement_policies/traces/mru_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/mru_test2_ld.py b/tests/gem5/replacement_policies/traces/mru_test2_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/mru_test2_ld.py
rename to tests/gem5/replacement_policies/traces/mru_test2_ld.py
diff --git a/tests/gem5/replacement-policies/traces/mru_test2_st.py b/tests/gem5/replacement_policies/traces/mru_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/mru_test2_st.py
rename to tests/gem5/replacement_policies/traces/mru_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/nru_test1_ld.py b/tests/gem5/replacement_policies/traces/nru_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/nru_test1_ld.py
rename to tests/gem5/replacement_policies/traces/nru_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/nru_test1_st.py b/tests/gem5/replacement_policies/traces/nru_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/nru_test1_st.py
rename to tests/gem5/replacement_policies/traces/nru_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/rrip_test1_ld.py b/tests/gem5/replacement_policies/traces/rrip_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/rrip_test1_ld.py
rename to tests/gem5/replacement_policies/traces/rrip_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/rrip_test1_st.py b/tests/gem5/replacement_policies/traces/rrip_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/rrip_test1_st.py
rename to tests/gem5/replacement_policies/traces/rrip_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_ld.py b/tests/gem5/replacement_policies/traces/rrip_test2_ld.py
similarity index 99%
rename from tests/gem5/replacement-policies/traces/rrip_test2_ld.py
rename to tests/gem5/replacement_policies/traces/rrip_test2_ld.py
index b9f2ee026e..dcc8df90ee 100644
--- a/tests/gem5/replacement-policies/traces/rrip_test2_ld.py
+++ b/tests/gem5/replacement_policies/traces/rrip_test2_ld.py
@@ -52,7 +52,6 @@
 
 
 def python_generator(generator):
-
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_st.py b/tests/gem5/replacement_policies/traces/rrip_test2_st.py
similarity index 99%
rename from tests/gem5/replacement-policies/traces/rrip_test2_st.py
rename to tests/gem5/replacement_policies/traces/rrip_test2_st.py
index be23756a95..b53ec4a076 100644
--- a/tests/gem5/replacement-policies/traces/rrip_test2_st.py
+++ b/tests/gem5/replacement_policies/traces/rrip_test2_st.py
@@ -52,7 +52,6 @@
 
 
 def python_generator(generator):
-
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py b/tests/gem5/replacement_policies/traces/second_chance_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/second_chance_test1_ld.py
rename to tests/gem5/replacement_policies/traces/second_chance_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_st.py b/tests/gem5/replacement_policies/traces/second_chance_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/second_chance_test1_st.py
rename to tests/gem5/replacement_policies/traces/second_chance_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py b/tests/gem5/replacement_policies/traces/second_chance_test2_ld.py
similarity index 99%
rename from tests/gem5/replacement-policies/traces/second_chance_test2_ld.py
rename to tests/gem5/replacement_policies/traces/second_chance_test2_ld.py
index d187cbec3f..88c8e462a2 100644
--- a/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py
+++ b/tests/gem5/replacement_policies/traces/second_chance_test2_ld.py
@@ -52,7 +52,6 @@
 
 
 def python_generator(generator):
-
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
     yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
     yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_st.py b/tests/gem5/replacement_policies/traces/second_chance_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/second_chance_test2_st.py
rename to tests/gem5/replacement_policies/traces/second_chance_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py b/tests/gem5/replacement_policies/traces/second_chance_test3_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/second_chance_test3_ld.py
rename to tests/gem5/replacement_policies/traces/second_chance_test3_ld.py
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_st.py b/tests/gem5/replacement_policies/traces/second_chance_test3_st.py
similarity index 99%
rename from tests/gem5/replacement-policies/traces/second_chance_test3_st.py
rename to tests/gem5/replacement_policies/traces/second_chance_test3_st.py
index 53dcbffe89..d40383e8fc 100644
--- a/tests/gem5/replacement-policies/traces/second_chance_test3_st.py
+++ b/tests/gem5/replacement_policies/traces/second_chance_test3_st.py
@@ -54,7 +54,6 @@
 
 
 def python_generator(generator):
-
     yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
     yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
     yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py b/tests/gem5/replacement_policies/traces/tree_plru_test1_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test1_ld.py
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py b/tests/gem5/replacement_policies/traces/tree_plru_test1_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test1_st.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test1_st.py
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py b/tests/gem5/replacement_policies/traces/tree_plru_test2_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test2_ld.py
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py b/tests/gem5/replacement_policies/traces/tree_plru_test2_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test2_st.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test2_st.py
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py b/tests/gem5/replacement_policies/traces/tree_plru_test3_ld.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test3_ld.py
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py b/tests/gem5/replacement_policies/traces/tree_plru_test3_st.py
similarity index 100%
rename from tests/gem5/replacement-policies/traces/tree_plru_test3_st.py
rename to tests/gem5/replacement_policies/traces/tree_plru_test3_st.py
diff --git a/tests/gem5/riscv_boot_tests/README.md b/tests/gem5/riscv_boot_tests/README.md
new file mode 100644
index 0000000000..002cc4d09d
--- /dev/null
+++ b/tests/gem5/riscv_boot_tests/README.md
@@ -0,0 +1,9 @@
+# RISCV Boot Tests
+
+These tests run a series of Linux boots on the RISCVBoard.
+They vary the CPU type, number of CPUs, and memory used for each run.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/riscv_boot_tests --length=[length]
+```
diff --git a/tests/gem5/configs/riscv_boot_exit_run.py b/tests/gem5/riscv_boot_tests/configs/riscv_boot_exit_run.py
similarity index 98%
rename from tests/gem5/configs/riscv_boot_exit_run.py
rename to tests/gem5/riscv_boot_tests/configs/riscv_boot_exit_run.py
index e9fc06b27b..3726d7de46 100644
--- a/tests/gem5/configs/riscv_boot_exit_run.py
+++ b/tests/gem5/riscv_boot_tests/configs/riscv_boot_exit_run.py
@@ -40,7 +40,7 @@
 from gem5.components.boards.riscv_board import RiscvBoard
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.simulate.simulator import Simulator
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 
 import argparse
 import importlib
@@ -160,7 +160,7 @@
 )
 
 # Set the workload.
-workload = Workload(
+workload = obtain_resource(
     "riscv-ubuntu-20.04-boot", resource_directory=args.resource_directory
 )
 board.set_workload(workload)
diff --git a/tests/gem5/riscv-boot-tests/test_linux_boot.py b/tests/gem5/riscv_boot_tests/test_linux_boot.py
similarity index 99%
rename from tests/gem5/riscv-boot-tests/test_linux_boot.py
rename to tests/gem5/riscv_boot_tests/test_linux_boot.py
index 55e0ae6109..43d1c6d69f 100644
--- a/tests/gem5/riscv-boot-tests/test_linux_boot.py
+++ b/tests/gem5/riscv_boot_tests/test_linux_boot.py
@@ -44,7 +44,6 @@ def test_boot(
     length: str,
     to_tick: Optional[int] = None,
 ):
-
     name = "{}-cpu_{}-cores_{}_{}_riscv-boot-test".format(
         cpu, str(num_cpus), cache_type, memory_class
     )
@@ -80,6 +79,7 @@ def test_boot(
             config.base_dir,
             "tests",
             "gem5",
+            "riscv_boot_tests",
             "configs",
             "riscv_boot_exit_run.py",
         ),
diff --git a/tests/gem5/se_mode/hello_se/README.md b/tests/gem5/se_mode/hello_se/README.md
new file mode 100644
index 0000000000..b3109692b0
--- /dev/null
+++ b/tests/gem5/se_mode/hello_se/README.md
@@ -0,0 +1,8 @@
+# SE Mode
+
+These tests use the SimpleBoard to test simple binaries in SE mode, with both single-core and multi-core configurations.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/se_mode/hello_se --length=[length]
+```
diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/se_mode/hello_se/configs/simple_binary_run.py
similarity index 80%
rename from tests/gem5/configs/simple_binary_run.py
rename to tests/gem5/se_mode/hello_se/configs/simple_binary_run.py
index f5e097eaae..19fd0e6b8c 100644
--- a/tests/gem5/configs/simple_binary_run.py
+++ b/tests/gem5/se_mode/hello_se/configs/simple_binary_run.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -30,7 +31,7 @@
 gem5 while still being functinal.
 """
 
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.components.processors.cpu_types import (
     get_cpu_types_str_set,
     get_cpu_type_from_str,
@@ -45,9 +46,20 @@
 from gem5.components.boards.mem_mode import MemMode
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.simulate.simulator import Simulator
-from gem5.isas import get_isa_from_str, get_isas_str_set
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
 
 import argparse
+import importlib
+
+cpu_types_string_map = {
+    CPUTypes.ATOMIC: "AtomicSimpleCPU",
+    CPUTypes.O3: "O3CPU",
+    CPUTypes.TIMING: "TimingSimpleCPU",
+    CPUTypes.KVM: "KvmCPU",
+    CPUTypes.MINOR: "MinorCPU",
+}
 
 parser = argparse.ArgumentParser(
     description="A gem5 script for running simple binaries in SE mode."
@@ -65,13 +77,6 @@
     "isa", type=str, choices=get_isas_str_set(), help="The ISA used"
 )
 
-parser.add_argument(
-    "-b",
-    "--base-cpu-processor",
-    action="store_true",
-    help="Use the BaseCPUProcessor instead of the SimpleProcessor.",
-)
-
 parser.add_argument(
     "-r",
     "--resource-directory",
@@ -104,28 +109,14 @@
 cache_hierarchy = NoCache()
 memory = SingleChannelDDR3_1600()
 
-if args.base_cpu_processor:
-    cores = [
-        BaseCPUCore(
-            core=SimpleCore.cpu_simobject_factory(
-                cpu_type=get_cpu_type_from_str(args.cpu),
-                isa=get_isa_from_str(args.isa),
-                core_id=i,
-            ),
-            isa=get_isa_from_str(args.isa),
-        )
-        for i in range(args.num_cores)
-    ]
-
-    processor = BaseCPUProcessor(
-        cores=cores,
-    )
-else:
-    processor = SimpleProcessor(
-        cpu_type=get_cpu_type_from_str(args.cpu),
-        isa=get_isa_from_str(args.isa),
-        num_cores=args.num_cores,
-    )
+isa_enum = get_isa_from_str(args.isa)
+cpu_enum = get_cpu_type_from_str(args.cpu)
+
+processor = SimpleProcessor(
+    cpu_type=cpu_enum,
+    isa=isa_enum,
+    num_cores=args.num_cores,
+)
 
 motherboard = SimpleBoard(
     clk_freq="3GHz",
@@ -135,7 +126,9 @@
 )
 
 # Set the workload
-binary = Resource(args.resource, resource_directory=args.resource_directory)
+binary = obtain_resource(
+    args.resource, resource_directory=args.resource_directory
+)
 motherboard.set_se_binary_workload(binary, arguments=args.arguments)
 
 # Run the simulation
diff --git a/tests/gem5/se_mode/hello_se/test_hello_se.py b/tests/gem5/se_mode/hello_se/test_hello_se.py
index 1aaac4a435..ebcef719a8 100644
--- a/tests/gem5/se_mode/hello_se/test_hello_se.py
+++ b/tests/gem5/se_mode/hello_se/test_hello_se.py
@@ -90,13 +90,18 @@
 
 
 def verify_config(isa, binary, cpu, hosts, verifier, input):
-
     gem5_verify_config(
         name="test-" + binary + "-" + cpu,
         fixtures=(),
         verifiers=(verifier,),
         config=joinpath(
-            config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+            config.base_dir,
+            "tests",
+            "gem5",
+            "se_mode",
+            "hello_se",
+            "configs",
+            "simple_binary_run.py",
         ),
         config_args=[
             binary,
diff --git a/tests/gem5/se_mode/hello_se/test_se_multicore.py b/tests/gem5/se_mode/hello_se/test_se_multicore.py
index 55fc61fbf8..dc98a755f3 100644
--- a/tests/gem5/se_mode/hello_se/test_se_multicore.py
+++ b/tests/gem5/se_mode/hello_se/test_se_multicore.py
@@ -40,7 +40,13 @@
     fixtures=(),
     verifiers=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "se_mode",
+        "hello_se",
+        "configs",
+        "simple_binary_run.py",
     ),
     config_args=[
         "x86-hello64-static",
diff --git a/tests/gem5/stats/README.md b/tests/gem5/stats/README.md
new file mode 100644
index 0000000000..c55600bed1
--- /dev/null
+++ b/tests/gem5/stats/README.md
@@ -0,0 +1,8 @@
+# Stats
+
+This test runs an SE simulation with the hdf5 stats and checks that the simulation succeeds and the stats file exists.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/stats --length=[length]
+```
diff --git a/tests/gem5/stats/configs/simple_binary_run.py b/tests/gem5/stats/configs/simple_binary_run.py
new file mode 100644
index 0000000000..b4d9d76d8d
--- /dev/null
+++ b/tests/gem5/stats/configs/simple_binary_run.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A run script for a very simple Syscall-Execution running simple binaries.
+The system has no cache hierarchy and is as "bare-bones" as you can get in
+gem5 while still being functional.
+"""
+
+from gem5.resources.resource import Resource
+from gem5.components.processors.cpu_types import (
+    get_cpu_types_str_set,
+    get_cpu_type_from_str,
+)
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.base_cpu_core import BaseCPUCore
+from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
+from gem5.components.processors.simple_core import SimpleCore
+from gem5.components.boards.mem_mode import MemMode
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
+
+import argparse
+import importlib
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for running simple binaries in SE mode."
+)
+
+parser.add_argument(
+    "resource", type=str, help="The gem5 resource binary to run."
+)
+
+parser.add_argument(
+    "-r",
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+parser.add_argument(
+    "--arguments",
+    type=str,
+    action="append",
+    default=[],
+    required=False,
+    help="The input arguments for the binary.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.ATOMIC,
+    isa=ISA.ARM,
+    num_cores=1,
+)
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+binary = Resource(args.resource, resource_directory=args.resource_directory)
+motherboard.set_se_binary_workload(binary, arguments=args.arguments)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/stats/test_hdf5.py b/tests/gem5/stats/test_hdf5.py
index 8775d22ad8..c226d717de 100644
--- a/tests/gem5/stats/test_hdf5.py
+++ b/tests/gem5/stats/test_hdf5.py
@@ -96,14 +96,17 @@ def have_hdf5():
         verifiers=[ok_verifier, err_verifier, h5_verifier],
         fixtures=(),
         config=joinpath(
-            config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+            config.base_dir,
+            "tests",
+            "gem5",
+            "stats",
+            "configs",
+            "simple_binary_run.py",
         ),
         config_args=[
             "arm-hello64-static",
-            "atomic",
             "--resource-directory",
             resource_path,
-            "arm",
         ],
         gem5_args=["--stats-file=h5://stats.h5"],
         valid_isas=(constants.all_compiled_tag,),
diff --git a/tests/gem5/stdlib/README.md b/tests/gem5/stdlib/README.md
new file mode 100644
index 0000000000..0b0649f6b6
--- /dev/null
+++ b/tests/gem5/stdlib/README.md
@@ -0,0 +1,8 @@
+# Standard Library
+
+These tests check that the BaseCPUProcessor and the gem5.utils.requires function work as intended.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/stdlib --length=[length]
+```
diff --git a/tests/gem5/configs/requires_check.py b/tests/gem5/stdlib/configs/requires_check.py
similarity index 100%
rename from tests/gem5/configs/requires_check.py
rename to tests/gem5/stdlib/configs/requires_check.py
diff --git a/tests/gem5/stdlib/configs/simple_binary_run.py b/tests/gem5/stdlib/configs/simple_binary_run.py
new file mode 100644
index 0000000000..a0e4c7f62a
--- /dev/null
+++ b/tests/gem5/stdlib/configs/simple_binary_run.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+A run script for a very simple Syscall-Execution running simple binaries.
+The system has no cache hierarchy and is as "bare-bones" as you can get in
+gem5 while still being functional.
+"""
+
+from gem5.resources.resource import Resource
+from gem5.components.processors.cpu_types import (
+    get_cpu_types_str_set,
+    get_cpu_type_from_str,
+)
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.base_cpu_core import BaseCPUCore
+from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
+from gem5.components.processors.simple_core import SimpleCore
+from gem5.components.boards.mem_mode import MemMode
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
+
+import argparse
+import importlib
+
+cpu_types_string_map = {
+    CPUTypes.ATOMIC: "AtomicSimpleCPU",
+    CPUTypes.O3: "O3CPU",
+    CPUTypes.TIMING: "TimingSimpleCPU",
+    CPUTypes.KVM: "KvmCPU",
+    CPUTypes.MINOR: "MinorCPU",
+}
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for running simple binaries in SE mode."
+)
+
+parser.add_argument(
+    "resource", type=str, help="The gem5 resource binary to run."
+)
+
+parser.add_argument(
+    "cpu", type=str, choices=get_cpu_types_str_set(), help="The CPU type used."
+)
+
+parser.add_argument(
+    "isa", type=str, choices=get_isas_str_set(), help="The ISA used"
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+isa_enum = get_isa_from_str(args.isa)
+cpu_enum = get_cpu_type_from_str(args.cpu)
+
+cores = [
+    BaseCPUCore(
+        core=SimpleCore.cpu_simobject_factory(
+            cpu_type=cpu_enum,
+            isa=isa_enum,
+            core_id=i,
+        ),
+        isa=isa_enum,
+    )
+    for i in range(1)
+]
+
+processor = BaseCPUProcessor(
+    cores=cores,
+)
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+binary = Resource(args.resource)
+motherboard.set_se_binary_workload(binary)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/tests/gem5/stdlib/configs/simulator_exit_event_run.py b/tests/gem5/stdlib/configs/simulator_exit_event_run.py
new file mode 100644
index 0000000000..56c99359f2
--- /dev/null
+++ b/tests/gem5/stdlib/configs/simulator_exit_event_run.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script is used for testing the exit_event handler of the Simulator
+module. If the handler is working correctly the following output will be
+received:
+
+```
+The program has started!
+About to exit the simulation for the 1 st/nd/rd/th time
+Handled exit event.
+build/X86/sim/simulate.cc:194: info: Entering event queue @ 780559326.  Starting simulation...
+About to exit the simulation for the 2 st/nd/rd/th time
+Handled exit event.
+build/X86/sim/simulate.cc:194: info: Entering event queue @ 854152659.  Starting simulation...
+About to exit the simulation for the 3 st/nd/rd/th time
+Handling the final exit event. We'll exit now.
+```
+
+By default a generator is passed to define the exit_event behavior. A list of
+functions or a lone function can also be passed. This can be specified by the
+`--exit-event-type` parameter.
+"""
+
+from gem5.resources.resource import obtain_resource
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
+from gem5.isas import ISA
+
+import argparse
+
+parser = argparse.ArgumentParser(
+    description="A gem5 script for running simple binaries in SE mode."
+)
+
+parser.add_argument(
+    "-e",
+    "--exit-event-type",
+    type=str,
+    choices=("generator", "function-list", "function"),
+    default="generator",
+    help="Used to specify what exit event format is to be passed.",
+)
+
+parser.add_argument(
+    "-r",
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+
+args = parser.parse_args()
+
+# Setup the system.
+cache_hierarchy = NoCache()
+memory = SingleChannelDDR3_1600()
+
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING,
+    isa=ISA.X86,
+    num_cores=1,
+)
+
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Set the workload
+# Note: Here we're using the "x86-m5-exit-repeat" resource. This calls an
+# `m5_exit(0)` command in an infinite while-loop.
+binary = obtain_resource(
+    "x86-m5-exit-repeat", resource_directory=args.resource_directory
+)
+motherboard.set_se_binary_workload(binary)
+
+# Create the exit event handler. Here there are three kinds: either pass a
+# generator, a list of functions, or a lone function. In this script they all
+# do the same thing for testing purposes.
+
+
+def event_handle() -> bool:
+    print("Handled exit event.")
+    return False
+
+
+def event_handle_final() -> bool:
+    print("Handling the final exit event. We'll exit now.")
+    return True
+
+
+def generator():
+    yield event_handle()
+    yield event_handle()
+    yield event_handle_final()
+
+
+func_list = [event_handle, event_handle, event_handle_final]
+
+i = 0
+
+
+def lone_function() -> bool:
+    global i
+    i += 1
+    if i < 3:
+        return event_handle()
+    return event_handle_final()
+
+
+exit_event_handler = None
+if args.exit_event_type == "function-list":
+    exit_event_handler = func_list
+elif args.exit_event_type == "generator":
+    exit_event_handler = generator()
+elif args.exit_event_type == "function":
+    exit_event_handler = lone_function
+
+assert exit_event_handler is not None
+
+# Run the simulation
+simulator = Simulator(
+    board=motherboard,
+    on_exit_event={
+        ExitEvent.EXIT: exit_event_handler,
+    },
+)
+simulator.run()
diff --git a/tests/gem5/stdlib/simulator/ref/simout.txt b/tests/gem5/stdlib/simulator/ref/simout.txt
new file mode 100644
index 0000000000..a58d513213
--- /dev/null
+++ b/tests/gem5/stdlib/simulator/ref/simout.txt
@@ -0,0 +1,8 @@
+Global frequency set at 1000000000000 ticks per second
+The program has started!
+About to exit the simulation for the 1 st/nd/rd/th time
+Handled exit event.
+About to exit the simulation for the 2 st/nd/rd/th time
+Handled exit event.
+About to exit the simulation for the 3 st/nd/rd/th time
+Handling the final exit event. We'll exit now.
diff --git a/tests/gem5/stdlib/simulator/test_event_event.py b/tests/gem5/stdlib/simulator/test_event_event.py
new file mode 100644
index 0000000000..23204f0026
--- /dev/null
+++ b/tests/gem5/stdlib/simulator/test_event_event.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from testlib import *
+
+"""
+These tests are designed to test the Simulator's exit event handling. They use
+tests/gem5/stdlib/configs/simulator_exit_event_run.py to run a simulation
+with the exit event handler given as a function list, generator, or function.
+"""
+
+verifiers = (
+    verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref", "simout.txt")),
+)
+
+
+gem5_verify_config(
+    name="simulator-exit-event-handler-with-function-list",
+    verifiers=verifiers,
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simulator_exit_event_run.py",
+    ),
+    config_args=["-e", "function-list"],
+    valid_isas=(constants.all_compiled_tag,),
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="simulator-exit-event-handler-with-generator",
+    verifiers=verifiers,
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simulator_exit_event_run.py",
+    ),
+    config_args=["-e", "generator"],
+    valid_isas=(constants.all_compiled_tag,),
+    length=constants.quick_tag,
+)
+
+gem5_verify_config(
+    name="simulator-exit-event-handler-with-lone-function",
+    verifiers=verifiers,
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simulator_exit_event_run.py",
+    ),
+    config_args=["-e", "function"],
+    valid_isas=(constants.all_compiled_tag,),
+    length=constants.quick_tag,
+)
diff --git a/tests/gem5/stdlib/test_base_cpu_processor.py b/tests/gem5/stdlib/test_base_cpu_processor.py
index cbc6767481..554342b51f 100644
--- a/tests/gem5/stdlib/test_base_cpu_processor.py
+++ b/tests/gem5/stdlib/test_base_cpu_processor.py
@@ -37,9 +37,14 @@
     verifiers=(),
     fixtures=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simple_binary_run.py",
     ),
-    config_args=["x86-hello64-static", "timing", "x86", "-b"],
+    config_args=["x86-hello64-static", "timing", "x86"],
     valid_isas=(constants.all_compiled_tag,),
     length=constants.quick_tag,
 )
@@ -49,9 +54,14 @@
     verifiers=(),
     fixtures=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simple_binary_run.py",
     ),
-    config_args=["riscv-hello", "atomic", "riscv", "-b"],
+    config_args=["riscv-hello", "atomic", "riscv"],
     valid_isas=(constants.all_compiled_tag,),
     length=constants.quick_tag,
 )
@@ -61,9 +71,14 @@
     verifiers=(),
     fixtures=(),
     config=joinpath(
-        config.base_dir, "tests", "gem5", "configs", "simple_binary_run.py"
+        config.base_dir,
+        "tests",
+        "gem5",
+        "stdlib",
+        "configs",
+        "simple_binary_run.py",
     ),
-    config_args=["arm-hello64-static", "o3", "arm", "-b"],
+    config_args=["arm-hello64-static", "o3", "arm"],
     valid_isas=(constants.all_compiled_tag,),
     length=constants.quick_tag,
 )
diff --git a/tests/gem5/stdlib/test_requires.py b/tests/gem5/stdlib/test_requires.py
index b729050b47..3011180679 100644
--- a/tests/gem5/stdlib/test_requires.py
+++ b/tests/gem5/stdlib/test_requires.py
@@ -58,6 +58,7 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "stdlib",
                 "configs",
                 "requires_check.py",
             ),
@@ -75,6 +76,7 @@
                 config.base_dir,
                 "tests",
                 "gem5",
+                "stdlib",
                 "configs",
                 "requires_check.py",
             ),
diff --git a/tests/gem5/suite.py b/tests/gem5/suite.py
index 7e0935d9eb..939ecdd8fa 100644
--- a/tests/gem5/suite.py
+++ b/tests/gem5/suite.py
@@ -98,7 +98,6 @@ def gem5_verify_config(
     for host in valid_hosts:
         for opt in valid_variants:
             for isa in valid_isas:
-
                 # Create a tempdir fixture to be shared throughout the test.
                 tempdir = TempdirFixture()
                 gem5_returncode = VariableFixture(
diff --git a/tests/gem5/to_tick/README.md b/tests/gem5/to_tick/README.md
new file mode 100644
index 0000000000..e675905120
--- /dev/null
+++ b/tests/gem5/to_tick/README.md
@@ -0,0 +1,8 @@
+# To Tick
+
+These tests check that setting the max tick in various ways behaves as expected, and that scheduling an event at a certain tick works.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/to_tick --length=[length]
+```
diff --git a/tests/gem5/to_tick/configs/tick-exit.py b/tests/gem5/to_tick/configs/tick-exit.py
index 9b412cbfb6..4f13d723fa 100644
--- a/tests/gem5/to_tick/configs/tick-exit.py
+++ b/tests/gem5/to_tick/configs/tick-exit.py
@@ -28,7 +28,7 @@
 
 """
 
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.isas import ISA
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.boards.simple_board import SimpleBoard
@@ -76,7 +76,7 @@
 )
 
 # Set the workload
-binary = Resource(
+binary = obtain_resource(
     "x86-hello64-static", resource_directory=args.resource_directory
 )
 motherboard.set_se_binary_workload(binary)
diff --git a/tests/gem5/to_tick/configs/tick-to-max.py b/tests/gem5/to_tick/configs/tick-to-max.py
index 2b679df412..89396915cf 100644
--- a/tests/gem5/to_tick/configs/tick-to-max.py
+++ b/tests/gem5/to_tick/configs/tick-to-max.py
@@ -33,7 +33,7 @@
 time.
 """
 
-from gem5.resources.resource import Resource
+from gem5.resources.resource import obtain_resource
 from gem5.isas import ISA
 from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.boards.simple_board import SimpleBoard
@@ -97,7 +97,7 @@
 )
 
 # Set the workload
-binary = Resource(
+binary = obtain_resource(
     "x86-hello64-static", resource_directory=args.resource_directory
 )
 motherboard.set_se_binary_workload(binary)
diff --git a/tests/gem5/traffic_gen/README.md b/tests/gem5/traffic_gen/README.md
new file mode 100644
index 0000000000..dadb71b641
--- /dev/null
+++ b/tests/gem5/traffic_gen/README.md
@@ -0,0 +1,9 @@
+# Traffic Generator
+
+This tests the gem5 memory components with a simple traffic generator.
+It also checks the correctness of the statistics output by gem5.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/traffic_gen --length=[length]
+```
diff --git a/tests/gem5/traffic_gen/simple_traffic_run.py b/tests/gem5/traffic_gen/configs/simple_traffic_run.py
similarity index 100%
rename from tests/gem5/traffic_gen/simple_traffic_run.py
rename to tests/gem5/traffic_gen/configs/simple_traffic_run.py
diff --git a/tests/gem5/traffic_gen/test_memory_traffic_gen.py b/tests/gem5/traffic_gen/test_memory_traffic_gen.py
index 122204e3e9..74b31105b7 100644
--- a/tests/gem5/traffic_gen/test_memory_traffic_gen.py
+++ b/tests/gem5/traffic_gen/test_memory_traffic_gen.py
@@ -44,7 +44,6 @@ def test_memory(
     memory: str,
     *args,
 ) -> None:
-
     name = (
         "test-memory-"
         + f"{generator}-{generator_cores}-{cache}-{module}-{memory}"
@@ -72,6 +71,7 @@ def test_memory(
             "tests",
             "gem5",
             "traffic_gen",
+            "configs",
             "simple_traffic_run.py",
         ),
         config_args=[generator, generator_cores, cache, module]
diff --git a/tests/gem5/verifier.py b/tests/gem5/verifier.py
index c725fc68b9..eed9499848 100644
--- a/tests/gem5/verifier.py
+++ b/tests/gem5/verifier.py
@@ -49,7 +49,7 @@
 from testlib.helper import joinpath, diff_out_file
 
 
-class Verifier(object):
+class Verifier:
     def __init__(self, fixtures=tuple()):
         self.fixtures = fixtures
 
@@ -67,7 +67,7 @@ def instantiate_test(self, name_pfx):
 
 class CheckH5StatsExist(Verifier):
     def __init__(self, stats_file="stats.h5"):
-        super(CheckH5StatsExist, self).__init__()
+        super().__init__()
         self.stats_file = stats_file
 
     def test(self, params):
@@ -84,7 +84,7 @@ class MatchGoldStandard(Verifier):
     """
 
     def __init__(
-        self, standard_filename, ignore_regex=None, test_filename="simout"
+        self, standard_filename, ignore_regex=None, test_filename="simout.txt"
     ):
         """
         :param standard_filename: The path of the standard file to compare
@@ -94,7 +94,7 @@ def __init__(
         either which will be ignored in 'standard' and test output files when
         diffing.
         """
-        super(MatchGoldStandard, self).__init__()
+        super().__init__()
         self.standard_filename = standard_filename
         self.test_filename = test_filename
 
@@ -139,13 +139,12 @@ class DerivedGoldStandard(MatchGoldStandard):
     def __init__(
         self, standard_filename, ignore_regex=__ignore_regex_sentinel, **kwargs
     ):
-
         if ignore_regex == self.__ignore_regex_sentinel:
             ignore_regex = self._default_ignore_regex
 
         self._generic_instance_warning(kwargs)
 
-        super(DerivedGoldStandard, self).__init__(
+        super().__init__(
             standard_filename,
             test_filename=self._file,
             ignore_regex=ignore_regex,
@@ -156,7 +155,7 @@ def __init__(
 class MatchStdout(DerivedGoldStandard):
     _file = constants.gem5_simulation_stdout
     _default_ignore_regex = [
-        re.compile("^\s+$"),  # Remove blank lines.
+        re.compile(r"^\s+$"),  # Remove blank lines.
         re.compile("^gem5 Simulator System"),
         re.compile("^gem5 is copyrighted software"),
         re.compile("^Redirecting (stdout|stderr) to"),
@@ -169,8 +168,8 @@ class MatchStdout(DerivedGoldStandard):
         re.compile("^info: kernel located at:"),
         re.compile("^info: Standard input is not a terminal"),
         re.compile("^Couldn't unlink "),
-        re.compile("^Using GPU kernel code file\(s\) "),
-        re.compile("^.* not found locally\. Downloading"),
+        re.compile(r"^Using GPU kernel code file\(s\) "),
+        re.compile(r"^.* not found locally\. Downloading"),
         re.compile("^Finished downloading"),
         re.compile("^info: Using default config"),
     ]
@@ -219,12 +218,12 @@ class MatchFileRegex(Verifier):
     """
 
     def __init__(self, regex, filenames):
-        super(MatchFileRegex, self).__init__()
+        super().__init__()
         self.regex = _iterable_regex(regex)
         self.filenames = filenames
 
     def parse_file(self, fname):
-        with open(fname, "r") as file_:
+        with open(fname) as file_:
             for line in file_:
                 for regex in self.regex:
                     if re.match(regex, line):
@@ -253,7 +252,7 @@ def __init__(self, regex, match_stderr=True, match_stdout=True):
             filenames.append(constants.gem5_simulation_stdout)
         if match_stderr:
             filenames.append(constants.gem5_simulation_stderr)
-        super(MatchRegex, self).__init__(regex, filenames)
+        super().__init__(regex, filenames)
 
 
 class NoMatchRegex(MatchRegex):
@@ -262,7 +261,7 @@ class NoMatchRegex(MatchRegex):
     """
 
     def __init__(self, regex, match_stderr=True, match_stdout=True):
-        super(NoMatchRegex, self).__init__(regex, match_stderr, match_stdout)
+        super().__init__(regex, match_stderr, match_stdout)
 
     def test(self, params):
         fixtures = params.fixtures
@@ -291,7 +290,7 @@ def __init__(
         :param test_name_in_m5out: True if the 'test_name' dir is to found in
         the `m5.options.outdir`.
         """
-        super(MatchJSONStats, self).__init__()
+        super().__init__()
         self.truth_name = truth_name
         self.test_name = test_name
         self.test_name_in_outdir = test_name_in_outdir
@@ -319,13 +318,13 @@ def _compare_stats(self, trusted_file, test_file):
             test_util.fail(err)
 
     def test(self, params):
-        trusted_file = open(self.truth_name, "r")
+        trusted_file = open(self.truth_name)
         if self.test_name_in_outdir:
             fixtures = params.fixtures
             tempdir = fixtures[constants.tempdir_fixture_name].path
-            test_file = open(joinpath(tempdir, self.test_name), "r")
+            test_file = open(joinpath(tempdir, self.test_name))
         else:
-            test_file = open(self.test_name, "r")
+            test_file = open(self.test_name)
 
         return self._compare_stats(trusted_file, test_file)
 
diff --git a/tests/gem5/x86_boot_tests/README.md b/tests/gem5/x86_boot_tests/README.md
new file mode 100644
index 0000000000..92357b6bb2
--- /dev/null
+++ b/tests/gem5/x86_boot_tests/README.md
@@ -0,0 +1,9 @@
+# X86 Boot Tests
+
+These tests run a series of Linux boots on the X86Board.
+They vary the CPU type, number of CPUs, and memory used for each run.
+To run these tests by themselves, you can run the following command in the tests directory:
+
+```bash
+./main.py run gem5/x86_boot_tests --length=[length]
+```
diff --git a/tests/gem5/configs/x86_boot_exit_run.py b/tests/gem5/x86_boot_tests/configs/x86_boot_exit_run.py
similarity index 98%
rename from tests/gem5/configs/x86_boot_exit_run.py
rename to tests/gem5/x86_boot_tests/configs/x86_boot_exit_run.py
index e9eeacefd8..63b6625479 100644
--- a/tests/gem5/configs/x86_boot_exit_run.py
+++ b/tests/gem5/x86_boot_tests/configs/x86_boot_exit_run.py
@@ -41,7 +41,7 @@
 )
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.simulate.simulator import Simulator
-from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource
 
 import argparse
 import importlib
@@ -184,7 +184,7 @@
     kernal_args.append("init=/root/exit.sh")
 
 # Set the workload.
-workload = Workload(
+workload = obtain_resource(
     "x86-ubuntu-18.04-boot", resource_directory=args.resource_directory
 )
 workload.set_parameter("kernel_args", kernal_args)
diff --git a/tests/gem5/x86-boot-tests/test_linux_boot.py b/tests/gem5/x86_boot_tests/test_linux_boot.py
similarity index 98%
rename from tests/gem5/x86-boot-tests/test_linux_boot.py
rename to tests/gem5/x86_boot_tests/test_linux_boot.py
index 1907aaf0e4..d4b744a0b1 100644
--- a/tests/gem5/x86-boot-tests/test_linux_boot.py
+++ b/tests/gem5/x86_boot_tests/test_linux_boot.py
@@ -44,7 +44,6 @@ def test_boot(
     boot_type: str = "init",
     to_tick: Optional[int] = None,
 ):
-
     name = "{}-cpu_{}-cores_{}_{}_{}_x86-boot-test".format(
         cpu, str(num_cpus), mem_system, memory_class, boot_type
     )
@@ -75,7 +74,12 @@ def test_boot(
         verifiers=verifiers,
         fixtures=(),
         config=joinpath(
-            config.base_dir, "tests", "gem5", "configs", "x86_boot_exit_run.py"
+            config.base_dir,
+            "tests",
+            "gem5",
+            "x86_boot_tests",
+            "configs",
+            "x86_boot_exit_run.py",
         ),
         config_args=[
             "--cpu",
diff --git a/tests/pyunit/pyunit_jsonserializable_check.py b/tests/pyunit/pyunit_jsonserializable_check.py
index 8d5d2fa857..9979d3f9d6 100644
--- a/tests/pyunit/pyunit_jsonserializable_check.py
+++ b/tests/pyunit/pyunit_jsonserializable_check.py
@@ -54,18 +54,18 @@ def test_to_json(self):
         obj_json = obj.to_json()
         self.assertTrue("child_1" in obj_json)
         self.assertTrue("stat1" in obj_json["child_1"])
-        self.assertEquals(2, obj_json["child_1"]["stat1"])
+        self.assertEqual(2, obj_json["child_1"]["stat1"])
         self.assertTrue("stat2" in obj_json["child_1"])
-        self.assertEquals("3", obj_json["child_1"]["stat2"])
+        self.assertEqual("3", obj_json["child_1"]["stat2"])
         self.assertTrue("child_list" in obj_json)
-        self.assertEquals(2, len(obj_json["child_list"]))
+        self.assertEqual(2, len(obj_json["child_list"]))
         self.assertTrue("stat1" in obj_json["child_list"][0])
         self.assertEqual("hello", obj_json["child_list"][0]["stat1"])
         self.assertTrue("list_stat2" in obj_json["child_list"][1])
-        self.assertEquals(6, len(obj_json["child_list"][1]["list_stat2"]))
-        self.assertEquals("1", obj_json["child_list"][1]["list_stat2"][0])
-        self.assertEquals(2, obj_json["child_list"][1]["list_stat2"][1])
-        self.assertEquals("3", obj_json["child_list"][1]["list_stat2"][2])
-        self.assertEquals(4, obj_json["child_list"][1]["list_stat2"][3])
-        self.assertEquals(5.2, obj_json["child_list"][1]["list_stat2"][4])
-        self.assertEquals(None, obj_json["child_list"][1]["list_stat2"][5])
+        self.assertEqual(6, len(obj_json["child_list"][1]["list_stat2"]))
+        self.assertEqual("1", obj_json["child_list"][1]["list_stat2"][0])
+        self.assertEqual(2, obj_json["child_list"][1]["list_stat2"][1])
+        self.assertEqual("3", obj_json["child_list"][1]["list_stat2"][2])
+        self.assertEqual(4, obj_json["child_list"][1]["list_stat2"][3])
+        self.assertEqual(5.2, obj_json["child_list"][1]["list_stat2"][4])
+        self.assertEqual(None, obj_json["child_list"][1]["list_stat2"][5])
diff --git a/tests/pyunit/stdlib/pyunit_looppoint.py b/tests/pyunit/stdlib/pyunit_looppoint.py
index 0cb708e8ac..f838aa9ff5 100644
--- a/tests/pyunit/stdlib/pyunit_looppoint.py
+++ b/tests/pyunit/stdlib/pyunit_looppoint.py
@@ -47,21 +47,21 @@ class LooppointRegionPCTestSuite(unittest.TestCase):
     def test_construction_with_relative(self) -> None:
         region_pc = LooppointRegionPC(pc=444, globl=65, relative=454)
 
-        self.assertEquals(444, region_pc.get_pc())
-        self.assertEquals(65, region_pc.get_global())
-        self.assertEquals(454, region_pc.get_relative())
+        self.assertEqual(444, region_pc.get_pc())
+        self.assertEqual(65, region_pc.get_global())
+        self.assertEqual(454, region_pc.get_relative())
 
     def test_construction_without_relative(self) -> None:
         region_pc = LooppointRegionPC(pc=43454, globl=653434)
 
-        self.assertEquals(43454, region_pc.get_pc())
-        self.assertEquals(653434, region_pc.get_global())
+        self.assertEqual(43454, region_pc.get_pc())
+        self.assertEqual(653434, region_pc.get_global())
         self.assertIsNone(region_pc.get_relative())
 
     def test_get_pc_count_pair(self) -> None:
         region_pc = LooppointRegionPC(pc=1, globl=2)
         expected = PcCountPair(1, 2)
-        self.assertEquals(expected, region_pc.get_pc_count_pair())
+        self.assertEqual(expected, region_pc.get_pc_count_pair())
 
     def update_relative_count(self) -> None:
         pass  # Not really sure what to do here...
@@ -70,23 +70,23 @@ def test_to_json_with_relative(self) -> None:
         region_pc = LooppointRegionPC(pc=100, globl=200, relative=300)
         json_contents = region_pc.to_json()
 
-        self.assertEquals(3, len(json_contents))
+        self.assertEqual(3, len(json_contents))
         self.assertTrue("pc" in json_contents)
-        self.assertEquals(100, json_contents["pc"])
+        self.assertEqual(100, json_contents["pc"])
         self.assertTrue("global" in json_contents)
-        self.assertEquals(200, json_contents["global"])
+        self.assertEqual(200, json_contents["global"])
         self.assertTrue("relative" in json_contents)
-        self.assertEquals(300, json_contents["relative"])
+        self.assertEqual(300, json_contents["relative"])
 
     def test_to_json_without_relative(self) -> None:
         region_pc = LooppointRegionPC(pc=1111, globl=2222)
         json_contents = region_pc.to_json()
 
-        self.assertEquals(2, len(json_contents))
+        self.assertEqual(2, len(json_contents))
         self.assertTrue("pc" in json_contents)
-        self.assertEquals(1111, json_contents["pc"])
+        self.assertEqual(1111, json_contents["pc"])
         self.assertTrue("global" in json_contents)
-        self.assertEquals(2222, json_contents["global"])
+        self.assertEqual(2222, json_contents["global"])
         self.assertFalse("relative" in json_contents)
 
 
@@ -98,8 +98,8 @@ def test_construction(self) -> None:
             start=PcCountPair(123, 456), end=PcCountPair(789, 1011)
         )
 
-        self.assertEquals(PcCountPair(123, 456), region_warmup.get_start())
-        self.assertEquals(PcCountPair(789, 1011), region_warmup.get_end())
+        self.assertEqual(PcCountPair(123, 456), region_warmup.get_start())
+        self.assertEqual(PcCountPair(789, 1011), region_warmup.get_end())
 
     def test_get_pc_count_pairs(self) -> None:
         region_warmup = LooppointRegionWarmup(
@@ -107,9 +107,9 @@ def test_get_pc_count_pairs(self) -> None:
         )
 
         output = region_warmup.get_pc_count_pairs()
-        self.assertEquals(2, len(output))
-        self.assertEquals(PcCountPair(1, 1), output[0])
-        self.assertEquals(PcCountPair(2, 2), output[1])
+        self.assertEqual(2, len(output))
+        self.assertEqual(PcCountPair(1, 1), output[0])
+        self.assertEqual(PcCountPair(2, 2), output[1])
 
     def test_to_json(self) -> None:
         region_warmup = LooppointRegionWarmup(
@@ -135,14 +135,14 @@ def test_construction_with(self) -> None:
 
         sim_start = sim.get_start()
 
-        self.assertEquals(444, sim_start.get_pc())
-        self.assertEquals(65, sim_start.get_global())
-        self.assertEquals(454, sim_start.get_relative())
+        self.assertEqual(444, sim_start.get_pc())
+        self.assertEqual(65, sim_start.get_global())
+        self.assertEqual(454, sim_start.get_relative())
 
         sim_end = sim.get_end()
 
-        self.assertEquals(555, sim_end.get_pc())
-        self.assertEquals(699, sim_end.get_global())
+        self.assertEqual(555, sim_end.get_pc())
+        self.assertEqual(699, sim_end.get_global())
         self.assertIsNone(sim_end.get_relative())
 
     def test_get_pc_count_pairs(self) -> None:
@@ -152,9 +152,9 @@ def test_get_pc_count_pairs(self) -> None:
         )
 
         sim_pc_count_pairs = sim.get_pc_count_pairs()
-        self.assertEquals(2, len(sim_pc_count_pairs))
-        self.assertEquals(PcCountPair(56, 45), sim_pc_count_pairs[0])
-        self.assertEquals(PcCountPair(23, 12), sim_pc_count_pairs[1])
+        self.assertEqual(2, len(sim_pc_count_pairs))
+        self.assertEqual(PcCountPair(56, 45), sim_pc_count_pairs[0])
+        self.assertEqual(PcCountPair(23, 12), sim_pc_count_pairs[1])
 
     def test_get_json(self) -> None:
         sim = LooppointSimulation(
@@ -193,7 +193,7 @@ def test_construction_with_warmup(self):
         self.assertTrue(
             isinstance(region.get_simulation(), LooppointSimulation)
         )
-        self.assertEquals(5.6, region.get_multiplier())
+        self.assertEqual(5.6, region.get_multiplier())
         self.assertIsNotNone(region.get_warmup())
         self.assertTrue(isinstance(region.get_warmup(), LooppointRegionWarmup))
 
@@ -209,7 +209,7 @@ def test_construction_without_warmup(self):
         self.assertTrue(
             isinstance(region.get_simulation(), LooppointSimulation)
         )
-        self.assertEquals(5444.4, region.get_multiplier())
+        self.assertEqual(5444.4, region.get_multiplier())
         self.assertIsNone(region.get_warmup())
 
     def test_get_pc_count_pairs_with_warmup(self):
@@ -225,11 +225,11 @@ def test_get_pc_count_pairs_with_warmup(self):
         )
         pc_count_pairs = region.get_pc_count_pairs()
 
-        self.assertEquals(4, len(pc_count_pairs))
-        self.assertEquals(PcCountPair(1, 2), pc_count_pairs[0])
-        self.assertEquals(PcCountPair(6, 7), pc_count_pairs[1])
-        self.assertEquals(PcCountPair(100, 200), pc_count_pairs[2])
-        self.assertEquals(PcCountPair(101, 202), pc_count_pairs[3])
+        self.assertEqual(4, len(pc_count_pairs))
+        self.assertEqual(PcCountPair(1, 2), pc_count_pairs[0])
+        self.assertEqual(PcCountPair(6, 7), pc_count_pairs[1])
+        self.assertEqual(PcCountPair(100, 200), pc_count_pairs[2])
+        self.assertEqual(PcCountPair(101, 202), pc_count_pairs[3])
 
     def test_get_pc_count_pairs_without_warmup(self):
         region = LooppointRegion(
@@ -242,9 +242,9 @@ def test_get_pc_count_pairs_without_warmup(self):
 
         pc_count_pairs = region.get_pc_count_pairs()
 
-        self.assertEquals(2, len(pc_count_pairs))
-        self.assertEquals(PcCountPair(56, 2345), pc_count_pairs[0])
-        self.assertEquals(PcCountPair(645, 457), pc_count_pairs[1])
+        self.assertEqual(2, len(pc_count_pairs))
+        self.assertEqual(PcCountPair(56, 2345), pc_count_pairs[0])
+        self.assertEqual(PcCountPair(645, 457), pc_count_pairs[1])
 
 
 class LooppointTestSuite(unittest.TestCase):
@@ -276,11 +276,11 @@ def test_construction(self):
             }
         )
 
-        self.assertEquals(2, len(looppoint.get_regions()))
+        self.assertEqual(2, len(looppoint.get_regions()))
         self.assertTrue(1 in looppoint.get_regions())
-        self.assertEquals(region1, looppoint.get_regions()[1])
+        self.assertEqual(region1, looppoint.get_regions()[1])
         self.assertTrue(3 in looppoint.get_regions())
-        self.assertEquals(region2, looppoint.get_regions()[3])
+        self.assertEqual(region2, looppoint.get_regions()[3])
 
     def test_get_targets(self):
         region1 = LooppointRegion(
@@ -309,16 +309,15 @@ def test_get_targets(self):
         )
 
         targets = looppoint.get_targets()
-        self.assertEquals(6, len(targets))
-        self.assertEquals(PcCountPair(56, 2345), targets[0])
-        self.assertEquals(PcCountPair(645, 457), targets[1])
-        self.assertEquals(PcCountPair(67, 254), targets[2])
-        self.assertEquals(PcCountPair(64554, 7454), targets[3])
-        self.assertEquals(PcCountPair(100, 200), targets[4])
-        self.assertEquals(PcCountPair(101, 202), targets[5])
+        self.assertEqual(6, len(targets))
+        self.assertEqual(PcCountPair(56, 2345), targets[0])
+        self.assertEqual(PcCountPair(645, 457), targets[1])
+        self.assertEqual(PcCountPair(67, 254), targets[2])
+        self.assertEqual(PcCountPair(64554, 7454), targets[3])
+        self.assertEqual(PcCountPair(100, 200), targets[4])
+        self.assertEqual(PcCountPair(101, 202), targets[5])
 
     def test_get_region_start_id_map(self):
-
         region1 = LooppointRegion(
             simulation=LooppointSimulation(
                 start=LooppointRegionPC(pc=56, globl=2345, relative=344),
@@ -346,15 +345,15 @@ def test_get_region_start_id_map(self):
 
         region_start_id_map = looppoint.get_region_start_id_map()
 
-        self.assertEquals(2, len(region_start_id_map))
+        self.assertEqual(2, len(region_start_id_map))
 
         # The start of region1.
         self.assertTrue(PcCountPair(56, 2345) in region_start_id_map)
-        self.assertEquals(1, region_start_id_map[PcCountPair(56, 2345)])
+        self.assertEqual(1, region_start_id_map[PcCountPair(56, 2345)])
 
         # The start of region2.  Since this has a warmup, it's the warmup.
         self.assertTrue(PcCountPair(100, 200) in region_start_id_map)
-        self.assertEquals(3, region_start_id_map[PcCountPair(100, 200)])
+        self.assertEqual(3, region_start_id_map[PcCountPair(100, 200)])
 
     def test_to_json(self) -> None:
         region1 = LooppointRegion(
@@ -441,60 +440,60 @@ def test_load_pinpoints_matrix(self):
         )
 
         regions = looppoint.get_regions()
-        self.assertEquals(3, len(regions))
+        self.assertEqual(3, len(regions))
 
         region1 = regions[1]
-        self.assertEquals(4.0, region1.get_multiplier())
+        self.assertEqual(4.0, region1.get_multiplier())
 
         region1start = region1.get_simulation().get_start()
-        self.assertEquals(0x4069D0, region1start.get_pc())
-        self.assertEquals(211076617, region1start.get_global())
+        self.assertEqual(0x4069D0, region1start.get_pc())
+        self.assertEqual(211076617, region1start.get_global())
         self.assertIsNone(region1start.get_relative())
 
         region1end = region1.get_simulation().get_end()
-        self.assertEquals(0x4069D0, region1end.get_pc())
-        self.assertEquals(219060252, region1end.get_global())
+        self.assertEqual(0x4069D0, region1end.get_pc())
+        self.assertEqual(219060252, region1end.get_global())
         self.assertIsNotNone(region1end.get_relative())
-        self.assertEquals(1060676, region1end.get_relative())
+        self.assertEqual(1060676, region1end.get_relative())
 
         self.assertIsNone(region1.get_warmup())
 
         region2 = regions[2]
-        self.assertEquals(5.001, region2.get_multiplier())
+        self.assertEqual(5.001, region2.get_multiplier())
 
         region2start = region2.get_simulation().get_start()
-        self.assertEquals(0x4069D0, region2start.get_pc())
-        self.assertEquals(407294228, region2start.get_global())
+        self.assertEqual(0x4069D0, region2start.get_pc())
+        self.assertEqual(407294228, region2start.get_global())
         self.assertIsNone(region2start.get_relative())
 
         region2end = region2.get_simulation().get_end()
-        self.assertEquals(0x4069D0, region2end.get_pc())
-        self.assertEquals(415282447, region2end.get_global())
+        self.assertEqual(0x4069D0, region2end.get_pc())
+        self.assertEqual(415282447, region2end.get_global())
         self.assertIsNotNone(region2end.get_relative())
-        self.assertEquals(1035231, region2end.get_relative())
+        self.assertEqual(1035231, region2end.get_relative())
 
         region2warmup = region2.get_warmup()
         self.assertIsNotNone(region2warmup)
-        self.assertEquals(
+        self.assertEqual(
             PcCountPair(0x406880, 48111518), region2warmup.get_start()
         )
-        self.assertEquals(
+        self.assertEqual(
             PcCountPair(0x4069D0, 407294228), region2warmup.get_end()
         )
 
         region3 = regions[3]
-        self.assertEquals(4.0, region3.get_multiplier())
+        self.assertEqual(4.0, region3.get_multiplier())
 
         region3start = region3.get_simulation().get_start()
-        self.assertEquals(0x4069D0, region3start.get_pc())
-        self.assertEquals(187978221, region3start.get_global())
+        self.assertEqual(0x4069D0, region3start.get_pc())
+        self.assertEqual(187978221, region3start.get_global())
         self.assertIsNone(region3start.get_relative())
 
         region3end = region3.get_simulation().get_end()
-        self.assertEquals(0x406880, region3end.get_pc())
-        self.assertEquals(23520614, region3end.get_global())
+        self.assertEqual(0x406880, region3end.get_pc())
+        self.assertEqual(23520614, region3end.get_global())
         self.assertIsNotNone(region3end.get_relative())
-        self.assertEquals(144352, region3end.get_relative())
+        self.assertEqual(144352, region3end.get_relative())
 
         self.assertIsNone(region3.get_warmup())
 
@@ -509,22 +508,22 @@ def test_load_pinpoints_matrix_region_1(self):
         )
 
         regions = looppoint.get_regions()
-        self.assertEquals(1, len(regions))
+        self.assertEqual(1, len(regions))
 
         self.assertTrue(1 in regions)
         region1 = regions[1]
-        self.assertEquals(4.0, region1.get_multiplier())
+        self.assertEqual(4.0, region1.get_multiplier())
 
         region1start = region1.get_simulation().get_start()
-        self.assertEquals(0x4069D0, region1start.get_pc())
-        self.assertEquals(211076617, region1start.get_global())
+        self.assertEqual(0x4069D0, region1start.get_pc())
+        self.assertEqual(211076617, region1start.get_global())
         self.assertIsNone(region1start.get_relative())
 
         region1end = region1.get_simulation().get_end()
-        self.assertEquals(0x4069D0, region1end.get_pc())
-        self.assertEquals(219060252, region1end.get_global())
+        self.assertEqual(0x4069D0, region1end.get_pc())
+        self.assertEqual(219060252, region1end.get_global())
         self.assertIsNotNone(region1end.get_relative())
-        self.assertEquals(1060676, region1end.get_relative())
+        self.assertEqual(1060676, region1end.get_relative())
 
         self.assertIsNone(region1.get_warmup())
 
@@ -542,31 +541,31 @@ def test_load_pinpoints_matrix_region_1(self):
             region_id="1",
         )
 
-        self.assertEquals(1, len(looppoint.get_regions()))
+        self.assertEqual(1, len(looppoint.get_regions()))
         self.assertTrue("1" in looppoint.get_regions())
         region = looppoint.get_regions()["1"]
 
-        self.assertEquals(4.0, region.get_multiplier())
+        self.assertEqual(4.0, region.get_multiplier())
 
         region_start = region.get_simulation().get_start()
-        self.assertEquals(4221392, region_start.get_pc())
-        self.assertEquals(211076617, region_start.get_global())
+        self.assertEqual(4221392, region_start.get_pc())
+        self.assertEqual(211076617, region_start.get_global())
         self.assertIsNotNone(region_start.get_relative())
-        self.assertEquals(15326617, region_start.get_relative())
+        self.assertEqual(15326617, region_start.get_relative())
 
         region_end = region.get_simulation().get_end()
-        self.assertEquals(4221392, region_end.get_pc())
-        self.assertEquals(219060252, region_end.get_global())
+        self.assertEqual(4221392, region_end.get_pc())
+        self.assertEqual(219060252, region_end.get_global())
         self.assertIsNotNone(region_end.get_relative())
-        self.assertEquals(23310252, region_end.get_relative())
+        self.assertEqual(23310252, region_end.get_relative())
 
         region_warmup = region.get_warmup()
         self.assertIsNotNone(region_warmup)
 
-        self.assertEquals(
+        self.assertEqual(
             PcCountPair(4221056, 23520614), region_warmup.get_start()
         )
-        self.assertEquals(
+        self.assertEqual(
             PcCountPair(4221392, 211076617), region_warmup.get_end()
         )
 
@@ -580,20 +579,20 @@ def test_load_pinpoints_matrix_region_2(self):
             region_id="2",
         )
 
-        self.assertEquals(1, len(looppoint.get_regions()))
+        self.assertEqual(1, len(looppoint.get_regions()))
         self.assertTrue("2" in looppoint.get_regions())
         region = looppoint.get_regions()["2"]
 
-        self.assertEquals(5.001, region.get_multiplier())
+        self.assertEqual(5.001, region.get_multiplier())
 
         region_start = region.get_simulation().get_start()
-        self.assertEquals(4221392, region_start.get_pc())
-        self.assertEquals(407294228, region_start.get_global())
+        self.assertEqual(4221392, region_start.get_pc())
+        self.assertEqual(407294228, region_start.get_global())
         self.assertIsNone(region_start.get_relative())
 
         region_end = region.get_simulation().get_end()
-        self.assertEquals(4221392, region_end.get_pc())
-        self.assertEquals(415282447, region_end.get_global())
+        self.assertEqual(4221392, region_end.get_pc())
+        self.assertEqual(415282447, region_end.get_global())
         self.assertIsNone(region_end.get_relative())
 
         region_warmup = region.get_warmup()
diff --git a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
index f190b1ed5f..bc0d00c0f4 100644
--- a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
@@ -34,6 +34,10 @@
 import contextlib
 from pathlib import Path
 
+from gem5.resources.client_api.atlasclient import (
+    AtlasClientHttpJsonRequestError,
+)
+
 mock_json_path = Path(__file__).parent / "refs/resources.json"
 mock_config_json = {
     "sources": {
@@ -63,12 +67,12 @@
 
 mock_json = {}
 
-with open(Path(__file__).parent / "refs/mongo-mock.json", "r") as f:
+with open(Path(__file__).parent / "refs/mongo-mock.json") as f:
     mock_json = json.load(f)
 
 duplicate_mock_json = {}
 
-with open(Path(__file__).parent / "refs/mongo-dup-mock.json", "r") as f:
+with open(Path(__file__).parent / "refs/mongo-dup-mock.json") as f:
     duplicate_mock_json = json.load(f)
 
 
@@ -419,21 +423,11 @@ def test_get_resource_same_resource_same_version(self, mock_get):
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_invalid_auth_url(self, mock_get):
         resource_id = "test-resource"
-        f = io.StringIO()
-        with self.assertRaises(Exception) as context:
-            with contextlib.redirect_stderr(f):
-                get_resource_json_obj(
-                    resource_id,
-                    gem5_version="develop",
-                )
-        self.assertTrue(
-            "Error getting resources from client gem5-resources:"
-            " Panic: Not found" in str(f.getvalue())
-        )
-        self.assertTrue(
-            "Resource with ID 'test-resource' not found."
-            in str(context.exception)
-        )
+        with self.assertRaises(AtlasClientHttpJsonRequestError) as context:
+            get_resource_json_obj(
+                resource_id,
+                gem5_version="develop",
+            )
 
     @patch(
         "gem5.resources.client.clientwrapper",
@@ -442,21 +436,11 @@ def test_invalid_auth_url(self, mock_get):
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_invalid_url(self, mock_get):
         resource_id = "test-resource"
-        f = io.StringIO()
-        with self.assertRaises(Exception) as context:
-            with contextlib.redirect_stderr(f):
-                get_resource_json_obj(
-                    resource_id,
-                    gem5_version="develop",
-                )
-        self.assertTrue(
-            "Error getting resources from client gem5-resources:"
-            " Panic: Not found" in str(f.getvalue())
-        )
-        self.assertTrue(
-            "Resource with ID 'test-resource' not found."
-            in str(context.exception)
-        )
+        with self.assertRaises(AtlasClientHttpJsonRequestError) as context:
+            get_resource_json_obj(
+                resource_id,
+                gem5_version="develop",
+            )
 
     @patch(
         "gem5.resources.client.clientwrapper",
@@ -465,18 +449,8 @@ def test_invalid_url(self, mock_get):
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_invalid_url(self, mock_get):
         resource_id = "test-too-many"
-        f = io.StringIO()
-        with self.assertRaises(Exception) as context:
-            with contextlib.redirect_stderr(f):
-                get_resource_json_obj(
-                    resource_id,
-                    gem5_version="develop",
-                )
-        self.assertTrue(
-            "Error getting resources from client gem5-resources:"
-            " Panic: Too many requests" in str(f.getvalue())
-        )
-        self.assertTrue(
-            "Resource with ID 'test-too-many' not found."
-            in str(context.exception)
-        )
+        with self.assertRaises(AtlasClientHttpJsonRequestError) as context:
+            get_resource_json_obj(
+                resource_id,
+                gem5_version="develop",
+            )
diff --git a/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py b/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
index 88db3d4967..82fc775975 100644
--- a/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
@@ -148,16 +148,16 @@ def verify_json(self, json: Dict) -> None:
         "create_temp_resources_json" has been loaded correctly into a Python
         dictionary.
         """
-        self.assertEquals(4, len(json))
+        self.assertEqual(4, len(json))
         self.assertTrue("id" in json[0])
-        self.assertEquals("this-is-a-test-resource", json[0]["id"])
-        self.assertEquals("binary", json[0]["category"])
+        self.assertEqual("this-is-a-test-resource", json[0]["id"])
+        self.assertEqual("binary", json[0]["category"])
         self.assertTrue("id" in json[1])
-        self.assertEquals("this-is-a-test-resource", json[1]["id"])
+        self.assertEqual("this-is-a-test-resource", json[1]["id"])
         self.assertTrue("id" in json[2])
-        self.assertEquals("test-version", json[2]["id"])
+        self.assertEqual("test-version", json[2]["id"])
         self.assertTrue("id" in json[3])
-        self.assertEquals("test-version", json[3]["id"])
+        self.assertEqual("test-version", json[3]["id"])
 
     def test_get_resources_json_at_path(self) -> None:
         # Tests JSONClient.get_resources_json()
diff --git a/tests/pyunit/stdlib/resources/pyunit_local_file_path_check.py b/tests/pyunit/stdlib/resources/pyunit_local_file_path_check.py
new file mode 100644
index 0000000000..b1d0ea1bc3
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_local_file_path_check.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from gem5.resources.downloader import _file_uri_to_path
+from pathlib import Path
+import unittest
+
+
+class LocalPathTestSuite(unittest.TestCase):
+    def test_local_path_exists_single_slash(self):
+        # "file:/<path>" carries no host part and maps to the absolute path.
+        uri = "file:/test/test/file"
+        expected = Path("/test/test/file")
+        self.assertEqual(_file_uri_to_path(uri), expected)
+
+    def test_non_localhost_exception(self):
+        # A file URI that names a host other than localhost is rejected.
+        uri = "file://test/test/file"
+        # "test" lands in the host position here, hence the Exception below.
+        with self.assertRaises(Exception) as raised:
+            _file_uri_to_path(uri)
+        self.assertEqual(
+            str(raised.exception),
+            f"File URI '{uri}' specifies host 'test'. "
+            "Only localhost is permitted.",
+        )
+
+    def test_localhost_accepted(self):
+        uri = "file://localhost/test/test/file"
+        # An explicit "localhost" host is allowed and dropped from the path.
+        expected = Path("/test/test/file")
+        self.assertEqual(_file_uri_to_path(uri), expected)
+
+    def test_local_path_exists_triple_slash(self):
+        # Empty host ("file:///...") yields the same absolute path.
+        uri = "file:///test/test/file"
+        expected = Path("/test/test/file")
+        self.assertEqual(_file_uri_to_path(uri), expected)
+
+    def test_local_path_exists_quadruple_slash(self):
+        # A fourth slash is part of the path: a leading "//" is expected.
+        uri = "file:////test/test/file"
+        expected = Path("//test/test/file")
+        self.assertEqual(_file_uri_to_path(uri), expected)
+
+    def test_uri_not_file(self):
+        # A non-"file" scheme is not converted; None is expected instead.
+        uri = "http://test/test/file"
+        self.assertIsNone(_file_uri_to_path(uri))
diff --git a/tests/pyunit/stdlib/resources/pyunit_md5_utils_check.py b/tests/pyunit/stdlib/resources/pyunit_md5_utils_check.py
index 65bf33544e..7d6e1f42de 100644
--- a/tests/pyunit/stdlib/resources/pyunit_md5_utils_check.py
+++ b/tests/pyunit/stdlib/resources/pyunit_md5_utils_check.py
@@ -46,7 +46,7 @@ def test_md5FileConsistency(self) -> None:
         md5 = md5_file(Path(file.name))
         os.remove(file.name)
 
-        self.assertEquals("b113b29fce251f2023066c3fda2ec9dd", md5)
+        self.assertEqual("b113b29fce251f2023066c3fda2ec9dd", md5)
 
     def test_identicalFilesIdenticalMd5(self) -> None:
         # This test ensures that two files with exactly the same contents have
@@ -68,14 +68,13 @@ def test_identicalFilesIdenticalMd5(self) -> None:
 
         os.remove(file.name)
 
-        self.assertEquals(first_file_md5, second_file_md5)
+        self.assertEqual(first_file_md5, second_file_md5)
 
 
 class MD5DirTestSuite(unittest.TestCase):
     """Test cases for gem5.resources.md5_utils.md5_dir()"""
 
     def _create_temp_directory(self) -> Path:
-
         dir = tempfile.mkdtemp()
 
         with open(os.path.join(dir, "file1"), "w") as f:
@@ -99,7 +98,7 @@ def test_md5DirConsistency(self) -> None:
         md5 = md5_dir(dir)
         shutil.rmtree(dir)
 
-        self.assertEquals("ad5ac785de44c9fc2fe2798cab2d7b1a", md5)
+        self.assertEqual("ad5ac785de44c9fc2fe2798cab2d7b1a", md5)
 
     def test_identicalDirsIdenticalMd5(self) -> None:
         # This test ensures that two directories with exactly the same contents
@@ -113,4 +112,4 @@ def test_identicalDirsIdenticalMd5(self) -> None:
         second_md5 = md5_dir(dir2)
         shutil.rmtree(dir2)
 
-        self.assertEquals(first_md5, second_md5)
+        self.assertEqual(first_md5, second_md5)
diff --git a/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
index b1eda4e6ed..bab91f2fb7 100644
--- a/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
+++ b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
@@ -78,13 +78,11 @@ def test_obtain_resources_no_version(self):
             resource_directory=self.get_resource_dir(),
             gem5_version="develop",
         )
-        self.assertEquals("1.7.0", resource.get_resource_version())
+        self.assertEqual("1.7.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
-        self.assertEquals(
-            "test description v1.7.0", resource.get_description()
-        )
-        self.assertEquals("src/test-source", resource.get_source())
-        self.assertEquals(ISA.ARM, resource.get_architecture())
+        self.assertEqual("test description v1.7.0", resource.get_description())
+        self.assertEqual("src/test-source", resource.get_source())
+        self.assertEqual(ISA.ARM, resource.get_architecture())
 
     def test_obtain_resources_with_version_compatible(self):
         resource = obtain_resource(
@@ -93,13 +91,13 @@ def test_obtain_resources_with_version_compatible(self):
             resource_version="1.5.0",
             gem5_version="develop",
         )
-        self.assertEquals("1.5.0", resource.get_resource_version())
+        self.assertEqual("1.5.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
-        self.assertEquals(
+        self.assertEqual(
             "test description for 1.5.0", resource.get_description()
         )
-        self.assertEquals("src/test-source", resource.get_source())
-        self.assertEquals(ISA.ARM, resource.get_architecture())
+        self.assertEqual("src/test-source", resource.get_source())
+        self.assertEqual(ISA.ARM, resource.get_architecture())
 
     def test_obtain_resources_with_version_incompatible(self):
         resource = None
@@ -110,12 +108,6 @@ def test_obtain_resources_with_version_incompatible(self):
                 resource_directory=self.get_resource_dir(),
                 resource_version="1.5.0",
             )
-        self.assertTrue(
-            f"warn: Resource test-binary-resource with version 1.5.0 is not known to be compatible with gem5 version {core.gem5Version}. "
-            "This may cause problems with your simulation. This resource's compatibility with different gem5 versions can be found here: "
-            f"https://resources.gem5.org/resources/test-binary-resource/versions"
-            in f.getvalue()
-        )
 
         resource = obtain_resource(
             resource_id="test-binary-resource",
@@ -123,13 +115,13 @@ def test_obtain_resources_with_version_incompatible(self):
             resource_version="1.5.0",
             gem5_version="develop",
         )
-        self.assertEquals("1.5.0", resource.get_resource_version())
+        self.assertEqual("1.5.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
-        self.assertEquals(
+        self.assertEqual(
             "test description for 1.5.0", resource.get_description()
         )
-        self.assertEquals("src/test-source", resource.get_source())
-        self.assertEquals(ISA.ARM, resource.get_architecture())
+        self.assertEqual("src/test-source", resource.get_source())
+        self.assertEqual(ISA.ARM, resource.get_architecture())
 
     def test_obtain_resources_no_version_invalid_id(self):
         with self.assertRaises(Exception) as context:
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index f2088db8ef..1bf02fd691 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -86,11 +86,11 @@ def test_binary_resource(self) -> None:
 
         self.assertIsInstance(resource, BinaryResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "binary-example documentation.", resource.get_description()
         )
-        self.assertEquals("src/simple", resource.get_source())
-        self.assertEquals(ISA.ARM, resource.get_architecture())
+        self.assertEqual("src/simple", resource.get_source())
+        self.assertEqual(ISA.ARM, resource.get_architecture())
 
     def test_kernel_resource(self) -> None:
         """Tests the loading of a KernelResource."""
@@ -102,11 +102,11 @@ def test_kernel_resource(self) -> None:
 
         self.assertIsInstance(resource, KernelResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "kernel-example documentation.", resource.get_description()
         )
-        self.assertEquals("src/linux-kernel", resource.get_source())
-        self.assertEquals(ISA.RISCV, resource.get_architecture())
+        self.assertEqual("src/linux-kernel", resource.get_source())
+        self.assertEqual(ISA.RISCV, resource.get_architecture())
 
     def test_bootloader_resource(self) -> None:
         """Tests the loading of a BootloaderResource."""
@@ -118,7 +118,7 @@ def test_bootloader_resource(self) -> None:
 
         self.assertIsInstance(resource, BootloaderResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "bootloader documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
@@ -134,11 +134,11 @@ def test_disk_image_resource(self) -> None:
 
         self.assertIsInstance(resource, DiskImageResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "disk-image documentation.", resource.get_description()
         )
-        self.assertEquals("src/x86-ubuntu", resource.get_source())
-        self.assertEquals("1", resource.get_root_partition())
+        self.assertEqual("src/x86-ubuntu", resource.get_source())
+        self.assertEqual("1", resource.get_root_partition())
 
     def test_checkpoint_resource(self) -> None:
         """Tests the loading of a CheckpointResource."""
@@ -150,7 +150,7 @@ def test_checkpoint_resource(self) -> None:
 
         self.assertIsInstance(resource, CheckpointResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "checkpoint-example documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
@@ -178,14 +178,14 @@ def test_simpoint_directory_resource(self) -> None:
 
         self.assertIsInstance(resource, SimpointDirectoryResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "simpoint directory documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
 
-        self.assertEquals(1000000, resource.get_simpoint_interval())
-        self.assertEquals(1000000, resource.get_warmup_interval())
-        self.assertEquals(
+        self.assertEqual(1000000, resource.get_simpoint_interval())
+        self.assertEqual(1000000, resource.get_warmup_interval())
+        self.assertEqual(
             Path(
                 Path(self.get_resource_dir())
                 / "simpoint-directory-example"
@@ -193,7 +193,7 @@ def test_simpoint_directory_resource(self) -> None:
             ),
             resource.get_simpoint_file(),
         )
-        self.assertEquals(
+        self.assertEqual(
             Path(
                 Path(self.get_resource_dir())
                 / "simpoint-directory-example"
@@ -201,7 +201,7 @@ def test_simpoint_directory_resource(self) -> None:
             ),
             resource.get_weight_file(),
         )
-        self.assertEquals("Example Workload", resource.get_workload_name())
+        self.assertEqual("Example Workload", resource.get_workload_name())
 
     def test_simpoint_resource(self) -> None:
         """Tests the loading of a Simpoint resource."""
@@ -213,16 +213,14 @@ def test_simpoint_resource(self) -> None:
 
         self.assertIsInstance(resource, SimpointResource)
 
-        self.assertEquals(
-            "simpoint documentation.", resource.get_description()
-        )
+        self.assertEqual("simpoint documentation.", resource.get_description())
         self.assertIsNone(resource.get_source())
         self.assertIsNone(resource.get_local_path())
 
-        self.assertEquals(1000000, resource.get_simpoint_interval())
-        self.assertEquals(23445, resource.get_warmup_interval())
-        self.assertEquals([2, 3, 4, 15], resource.get_simpoint_list())
-        self.assertEquals([0.1, 0.2, 0.4, 0.3], resource.get_weight_list())
+        self.assertEqual(1000000, resource.get_simpoint_interval())
+        self.assertEqual(23445, resource.get_warmup_interval())
+        self.assertEqual([2, 3, 4, 15], resource.get_simpoint_list())
+        self.assertEqual([0.1, 0.2, 0.4, 0.3], resource.get_weight_list())
 
     def test_file_resource(self) -> None:
         """Tests the loading of a FileResource."""
@@ -247,7 +245,7 @@ def test_directory_resource(self) -> None:
 
         self.assertIsInstance(resource, DirectoryResource)
 
-        self.assertEquals(
+        self.assertEqual(
             "directory-example documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
@@ -268,7 +266,7 @@ def test_looppoint_pinpoints_resource(self) -> None:
         # LooppointCsvLoader.
         self.assertIsInstance(resource, LooppointCsvLoader)
 
-        self.assertEquals(
+        self.assertEqual(
             "A looppoint pinpoints csv file.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
@@ -278,7 +276,7 @@ def test_looppoint_json_restore_resource(self) -> None:
         Looppoint JSON file."""
 
         resource = obtain_resource(
-            resource_id="looppoint-json-restore-resource-region-1",
+            resource_id="looppoint-json-restore-resource-region-1-example",
             resource_directory=self.get_resource_dir(),
             resource_version="1.0.0",
             gem5_version="develop",
@@ -287,10 +285,10 @@ def test_looppoint_json_restore_resource(self) -> None:
         self.assertIsInstance(resource, LooppointJsonResource)
         self.assertIsInstance(resource, LooppointJsonLoader)
 
-        self.assertEquals(1, len(resource.get_regions()))
+        self.assertEqual(1, len(resource.get_regions()))
         self.assertTrue("1" in resource.get_regions())
 
-        self.assertEquals(
+        self.assertEqual(
             "A looppoint json file resource.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
diff --git a/tests/pyunit/stdlib/resources/pyunit_suite_checks.py b/tests/pyunit/stdlib/resources/pyunit_suite_checks.py
new file mode 100644
index 0000000000..419dbaed9f
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_suite_checks.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import contextlib
+import io
+import unittest
+import tempfile
+import os
+import shutil
+from pathlib import Path
+from gem5.resources.resource import (
+    obtain_resource,
+    SuiteResource,
+    WorkloadResource,
+)
+from gem5.resources.client_api.client_wrapper import ClientWrapper
+from unittest.mock import patch
+
+mock_config_json = {  # client config passed to ClientWrapper in this file
+    "sources": {
+        "baba": {
+            "url": Path(__file__).parent / "refs/suite-checks.json",  # file next to this test
+            "isMongo": False,  # presumably marks a non-MongoDB source -- confirm
+        }
+    },
+}
+
+
+class CustomSuiteResourceTestSuite(unittest.TestCase):
+    @classmethod
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def setUpClass(cls):
+        cls.workload1 = obtain_resource("simple-workload-1")
+        cls.workload2 = obtain_resource("simple-workload-2")
+        cls.SuiteResource = SuiteResource(
+            workloads={cls.workload1: set(), cls.workload2: set()}
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_with_input_group(self) -> None:
+        """
+        Tests that `with_input_group` rejects an unknown input group.
+        """
+
+        # Both workloads were registered with empty input-group sets, so
+        # "testtag2" is unknown. Only the call lives inside the `with`:
+        # statements placed after a raising call in that block never run,
+        # and `context.exception` is only set once the block has exited.
+        with self.assertRaises(Exception) as context:
+            self.SuiteResource.with_input_group("testtag2")
+        self.assertIn(
+            "Input group testtag2 not found in Suite.",
+            str(context.exception),
+        )
+
+    def test_get_input_groups(self):
+        """
+        Tests the `get_input_groups` function.
+        """
+        self.assertEqual(self.SuiteResource.get_input_groups(), set())
+
+
+class SuiteResourceTestSuite(unittest.TestCase):
+    @classmethod
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def setUpClass(cls):
+        cls.suite = obtain_resource("suite-example", gem5_version="develop")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_with_input_group(self) -> None:
+        """
+        Tests the `with_input_group` function.
+        """
+        # test if an input group can return a single workload in a suite resource
+        filtered_suite = self.suite.with_input_group("testtag2")
+        self.assertIsInstance(filtered_suite, SuiteResource)
+        self.assertEqual(len(filtered_suite), 1)
+        for workload in filtered_suite:
+            self.assertIsInstance(workload, WorkloadResource)
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_with_input_group_multiple(self) -> None:
+        # test if an input group can return multiple workloads in a suite resource
+        filtered_suite = self.suite.with_input_group("testtag1")
+        self.assertIsInstance(filtered_suite, SuiteResource)
+        self.assertEqual(len(filtered_suite), 2)
+        for workload in filtered_suite:
+            self.assertIsInstance(workload, WorkloadResource)
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_with_input_group_invalid(self) -> None:
+        """
+        Tests the `with_input_group` function with an invalid input group.
+        """
+        # Only the raising call goes inside the `with`; the message is
+        # checked afterwards, once `context.exception` has been populated.
+        with self.assertRaises(Exception) as context:
+            self.suite.with_input_group("invalid")
+        self.assertIn(
+            "Input group invalid not found in Suite.",
+            str(context.exception),
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_get_input_groups(self) -> None:
+        """
+        Tests the `get_input_groups` function.
+        """
+        expected_input_groups = {"testtag1", "testtag2", "testtag3"}
+        self.assertEqual(self.suite.get_input_groups(), expected_input_groups)
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json),
+    )
+    def test_get_input_groups_not_found(self) -> None:
+        """
+        Tests that `get_input_groups` raises when handed an input group.
+        """
+        # NOTE(review): every other call in this file invokes
+        # `get_input_groups()` without arguments, so this presumably fails
+        # on the unexpected argument rather than with a "not found"
+        # message; no message is asserted for that reason -- confirm the
+        # intended API before tightening this check.
+        with self.assertRaises(Exception):
+            self.suite.get_input_groups("invalid")
diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
index b59e09d4fe..c38fc8e3b8 100644
--- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
@@ -32,6 +32,7 @@
     BinaryResource,
     DiskImageResource,
     obtain_resource,
+    WorkloadResource,
 )
 
 from typing import Dict
@@ -61,47 +62,47 @@ class CustomWorkloadTestSuite(unittest.TestCase):
         new=ClientWrapper(mock_config_json),
     )
     def setUpClass(cls) -> None:
-        cls.custom_workload = CustomWorkload(
+        cls.custom_workload = WorkloadResource(
             function="set_se_binary_workload",
             parameters={
                 "binary": obtain_resource(
-                    "x86-hello64-static", gem5_version="develop"
+                    "x86-hello64-static-example", gem5_version="develop"
                 ),
                 "arguments": ["hello", 6],
             },
         )
 
     def test_get_function_str(self) -> None:
-        # Tests `CustomResource.get_function_str`
+        # Tests `CustomWorkload.get_function_str`
 
         self.assertEqual(
             "set_se_binary_workload", self.custom_workload.get_function_str()
         )
 
     def test_get_parameters(self) -> None:
-        # Tests `CustomResource.get_parameter`
+        # Tests `CustomWorkload.get_parameters`
 
         parameters = self.custom_workload.get_parameters()
         self.assertTrue(isinstance(parameters, Dict))
-        self.assertEquals(2, len(parameters))
+        self.assertEqual(2, len(parameters))
 
         self.assertTrue("binary" in parameters)
         self.assertTrue(isinstance(parameters["binary"], BinaryResource))
 
         self.assertTrue("arguments" in parameters)
         self.assertTrue(isinstance(parameters["arguments"], list))
-        self.assertEquals(2, len(parameters["arguments"]))
-        self.assertEquals("hello", parameters["arguments"][0])
-        self.assertEquals(6, parameters["arguments"][1])
+        self.assertEqual(2, len(parameters["arguments"]))
+        self.assertEqual("hello", parameters["arguments"][0])
+        self.assertEqual(6, parameters["arguments"][1])
 
     def test_add_parameters(self) -> None:
-        # Tests `CustomResource.set_parameter` for the case where we add a new
+        # Tests `CustomWorkload.set_parameter` for the case where we add a new
         # parameter value.
 
         self.custom_workload.set_parameter("test_param", 10)
 
         self.assertTrue("test_param" in self.custom_workload.get_parameters())
-        self.assertEquals(
+        self.assertEqual(
             10, self.custom_workload.get_parameters()["test_param"]
         )
 
@@ -109,14 +110,14 @@ def test_add_parameters(self) -> None:
         del self.custom_workload.get_parameters()["test_param"]
 
     def test_override_parameter(self) -> None:
-        # Tests `CustomResource.set_parameter` for the case where we override
+        # Tests `CustomWorkload.set_parameter` for the case where we override
         # a parameter's value.
 
         old_value = self.custom_workload.get_parameters()["binary"]
 
         self.custom_workload.set_parameter("binary", "test")
         self.assertTrue("binary" in self.custom_workload.get_parameters())
-        self.assertEquals(
+        self.assertEqual(
             "test", self.custom_workload.get_parameters()["binary"]
         )
 
@@ -135,12 +136,12 @@ class WorkloadTestSuite(unittest.TestCase):
         ClientWrapper(mock_config_json),
     )
     def setUpClass(cls):
-        cls.workload = Workload("simple-boot", gem5_version="develop")
+        cls.workload = obtain_resource("simple-boot", gem5_version="develop")
 
     def test_get_function_str(self) -> None:
         # Tests `Resource.get_function_str`
 
-        self.assertEquals(
+        self.assertEqual(
             "set_kernel_disk_workload", self.workload.get_function_str()
         )
 
@@ -172,7 +173,7 @@ def test_add_parameters(self) -> None:
         self.workload.set_parameter("test_param", 10)
 
         self.assertTrue("test_param" in self.workload.get_parameters())
-        self.assertEquals(10, self.workload.get_parameters()["test_param"])
+        self.assertEqual(10, self.workload.get_parameters()["test_param"])
 
         # Cleanup
         del self.workload.get_parameters()["test_param"]
@@ -185,7 +186,7 @@ def test_override_parameter(self) -> None:
 
         self.workload.set_parameter("readfile_contents", "test")
         self.assertTrue("readfile_contents" in self.workload.get_parameters())
-        self.assertEquals(
+        self.assertEqual(
             "test", self.workload.get_parameters()["readfile_contents"]
         )
 
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index 414bf73b11..9a92204765 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -19,9 +19,9 @@
         "id": "disk-image-example",
         "description": "disk-image documentation.",
         "architecture": "X86",
-        "is_zipped": true,
-        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
-        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
         "source": "src/x86-ubuntu",
         "root_partition": "1",
         "resource_version": "1.0.0",
@@ -64,9 +64,9 @@
         "description": "checkpoint-example documentation.",
         "architecture": "RISCV",
         "is_zipped": false,
-        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
         "source": null,
         "is_tar_archive": true,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "resource_version": "1.0.0",
         "gem5_versions": [
@@ -93,8 +93,8 @@
         "id": "file-example",
         "description": null,
         "is_zipped": false,
-        "md5sum": "2efd144c11829ab18d54eae6371e120a",
-        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
         "source": null,
         "resource_version": "1.0.0",
         "gem5_versions": [
@@ -106,10 +106,10 @@
         "category": "directory",
         "id": "directory-example",
         "description": "directory-example documentation.",
-        "is_zipped": false,
-        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
         "source": null,
+        "is_zipped": false,
         "is_tar_archive": true,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "resource_version": "1.0.0",
         "gem5_versions": [
@@ -177,7 +177,7 @@
     },
     {
         "category": "looppoint-json",
-        "id": "looppoint-json-restore-resource-region-1",
+        "id": "looppoint-json-restore-resource-region-1-example",
         "description": "A looppoint json file resource.",
         "is_zipped": false,
         "region_id": "1",
diff --git a/tests/pyunit/stdlib/resources/refs/suite-checks.json b/tests/pyunit/stdlib/resources/refs/suite-checks.json
new file mode 100644
index 0000000000..7583020292
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/suite-checks.json
@@ -0,0 +1,97 @@
+[
+    {
+        "id": "suite-example",
+        "category": "suite",
+        "resource_version": "1.0.0",
+        "gem5_versions": ["develop","23.1"],
+        "workloads": [
+            {
+                "id": "simple-workload-1",
+                "resource_version": "1.0.0",
+                "input_group": ["testtag1", "testtag2"]
+            },
+            {
+                "id": "simple-workload-2",
+                "resource_version": "1.0.0",
+                "input_group": ["testtag1", "testtag3"]
+            }
+        ]
+    },
+    {
+        "category": "workload",
+        "id": "simple-workload-1",
+        "description": "Description of workload here",
+        "function": "set_kernel_disk_workload",
+        "resources": {
+            "kernel": "x86-linux-kernel-5.2.3-example",
+            "disk-image": "x86-ubuntu-18.04-img-example"
+        },
+        "additional_params": {
+            "readfile_contents": "echo 'Boot successful'; m5 exit"
+        },
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "workload",
+        "id": "simple-workload-2",
+        "description": "Description of workload here",
+        "function": "set_kernel_disk_workload",
+        "resources": {
+            "kernel": "x86-linux-kernel-5.2.3-example",
+            "disk-image": "x86-ubuntu-18.04-img-example"
+        },
+        "additional_params": {
+            "readfile_contents": "echo 'Boot successful'; m5 exit"
+        },
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "kernel",
+        "id": "x86-linux-kernel-5.2.3-example",
+        "description": "The linux kernel (v5.2.3), compiled to X86.",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "4838c99b77d33c8307b939c16624e4ac",
+        "url": "http://dist.gem5.org/dist/develop/kernels/x86/static/vmlinux-5.2.3",
+        "source": "src/linux-kernel",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "disk-image",
+        "id": "x86-ubuntu-18.04-img-example",
+        "description": "A disk image containing Ubuntu 18.04 for x86..",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "dbf120338b37153e3334603970cebd8c",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
+        "source": "src/x86-ubuntu",
+        "root_partition": "1",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "x86-hello64-static-example",
+        "description": "A 'Hello World!' binary.",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "dbf120338b37153e3334603970cebd8c",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
+        "source": "src/simple",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json
index dcb8577619..bf954059c5 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json
@@ -1,7 +1,7 @@
 [
     {
         "category": "kernel",
-        "id": "x86-linux-kernel-5.2.3",
+        "id": "x86-linux-kernel-5.2.3-example",
         "description": "The linux kernel (v5.2.3), compiled to X86.",
         "architecture": "X86",
         "is_zipped": false,
@@ -15,12 +15,12 @@
     },
     {
         "category": "disk-image",
-        "id": "x86-ubuntu-18.04-img",
+        "id": "x86-ubuntu-18.04-img-example",
         "description": "A disk image containing Ubuntu 18.04 for x86..",
         "architecture": "X86",
-        "is_zipped": true,
-        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
-        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "is_zipped": false,
+        "md5sum": "dbf120338b37153e3334603970cebd8c",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
         "source": "src/x86-ubuntu",
         "root_partition": "1",
         "resource_version": "1.0.0",
@@ -34,8 +34,8 @@
         "description": "Description of workload here",
         "function": "set_kernel_disk_workload",
         "resources": {
-            "kernel": "x86-linux-kernel-5.2.3",
-            "disk-image": "x86-ubuntu-18.04-img"
+            "kernel": "x86-linux-kernel-5.2.3-example",
+            "disk-image": "x86-ubuntu-18.04-img-example"
         },
         "additional_params": {
             "readfile_contents": "echo 'Boot successful'; m5 exit"
@@ -47,7 +47,7 @@
     },
     {
         "category": "binary",
-        "id": "x86-hello64-static",
+        "id": "x86-hello64-static-example",
         "description": "A 'Hello World!' binary.",
         "architecture": "X86",
         "is_zipped": false,
diff --git a/tests/run.py b/tests/run.py
index dde8f70749..f95418523e 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -161,6 +161,7 @@ def run_test(root):
 if not os.path.isdir(test_progs):
     test_progs = joinpath(tests_root, "test-progs")
 
+
 # generate path to binary file
 def binpath(app, file=None):
     # executable has same name as app unless specified otherwise
@@ -188,7 +189,7 @@ def run_config(config, argv=None):
     src_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
     abs_path = joinpath(src_root, config)
 
-    code = compile(open(abs_path, "r").read(), abs_path, "exec")
+    code = compile(open(abs_path).read(), abs_path, "exec")
     scope = {"__file__": config, "__name__": "__m5_main__"}
 
     # Set the working directory in case we are executing from
@@ -234,6 +235,7 @@ def run_config(config, argv=None):
     )
 )
 
+
 # Initialize all CPUs in a system
 def initCPUs(sys):
     def initCPU(cpu):
diff --git a/util/checkpoint-tester.py b/util/checkpoint-tester.py
index 1e4024b858..9c638dd736 100755
--- a/util/checkpoint-tester.py
+++ b/util/checkpoint-tester.py
@@ -102,7 +102,7 @@
 subprocess.call([m5_binary] + ["-red", cptdir] + args + checkpoint_args)
 
 dirs = os.listdir(cptdir)
-expr = re.compile("cpt\.([0-9]*)")
+expr = re.compile(r"cpt\.([0-9]*)")
 cpts = []
 for dir in dirs:
     match = expr.match(dir)
diff --git a/util/checkpoint_aggregator.py b/util/checkpoint_aggregator.py
index 86892c87b1..069557a2ef 100755
--- a/util/checkpoint_aggregator.py
+++ b/util/checkpoint_aggregator.py
@@ -57,7 +57,7 @@ def aggregate(output_dir, cpts, no_compress, memory_size):
     max_curtick = 0
     num_digits = len(str(len(cpts) - 1))
 
-    for (i, arg) in enumerate(cpts):
+    for i, arg in enumerate(cpts):
         print(arg)
         merged_config = myCP()
         config = myCP()
diff --git a/util/cpt_upgrader.py b/util/cpt_upgrader.py
index a852294fbc..44928bcd2d 100755
--- a/util/cpt_upgrader.py
+++ b/util/cpt_upgrader.py
@@ -193,7 +193,7 @@ def process_file(path, **kwargs):
     if not osp.isfile(path):
         import errno
 
-        raise IOError(errno.ENOENT, "No such file", path)
+        raise OSError(errno.ENOENT, "No such file", path)
 
     verboseprint(f"Processing file {path}....")
 
@@ -208,7 +208,7 @@ def process_file(path, **kwargs):
     cpt.optionxform = str
 
     # Read the current data
-    cpt_file = open(path, "r")
+    cpt_file = open(path)
     cpt.read_file(cpt_file)
     cpt_file.close()
 
@@ -220,7 +220,7 @@ def process_file(path, **kwargs):
 
         # Legacy linear checkpoint version
         # convert to list of tags before proceeding
-        tags = set([])
+        tags = set()
         for i in range(2, cpt_ver + 1):
             tags.add(Upgrader.legacy[i].tag)
         verboseprint("performed legacy version -> tags conversion")
@@ -253,7 +253,7 @@ def process_file(path, **kwargs):
     # downgraders are present, respecting dependences
     to_apply = (Upgrader.tag_set - tags) | (Upgrader.untag_set & tags)
     while to_apply:
-        ready = set([t for t in to_apply if Upgrader.get(t).ready(tags)])
+        ready = {t for t in to_apply if Upgrader.get(t).ready(tags)}
         if not ready:
             print("could not apply these upgrades:", " ".join(to_apply))
             print("update dependences impossible to resolve; aborting")
diff --git a/util/cpt_upgraders/arm-ccregs.py b/util/cpt_upgraders/arm-ccregs.py
index 435be7b0cb..abfbe2d5b3 100644
--- a/util/cpt_upgraders/arm-ccregs.py
+++ b/util/cpt_upgraders/arm-ccregs.py
@@ -5,13 +5,13 @@ def upgrader(cpt):
         for sec in cpt.sections():
             import re
 
-            re_cpu_match = re.match("^(.*sys.*\.cpu[^.]*)\.xc\.(.+)$", sec)
+            re_cpu_match = re.match(r"^(.*sys.*\.cpu[^.]*)\.xc\.(.+)$", sec)
             # Search for all the execution contexts
             if not re_cpu_match:
                 continue
 
             items = []
-            for (item, value) in cpt.items(sec):
+            for item, value in cpt.items(sec):
                 items.append(item)
             if "ccRegs" not in items:
                 intRegs = cpt.get(sec, "intRegs").split()
diff --git a/util/cpt_upgraders/arm-contextidr-el2.py b/util/cpt_upgraders/arm-contextidr-el2.py
index 891fec5e0d..d016c1a224 100644
--- a/util/cpt_upgraders/arm-contextidr-el2.py
+++ b/util/cpt_upgraders/arm-contextidr-el2.py
@@ -5,7 +5,7 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.isa$", sec):
+            if re.search(r".*sys.*\.cpu.*\.isa$", sec):
                 miscRegs = cpt.get(sec, "miscRegs").split()
                 # CONTEXTIDR_EL2 defaults to 0b11111100000000000001
                 miscRegs[599:599] = [0xFC001]
diff --git a/util/cpt_upgraders/arm-gem5-gic-ext.py b/util/cpt_upgraders/arm-gem5-gic-ext.py
index fea852ff13..4e543a24e0 100644
--- a/util/cpt_upgraders/arm-gem5-gic-ext.py
+++ b/util/cpt_upgraders/arm-gem5-gic-ext.py
@@ -61,7 +61,7 @@ def upgrader(cpt):
     new_per_cpu_regs = (("cpuSgiPendingExt", "0"), ("cpuSgiActiveExt", "0"))
 
     for sec in cpt.sections():
-        if re.search(".*\.gic$", sec):
+        if re.search(r".*\.gic$", sec):
             for reg, default in per_cpu_regs:
                 value = cpt.get(sec, reg).split(" ")
                 assert (
diff --git a/util/cpt_upgraders/arm-gicv2-banked-regs.py b/util/cpt_upgraders/arm-gicv2-banked-regs.py
index 44a6146b58..6438a178d4 100644
--- a/util/cpt_upgraders/arm-gicv2-banked-regs.py
+++ b/util/cpt_upgraders/arm-gicv2-banked-regs.py
@@ -33,13 +33,14 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+
 # duplicate banked registers into new per-cpu arrays.
 def upgrader(cpt):
     if cpt.get("root", "isa", fallback="") == "arm":
         for sec in cpt.sections():
             import re
 
-            if not re.search("\.gic$", sec):
+            if not re.search(r"\.gic$", sec):
                 continue
             cpuEnabled = cpt.get(sec, "cpuEnabled").split()
 
diff --git a/util/cpt_upgraders/arm-hdlcd-upgrade.py b/util/cpt_upgraders/arm-hdlcd-upgrade.py
index 96d6368718..87431e8b9f 100644
--- a/util/cpt_upgraders/arm-hdlcd-upgrade.py
+++ b/util/cpt_upgraders/arm-hdlcd-upgrade.py
@@ -69,7 +69,7 @@ def upgrader(cpt):
     }
 
     for sec in cpt.sections():
-        if re.search(".*\.hdlcd$", sec):
+        if re.search(r".*\.hdlcd$", sec):
             options = {}
             for new, old in list(option_names.items()):
                 options[new] = cpt.get(sec, old)
diff --git a/util/cpt_upgraders/arm-miscreg-teehbr.py b/util/cpt_upgraders/arm-miscreg-teehbr.py
index d6e81e0da1..656757d036 100644
--- a/util/cpt_upgraders/arm-miscreg-teehbr.py
+++ b/util/cpt_upgraders/arm-miscreg-teehbr.py
@@ -5,7 +5,7 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.isa$", sec):
+            if re.search(r".*sys.*\.cpu.*\.isa$", sec):
                 mr = cpt.get(sec, "miscRegs").split()
                 if len(mr) == 161:
                     print("MISCREG_TEEHBR already seems to be inserted.")
diff --git a/util/cpt_upgraders/arm-sve.py b/util/cpt_upgraders/arm-sve.py
index 45d2949aa8..c484bae057 100644
--- a/util/cpt_upgraders/arm-sve.py
+++ b/util/cpt_upgraders/arm-sve.py
@@ -12,8 +12,7 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.isa$", sec):
-
+            if re.search(r".*sys.*\.cpu.*\.isa$", sec):
                 # haveSVE = false
                 cpt.set(sec, "haveSVE", "false")
 
diff --git a/util/cpt_upgraders/arm-sysreg-mapping-ns.py b/util/cpt_upgraders/arm-sysreg-mapping-ns.py
index fd02062039..c0a76549a0 100644
--- a/util/cpt_upgraders/arm-sysreg-mapping-ns.py
+++ b/util/cpt_upgraders/arm-sysreg-mapping-ns.py
@@ -33,6 +33,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+
 # reflect updated register mappings for ARM ISA
 def upgrader(cpt):
     if cpt.get("root", "isa", fallback="") == "arm":
@@ -40,7 +41,7 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.isa\d*$", sec):
+            if re.search(r".*sys.*\.cpu.*\.isa\d*$", sec):
                 mr = cpt.get(sec, "miscRegs").split()
                 if int(mr[0]) & 16 == 0:  # CPSR reg width; 0 for AArch64
                     mr[112] = mr[111]  # ACTLR_NS = ACTLR
diff --git a/util/cpt_upgraders/armv8.py b/util/cpt_upgraders/armv8.py
index 6679beb88a..e5453770fa 100644
--- a/util/cpt_upgraders/armv8.py
+++ b/util/cpt_upgraders/armv8.py
@@ -12,7 +12,7 @@ def upgrader(cpt):
     )
     # Find the CPU context's and upgrade their registers
     for sec in cpt.sections():
-        re_xc_match = re.match("^.*?sys.*?\.cpu(\d+)*\.xc\.*", sec)
+        re_xc_match = re.match(r"^.*?sys.*?\.cpu(\d+)*\.xc\.*", sec)
         if not re_xc_match:
             continue
 
@@ -38,7 +38,7 @@ def upgrader(cpt):
 
     # Update the cpu interrupt field
     for sec in cpt.sections():
-        re_int_match = re.match("^.*?sys.*?\.cpu(\d+)*$", sec)
+        re_int_match = re.match(r"^.*?sys.*?\.cpu(\d+)*$", sec)
         if not re_int_match:
             continue
 
@@ -49,7 +49,7 @@ def upgrader(cpt):
 
     # Update the per cpu interrupt structure
     for sec in cpt.sections():
-        re_int_match = re.match("^.*?sys.*?\.cpu(\d+)*\.interrupts$", sec)
+        re_int_match = re.match(r"^.*?sys.*?\.cpu(\d+)*\.interrupts$", sec)
         if not re_int_match:
             continue
 
@@ -60,7 +60,7 @@ def upgrader(cpt):
 
     # Update the misc regs and add in new isa specific fields
     for sec in cpt.sections():
-        re_isa_match = re.match("^.*?sys.*?\.cpu(\d+)*\.isa$", sec)
+        re_isa_match = re.match(r"^.*?sys.*?\.cpu(\d+)*\.isa$", sec)
         if not re_isa_match:
             continue
 
@@ -254,7 +254,7 @@ def upgrader(cpt):
     cpu_prefix = {}
     # Add in state for ITB/DTB
     for sec in cpt.sections():
-        re_tlb_match = re.match("(^.*?sys.*?\.cpu(\d+)*)\.(dtb|itb)$", sec)
+        re_tlb_match = re.match(r"(^.*?sys.*?\.cpu(\d+)*)\.(dtb|itb)$", sec)
         if not re_tlb_match:
             continue
 
@@ -271,7 +271,7 @@ def upgrader(cpt):
     # Add in extra state for the new TLB Entries
     for sec in cpt.sections():
         re_tlbentry_match = re.match(
-            "(^.*?sys.*?\.cpu(\d+)*)\.(dtb|itb).TlbEntry\d+$", sec
+            r"(^.*?sys.*?\.cpu(\d+)*)\.(dtb|itb).TlbEntry\d+$", sec
         )
         if not re_tlbentry_match:
             continue
diff --git a/util/cpt_upgraders/isa-is-simobject.py b/util/cpt_upgraders/isa-is-simobject.py
index 0fd33f733e..fa6734cbfa 100644
--- a/util/cpt_upgraders/isa-is-simobject.py
+++ b/util/cpt_upgraders/isa-is-simobject.py
@@ -60,7 +60,7 @@ def upgrader(cpt):
     for sec in cpt.sections():
         import re
 
-        re_cpu_match = re.match("^(.*sys.*\.cpu[^.]*)\.xc\.(.+)$", sec)
+        re_cpu_match = re.match(r"^(.*sys.*\.cpu[^.]*)\.xc\.(.+)$", sec)
         # Search for all the execution contexts
         if not re_cpu_match:
             continue
@@ -75,17 +75,17 @@ def upgrader(cpt):
 
         isa_section = []
         for fspec in isa_fields:
-            for (key, value) in cpt.items(sec, raw=True):
+            for key, value in cpt.items(sec, raw=True):
                 if key in isa_fields:
                     isa_section.append((key, value))
 
         name = f"{re_cpu_match.group(1)}.isa"
         isa_sections.append((name, isa_section))
 
-        for (key, value) in isa_section:
+        for key, value in isa_section:
             cpt.remove_option(sec, key)
 
-    for (sec, options) in isa_sections:
+    for sec, options in isa_sections:
         # Some intermediate versions of gem5 have empty ISA sections
         # (after we made the ISA a SimObject, but before we started to
         # serialize into a separate ISA section).
@@ -97,7 +97,7 @@ def upgrader(cpt):
                     "Unexpected populated ISA section in old checkpoint"
                 )
 
-        for (key, value) in options:
+        for key, value in options:
             cpt.set(sec, key, value)
 
 
diff --git a/util/cpt_upgraders/memory-per-range.py b/util/cpt_upgraders/memory-per-range.py
index d75a4acf8c..24fa0b592e 100644
--- a/util/cpt_upgraders/memory-per-range.py
+++ b/util/cpt_upgraders/memory-per-range.py
@@ -6,7 +6,7 @@ def upgrader(cpt):
         import re
 
         # Search for a physical memory
-        if re.search(".*sys.*\.physmem$", sec):
+        if re.search(r".*sys.*\.physmem$", sec):
             # Add the number of stores attribute to the global physmem
             cpt.set(sec, "nbr_of_stores", "1")
 
@@ -24,7 +24,7 @@ def upgrader(cpt):
             cpt.set(section_name, "store_id", "0")
             cpt.set(section_name, "range_size", mem_size)
             cpt.set(section_name, "filename", mem_filename)
-        elif re.search(".*sys.*\.\w*mem$", sec):
+        elif re.search(r".*sys.*\.\w*mem$", sec):
             # Due to the lack of information about a start address,
             # this migration only works if there is a single memory in
             # the system, thus starting at 0
diff --git a/util/cpt_upgraders/mempool-sections.py b/util/cpt_upgraders/mempool-sections.py
index dec2e02799..55f1d591dc 100644
--- a/util/cpt_upgraders/mempool-sections.py
+++ b/util/cpt_upgraders/mempool-sections.py
@@ -12,7 +12,6 @@ def upgrader(cpt):
             systems[sec] = ptrs, limits
 
     for sec, (ptrs, limits) in systems.items():
-
         ptrs = list(map(int, ptrs.split()))
         limits = list(map(int, limits.split()))
 
diff --git a/util/cpt_upgraders/remove-arm-cpsr-mode-miscreg.py b/util/cpt_upgraders/remove-arm-cpsr-mode-miscreg.py
index 8eba866f1a..66b6cd8bd7 100644
--- a/util/cpt_upgraders/remove-arm-cpsr-mode-miscreg.py
+++ b/util/cpt_upgraders/remove-arm-cpsr-mode-miscreg.py
@@ -5,7 +5,7 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.isa$", sec):
+            if re.search(r".*sys.*\.cpu.*\.isa$", sec):
                 mr = cpt.get(sec, "miscRegs").split()
                 # Remove MISCREG_CPSR_MODE
                 del mr[137]
diff --git a/tests/configs/o3-timing-ruby.py b/util/cpt_upgraders/riscv-dyn-vlen.py
similarity index 64%
rename from tests/configs/o3-timing-ruby.py
rename to util/cpt_upgraders/riscv-dyn-vlen.py
index 30ee69ef23..ea2de9d19d 100644
--- a/tests/configs/o3-timing-ruby.py
+++ b/util/cpt_upgraders/riscv-dyn-vlen.py
@@ -1,6 +1,6 @@
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
+# Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 # All rights reserved.
-#
+
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -11,7 +11,7 @@
 # neither the name of the copyright holders nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
-#
+
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -24,33 +24,26 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import m5
-from m5.objects import *
-
-import ruby_config
-
-ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", 1)
-
-cpu = DerivO3CPU(cpu_id=0)
 
-system = System(
-    cpu=cpu,
-    physmem=ruby_memory,
-    membus=SystemXBar(),
-    mem_mode="timing",
-    clk_domain=SrcClockDomain(clock="1GHz"),
-)
+def upgrader(cpt):
+    """
+    Update the checkpoint to support initial RVV implementation.
+    The updater is taking the following steps.
 
-# Create a seperate clock domain for components that should run at
-# CPUs frequency
-system.cpu.clk_domain = SrcClockDomain(clock="2GHz")
+    Set vector registers to occupy 327680 bytes (40regs * 8192bytes).
+    Vector registers now occupy this space regardless of VLEN as the
+    VecRegContainer is always MaxVecLenInBytes.
+    """
 
-system.physmem.port = system.membus.mem_side_ports
-# create the interrupt controller
-cpu.createInterruptController()
-cpu.connectBus(system.membus)
+    for sec in cpt.sections():
+        import re
 
-# Connect the system port for loading of binaries etc
-system.system_port = system.membus.cpu_side_ports
+        # Search for all XC sections
 
-root = Root(full_system=False, system=system)
+        if re.search(r".*processor.*\.core.*\.xc.*", sec):
+            # Updating RVV vector registers (dummy values)
+            mr = cpt.get(sec, "regs.vector").split()
+            if len(mr) != 327680:
+                cpt.set(
+                    sec, "regs.vector", " ".join("0" for i in range(327680))
+                )
diff --git a/tests/configs/pc-simple-atomic.py b/util/cpt_upgraders/riscv-pcstate.py
similarity index 65%
rename from tests/configs/pc-simple-atomic.py
rename to util/cpt_upgraders/riscv-pcstate.py
index ac2c3c06b8..4182355419 100644
--- a/tests/configs/pc-simple-atomic.py
+++ b/util/cpt_upgraders/riscv-pcstate.py
@@ -1,15 +1,6 @@
-# Copyright (c) 2012 ARM Limited
+# Copyright (c) 2023 Google LLC
 # All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
+
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -20,7 +11,7 @@
 # neither the name of the copyright holders nor the names of its
 # contributors may be used to endorse or promote products derived from
 # this software without specific prior written permission.
-#
+
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -33,9 +24,26 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.objects import *
-from x86_generic import *
 
-root = LinuxX86FSSystemUniprocessor(
-    mem_mode="atomic", mem_class=SimpleMemory, cpu_class=AtomicSimpleCPU
-).create_root()
+def upgrader(cpt):
+    # Update the RISC-V pcstate to match the new version of
+    # PCState
+
+    for sec in cpt.sections():
+        import re
+
+        if re.search(r".*processor.*\.core.*\.xc.*", sec):
+            if cpt.get(sec, "_rvType", fallback="") == "":
+                cpt.set(sec, "_rvType", "1")
+
+            if cpt.get(sec, "_vlenb", fallback="") == "":
+                cpt.set(sec, "_vlenb", "32")
+
+            if cpt.get(sec, "_vtype", fallback="") == "":
+                cpt.set(sec, "_vtype", str(1 << 63))
+
+            if cpt.get(sec, "_vl", fallback="") == "":
+                cpt.set(sec, "_vl", "0")
+
+            if cpt.get(sec, "_compressed", fallback="") == "":
+                cpt.set(sec, "_compressed", "false")
diff --git a/util/cpt_upgraders/riscv-vext.py b/util/cpt_upgraders/riscv-vext.py
new file mode 100644
index 0000000000..d335f74b83
--- /dev/null
+++ b/util/cpt_upgraders/riscv-vext.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
+# All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+def upgrader(cpt):
+    """
+    Update the checkpoint to support the initial RVV implementation.
+    The upgrader takes the following steps:
+
+    1) Set vector registers to occupy 1280 bytes (40regs * 32bytes)
+    2) Clear vector_element, vector_predicate and matrix registers
+    3) Add RVV misc registers in the checkpoint
+    """
+
+    for sec in cpt.sections():
+        import re
+
+        # Search for all XC sections
+        if re.search(r".*processor.*\.core.*\.xc.*", sec):
+            # Updating RVV vector registers (dummy values)
+            # Assuming VLEN = 256 bits (32 bytes)
+            mr = cpt.get(sec, "regs.vector").split()
+            if len(mr) <= 8:
+                cpt.set(sec, "regs.vector", " ".join("0" for i in range(1280)))
+
+            # Updating RVV vector element (dummy values)
+            cpt.set(sec, "regs.vector_element", "")
+
+            # Updating RVV vector predicate (dummy values)
+            cpt.set(sec, "regs.vector_predicate", "")
+
+            # Updating RVV matrix (dummy values)
+            cpt.set(sec, "regs.matrix", "")
+
+        # Search for all ISA sections
+        if re.search(r".*processor.*\.core.*\.isa$", sec):
+            # Updating RVV misc registers (dummy values)
+            mr = cpt.get(sec, "miscRegFile").split()
+            if len(mr) == 164:
+                print(
+                    "MISCREG_* RVV registers already seem " "to be inserted."
+                )
+            else:
+                # Add dummy value for MISCREG_VSTART
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VXSAT
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VXRM
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VCSR
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VL
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VTYPE
+                mr.insert(121, 0)
+                # Add dummy value for MISCREG_VLENB
+                mr.insert(121, 0)
+                cpt.set(sec, "miscRegFile", " ".join(str(x) for x in mr))
+
+
+legacy_version = 17
diff --git a/util/cpt_upgraders/smt-interrupts.py b/util/cpt_upgraders/smt-interrupts.py
index d8366c2aa4..0e0e0e963a 100644
--- a/util/cpt_upgraders/smt-interrupts.py
+++ b/util/cpt_upgraders/smt-interrupts.py
@@ -5,7 +5,7 @@ def upgrader(cpt):
     for sec in cpt.sections():
         import re
 
-        re_cpu_match = re.match("^(.*sys.*\.cpu[^._]*)$", sec)
+        re_cpu_match = re.match(r"^(.*sys.*\.cpu[^._]*)$", sec)
         if re_cpu_match != None:
             interrupts = cpt.get(sec, "interrupts")
             intStatus = cpt.get(sec, "intStatus")
diff --git a/util/cpt_upgraders/x86-add-tlb.py b/util/cpt_upgraders/x86-add-tlb.py
index 5b6778bcbf..4903704731 100644
--- a/util/cpt_upgraders/x86-add-tlb.py
+++ b/util/cpt_upgraders/x86-add-tlb.py
@@ -5,11 +5,11 @@ def upgrader(cpt):
             import re
 
             # Search for all ISA sections
-            if re.search(".*sys.*\.cpu.*\.dtb$", sec):
+            if re.search(r".*sys.*\.cpu.*\.dtb$", sec):
                 cpt.set(sec, "_size", "0")
                 cpt.set(sec, "lruSeq", "0")
 
-            if re.search(".*sys.*\.cpu.*\.itb$", sec):
+            if re.search(r".*sys.*\.cpu.*\.itb$", sec):
                 cpt.set(sec, "_size", "0")
                 cpt.set(sec, "lruSeq", "0")
     else:
diff --git a/util/decode_inst_dep_trace.py b/util/decode_inst_dep_trace.py
index ded0051ae1..9cd50a6819 100755
--- a/util/decode_inst_dep_trace.py
+++ b/util/decode_inst_dep_trace.py
@@ -125,12 +125,12 @@ def main():
 
     try:
         ascii_out = open(sys.argv[2], "w")
-    except IOError:
+    except OSError:
         print("Failed to open ", sys.argv[2], " for writing")
         exit(-1)
 
     # Read the magic number in 4-byte Little Endian
-    magic_number = proto_in.read(4)
+    magic_number = proto_in.read(4).decode()
 
     if magic_number != "gem5":
         print("Unrecognized file")
diff --git a/util/decode_inst_trace.py b/util/decode_inst_trace.py
index 8e59f6955d..5e77138689 100755
--- a/util/decode_inst_trace.py
+++ b/util/decode_inst_trace.py
@@ -85,7 +85,7 @@ def main():
 
     try:
         ascii_out = open(sys.argv[2], "w")
-    except IOError:
+    except OSError:
         print("Failed to open ", sys.argv[2], " for writing")
         exit(-1)
 
@@ -153,7 +153,9 @@ def main():
 
         for mem_acc in inst.mem_access:
             ascii_out.write(
-                " %#x-%#x;" % (mem_acc.addr, mem_acc.addr + mem_acc.size)
+                " {:#x}-{:#x};".format(
+                    mem_acc.addr, mem_acc.addr + mem_acc.size
+                )
             )
 
         ascii_out.write("\n")
diff --git a/util/decode_packet_trace.py b/util/decode_packet_trace.py
index 66a74c6f01..5111ea4618 100755
--- a/util/decode_packet_trace.py
+++ b/util/decode_packet_trace.py
@@ -59,7 +59,7 @@ def main():
 
     try:
         ascii_out = open(sys.argv[2], "w")
-    except IOError:
+    except OSError:
         print("Failed to open ", sys.argv[2], " for writing")
         exit(-1)
 
diff --git a/util/dockerfiles/docker-bake.hcl b/util/dockerfiles/docker-bake.hcl
new file mode 100644
index 0000000000..8218290bd2
--- /dev/null
+++ b/util/dockerfiles/docker-bake.hcl
@@ -0,0 +1,189 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# docker buildx bake --push
+# https://docs.docker.com/build/bake/reference
+
+variable "IMAGE_URI" {
+  default = "ghcr.io/gem5" # The gem5 GitHub container registry.
+}
+
+variable "TAG" {
+  default = "latest"
+}
+
+# A group of targets to be built. Note: groups can contain other groups.
+# Any target or group can be built individually, e.g.:
+# `docker buildx bake --push ubuntu-20-04_all-dependencies` or
+# `docker buildx bake --push ubuntu-releases`.
+group "default" {
+  targets=["clang-compilers", "ubuntu-releases", "gcc-compilers", "gcn-gpu", "gpu-fs", "sst", "systemc", "llvm-gnu-cross-compiler-riscv64", "gem5-all-min-dependencies"]
+}
+
+group "ubuntu-releases" {
+  targets=["ubuntu-22-04_all-dependencies", "ubuntu-20-04_all-dependencies", "ubuntu-22-04_min-dependencies"]
+}
+
+group "clang-compilers" {
+  targets = ["clang-compilers-base-20-04", "clang-compilers-base-22-04", "clang-compilers-16"]
+}
+
+group "gcc-compilers" {
+  targets = ["gcc-compilers-base-20-04", "gcc-compilers-base-22-04"]
+}
+
+# Common attributes across all targets. Note: these can be overwritten.
+target "common" {
+  # Here we are enabling multi-platform builds. We are compiling to both ARM
+  # and X86.
+  platforms = ["linux/amd64", "linux/arm64"]
+}
+
+target "gcn-gpu" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "gcn-gpu"
+  tags = ["${IMAGE_URI}/gcn-gpu:${TAG}"]
+}
+
+target "gpu-fs" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "gpu-fs"
+  tags = ["${IMAGE_URI}/gpu-fs:${TAG}"]
+}
+
+target "sst" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "sst" # Directory containing the SST Dockerfile (was "sst-11.1.0").
+  tags = ["${IMAGE_URI}/sst-env:${TAG}"]
+}
+
+target "systemc" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "systemc-2.3.3"
+  tags = ["${IMAGE_URI}/systemc-env:${TAG}"]
+}
+
+target "ubuntu-22-04_all-dependencies" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "ubuntu-22.04_all-dependencies"
+  tags = ["${IMAGE_URI}/ubuntu-22.04_all-dependencies:${TAG}"]
+}
+
+target "ubuntu-20-04_all-dependencies" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "ubuntu-20.04_all-dependencies"
+  tags = ["${IMAGE_URI}/ubuntu-20.04_all-dependencies:${TAG}"]
+}
+
+target "ubuntu-22-04_min-dependencies" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "ubuntu-22.04_min-dependencies"
+  tags = ["${IMAGE_URI}/ubuntu-22.04_min-dependencies:${TAG}"]
+}
+
+target "gcc-compilers-base-20-04" {
+  name = "gcc-compilers-${replace(ver, ".", "-")}"
+  inherits = ["common"]
+  context = "ubuntu-20.04_gcc-version"
+  dockerfile = "Dockerfile"
+  matrix = {
+    ver = ["8", "9", "10"]
+  }
+  args = {
+    version = ver
+  }
+  tags = ["${IMAGE_URI}/gcc-version-${ver}:${TAG}"]
+}
+
+target "gcc-compilers-base-22-04" {
+  name = "gcc-compilers-${replace(ver, ".", "-")}"
+  inherits = ["common"]
+  context = "ubuntu-22.04_gcc-version"
+  dockerfile = "Dockerfile"
+  matrix = {
+    ver = ["11", "12"]
+  }
+  args = {
+    version = ver
+  }
+  tags = ["${IMAGE_URI}/gcc-version-${ver}:${TAG}"]
+}
+
+target "clang-compilers-base-20-04" {
+  name = "clang-compilers-${replace(ver, ".", "-")}"
+  inherits = ["common"]
+  context = "ubuntu-20.04_clang-version"
+  dockerfile = "Dockerfile"
+  matrix = {
+    ver = ["7", "8", "9", "10", "11", "12"]
+  }
+  args = {
+    version = ver
+  }
+  tags = ["${IMAGE_URI}/clang-version-${ver}:${TAG}"]
+}
+
+target "clang-compilers-base-22-04" {
+  name = "clang-compilers-${replace(ver, ".", "-")}"
+  inherits = ["common"]
+  context = "ubuntu-22.04_clang-version"
+  dockerfile = "Dockerfile"
+  matrix = {
+    ver = ["13", "14", "15"]
+  }
+  args = {
+    version = ver
+  }
+  tags =  ["${IMAGE_URI}/clang-version-${ver}:${TAG}"]
+}
+
+target "clang-compilers-16" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "ubuntu-22.04_clang-16" # Must match the directory name exactly.
+  tags = ["${IMAGE_URI}/clang-version-16:${TAG}"]
+}
+
+target "llvm-gnu-cross-compiler-riscv64" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "llvm-gnu-cross-compiler-riscv64"
+  tags = ["${IMAGE_URI}/llvm-gnu-cross-compiler-riscv64:${TAG}"]
+}
+
+target "gem5-all-min-dependencies" {
+  inherits = ["common"]
+  dockerfile = "Dockerfile"
+  context = "gem5-all-min-dependencies"
+  tags = ["${IMAGE_URI}/gem5-all-min-dependencies:${TAG}"]
+}
diff --git a/util/dockerfiles/docker-compose.yaml b/util/dockerfiles/docker-compose.yaml
index 39579962b1..5c63e101ba 100644
--- a/util/dockerfiles/docker-compose.yaml
+++ b/util/dockerfiles/docker-compose.yaml
@@ -1,3 +1,4 @@
+---
 version: '2'
 
 services:
@@ -13,7 +14,7 @@ services:
         image: gcr.io/gem5-test/gpu-fs:latest
     sst:
         build:
-            context: sst-11.1.0
+            context: sst
             dockerfile: Dockerfile
         image: gcr.io/gem5-test/sst-env:latest
     systemc:
@@ -21,11 +22,6 @@ services:
             context: systemc-2.3.3
             dockerfile: Dockerfile
         image: gcr.io/gem5-test/systemc-env:latest
-    ubuntu-18.04_all-dependencies:
-        build:
-            context: ubuntu-18.04_all-dependencies
-            dockerfile: Dockerfile
-        image: gcr.io/gem5-test/ubuntu-18.04_all-dependencies:latest
     ubuntu-20.04_all-dependencies:
         build:
             context: ubuntu-20.04_all-dependencies
@@ -41,27 +37,13 @@ services:
             context: ubuntu-22.04_min-dependencies
             dockerfile: Dockerfile
         image: gcr.io/gem5-test/ubuntu-22.04_min-dependencies:latest
-    gcc-7:
-        build:
-            context: ubuntu-18.04_gcc-version
-            dockerfile: Dockerfile
-            args:
-                - version=7
-        image: gcr.io/gem5-test/gcc-version-7:latest
     gcc-8:
         build:
-            context: ubuntu-18.04_gcc-version
+            context: ubuntu-20.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=8
         image: gcr.io/gem5-test/gcc-version-8:latest
-    gcc-9:
-        build:
-            context: ubuntu-20.04_gcc-version
-            dockerfile: Dockerfile
-            args:
-                - version=9
-        image: gcr.io/gem5-test/gcc-version-9:latest
     gcc-10:
         build:
             context: ubuntu-20.04_gcc-version
@@ -83,30 +65,23 @@ services:
             args:
                 - version=12
         image: gcr.io/gem5-test/gcc-version-12:latest
-    clang-6:
-        build:
-            context: ubuntu-18.04_clang-version
-            dockerfile: Dockerfile
-            args:
-                - version=6.0
-        image: gcr.io/gem5-test/clang-version-6.0:latest
     clang-7:
         build:
-            context: ubuntu-18.04_clang-version
+            context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=7
         image: gcr.io/gem5-test/clang-version-7:latest
     clang-8:
         build:
-            context: ubuntu-18.04_clang-version
+            context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=8
         image: gcr.io/gem5-test/clang-version-8:latest
     clang-9:
         build:
-            context: ubuntu-18.04_clang-version
+            context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=9
@@ -146,6 +121,18 @@ services:
             args:
                 - version=14
         image: gcr.io/gem5-test/clang-version-14:latest
+    clang-15:
+        build:
+            context: ubuntu-22.04_clang-version
+            dockerfile: Dockerfile
+            args:
+                - version=15
+        image: gcr.io/gem5-test/clang-version-15:latest
+    clang-16:
+        build:
+            context: ubuntu-22.04_clang-16
+            dockerfile: Dockerfile
+        image: gcr.io/gem5-test/clang-version-16:latest
     llvm-gnu-cross-compiler-riscv64:
         build:
             context: llvm-gnu-cross-compiler-riscv64
diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile
index c5db8963a8..9aead08c43 100644
--- a/util/dockerfiles/gcn-gpu/Dockerfile
+++ b/util/dockerfiles/gcn-gpu/Dockerfile
@@ -23,13 +23,13 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
     apt -y install build-essential git m4 scons zlib1g zlib1g-dev \
     libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
     python3-dev python-is-python3 doxygen libboost-all-dev \
-    libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config
+    libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config gdb
 
 # Requirements for ROCm
 RUN apt -y install cmake mesa-common-dev libgflags-dev libgoogle-glog-dev
@@ -121,3 +121,41 @@ RUN git clone -b rocm-4.0.1 https://github.com/ROCmSoftwarePlatform/MIOpen.git
 # when linking in the database file
 RUN mkdir -p /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 && \
     ln -s /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 /root/.cache/miopen/2.9.0
+
+# Add commands from halofinder Dockerfile
+RUN apt-get update && apt-get -y install libopenmpi-dev libomp-dev
+
+ENV HCC_AMDGPU_TARGET="gfx801,gfx803,gfx900"
+
+ENV HIPCC_BIN=/opt/rocm/bin
+ENV MPI_INCLUDE=/usr/lib/x86_64-linux-gnu/openmpi/include
+
+ENV OPT="-O3 -g -DRCB_UNTHREADED_BUILD -DUSE_SERIAL_COSMO"
+ENV OMP="-I/usr/lib/llvm-10/include/openmp -L/usr/lib/llvm-10/lib -fopenmp"
+
+ENV HIPCC_FLAGS="-v -ffast_math -DINLINE_FORCE -I${MPI_INCLUDE}"
+ENV HIPCC_FLAGS="-v -I${MPI_INCLUDE} -I/opt/rocm/hip/include"
+
+ENV HACC_PLATFORM="hip"
+ENV HACC_OBJDIR="${HACC_PLATFORM}"
+
+ENV HACC_CFLAGS="$OPT $OMP $HIPCC_FLAGS"
+ENV HACC_CC="${HIPCC_BIN}/hipcc -x c -Xclang -std=c99"
+
+ENV HACC_CXXFLAGS="$OPT $OMP $HIPCC_FLAGS"
+ENV HACC_CXX="${HIPCC_BIN}/hipcc -Xclang"
+
+ENV HACC_LDFLAGS="-lm -lrt"
+
+# USE_SERIAL_COSMO must be set to avoid building the code with MPI, which isn't
+# supported on the GPU model in gem5.
+ENV USE_SERIAL_COSMO="1"
+ENV HACC_NUM_CUDA_DEV="1"
+ENV HACC_MPI_CFLAGS="$OPT $OMP $HIPCC_FLAGS"
+ENV HACC_MPI_CC="${HIPCC_BIN}/hipcc -x c -Xclang -std=c99 -Xclang -pthread"
+
+ENV HACC_MPI_CXXFLAGS="$OPT $OMP $HIPCC_FLAGS"
+ENV HACC_MPI_CXX="${HIPCC_BIN}/hipcc -Xclang -pthread"
+ENV HACC_MPI_LD="${HIPCC_BIN}/hipcc -Xclang -pthread"
+
+ENV HACC_MPI_LDFLAGS="-lm -lrt"
diff --git a/util/dockerfiles/gem5-all-min-dependencies/Dockerfile b/util/dockerfiles/gem5-all-min-dependencies/Dockerfile
index da5613e1a1..cb73ab0bce 100644
--- a/util/dockerfiles/gem5-all-min-dependencies/Dockerfile
+++ b/util/dockerfiles/gem5-all-min-dependencies/Dockerfile
@@ -24,13 +24,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM gcr.io/gem5-test/ubuntu-22.04_min-dependencies:latest as source
+FROM --platform=${BUILDPLATFORM} ghcr.io/gem5/ubuntu-22.04_min-dependencies:latest as source
 RUN apt -y update && apt -y install git
-RUN git clone -b develop https://gem5.googlesource.com/public/gem5 /gem5
+RUN git clone -b develop https://github.com/gem5/gem5/ /gem5
 WORKDIR /gem5
 RUN scons -j`nproc` build/ALL/gem5.fast
 
-FROM gcr.io/gem5-test/ubuntu-22.04_min-dependencies:latest
+FROM ghcr.io/gem5/ubuntu-22.04_min-dependencies:latest
 COPY --from=source /gem5/build/ALL/gem5.fast /usr/local/bin/gem5
 
 ENTRYPOINT [ "/usr/local/bin/gem5" ]
diff --git a/util/dockerfiles/gpu-fs/Dockerfile b/util/dockerfiles/gpu-fs/Dockerfile
index 63ae6b0783..55f4de4aff 100644
--- a/util/dockerfiles/gpu-fs/Dockerfile
+++ b/util/dockerfiles/gpu-fs/Dockerfile
@@ -27,7 +27,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
     apt -y install build-essential git m4 scons zlib1g zlib1g-dev \
@@ -44,12 +44,12 @@ RUN apt -y install wget gnupg2 rpm
 # Get the radeon gpg key for apt repository
 RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
 
-# Modify apt sources to pull from ROCm 4.2 repository only
-RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.2/ ubuntu main' | tee /etc/apt/sources.list.d/rocm.list
+# Modify apt sources to pull from ROCm 5.4.2 repository only
+RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.4.2/ ubuntu main' | tee /etc/apt/sources.list.d/rocm.list
 
 RUN apt-get update
 RUN apt -y install libnuma-dev
 
 # Install the ROCm-dkms source
 RUN apt -y install initramfs-tools
-RUN apt -y install rocm-dkms
+RUN apt -y install rocm-dev
diff --git a/util/dockerfiles/llvm-gnu-cross-compiler-riscv64/Dockerfile b/util/dockerfiles/llvm-gnu-cross-compiler-riscv64/Dockerfile
index 0f01e7931d..cc84f3d45a 100644
--- a/util/dockerfiles/llvm-gnu-cross-compiler-riscv64/Dockerfile
+++ b/util/dockerfiles/llvm-gnu-cross-compiler-riscv64/Dockerfile
@@ -26,7 +26,7 @@
 
 
 # stage 1: download the dependencies
-FROM ubuntu:20.04 AS stage1
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04 AS stage1
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && apt -y install \
diff --git a/util/dockerfiles/sst-11.1.0/Dockerfile b/util/dockerfiles/sst/Dockerfile
similarity index 88%
rename from util/dockerfiles/sst-11.1.0/Dockerfile
rename to util/dockerfiles/sst/Dockerfile
index 970e6979b4..50027fe5b2 100644
--- a/util/dockerfiles/sst-11.1.0/Dockerfile
+++ b/util/dockerfiles/sst/Dockerfile
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
@@ -41,9 +41,9 @@ RUN mkdir /sst
 
 # Download and build SST-Core without MPI support
 WORKDIR /sst/
-RUN wget https://github.com/sstsimulator/sst-core/releases/download/v11.1.0_Final/sstcore-11.1.0.tar.gz; \
-    tar xf sstcore-11.1.0.tar.gz;
-WORKDIR /sst/sstcore-11.1.0/
+RUN wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz; \
+    tar xf sstcore-13.0.0.tar.gz;
+WORKDIR /sst/sstcore-13.0.0/
 RUN ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
                                         --disable-mpi; \
     make all -j $(nproc); \
@@ -51,9 +51,9 @@ RUN ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
 
 # Download and build SST-Elements
 WORKDIR /sst
-RUN wget https://github.com/sstsimulator/sst-elements/releases/download/v11.1.0_Final/sstelements-11.1.0.tar.gz; \
-    tar xf sstelements-11.1.0.tar.gz;
-WORKDIR /sst/sst-elements-library-11.1.0/
+RUN wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz; \
+    tar xf sstelements-13.0.0.tar.gz;
+WORKDIR /sst/sst-elements-library-13.0.0/
 RUN ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
                                         --with-sst-core=$SST_CORE_HOME; \
     make all -j $(nproc); \
diff --git a/util/dockerfiles/systemc-2.3.3/Dockerfile b/util/dockerfiles/systemc-2.3.3/Dockerfile
index ac94666d6d..41d4fafb93 100644
--- a/util/dockerfiles/systemc-2.3.3/Dockerfile
+++ b/util/dockerfiles/systemc-2.3.3/Dockerfile
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:22.04
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
diff --git a/util/dockerfiles/ubuntu-20.04_all-dependencies/Dockerfile b/util/dockerfiles/ubuntu-20.04_all-dependencies/Dockerfile
index c838a06dda..8f092adc7a 100644
--- a/util/dockerfiles/ubuntu-20.04_all-dependencies/Dockerfile
+++ b/util/dockerfiles/ubuntu-20.04_all-dependencies/Dockerfile
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
@@ -32,6 +32,15 @@ RUN apt -y update && apt -y upgrade && \
     libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
     python3-dev python-is-python3 doxygen libboost-all-dev \
     libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config pip \
-    python3-venv black
+    python3-venv black gcc-10 g++-10
 
 RUN pip install mypy pre-commit
+
+RUN update-alternatives --install \
+    /usr/bin/g++ g++ /usr/bin/g++-10 100
+RUN update-alternatives --install \
+    /usr/bin/gcc gcc /usr/bin/gcc-10 100
+RUN update-alternatives --install \
+    /usr/bin/c++ c++ /usr/bin/g++-10 100
+RUN update-alternatives --install \
+    /usr/bin/cc cc /usr/bin/gcc-10 100
diff --git a/util/dockerfiles/ubuntu-20.04_clang-version/Dockerfile b/util/dockerfiles/ubuntu-20.04_clang-version/Dockerfile
index 00f34c4d9e..2ea9413c7d 100644
--- a/util/dockerfiles/ubuntu-20.04_clang-version/Dockerfile
+++ b/util/dockerfiles/ubuntu-20.04_clang-version/Dockerfile
@@ -23,7 +23,7 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 
 # Valid version values:
 # 6.0
@@ -40,7 +40,10 @@ RUN apt -y update && apt -y upgrade && \
     apt -y install git m4 scons zlib1g zlib1g-dev libprotobuf-dev \
     protobuf-compiler libprotoc-dev libgoogle-perftools-dev python3-dev \
     python-is-python3 doxygen libboost-all-dev libhdf5-serial-dev \
-    python3-pydot libpng-dev clang-${version} make
+    python3-pydot libpng-dev clang-${version} make \
+    # This is needed as clang-8 does not have the libstdc++-10-dev package.
+    # It is necessary for compilation.
+    libstdc++-10-dev
 
 RUN apt-get --purge -y remove gcc
 
diff --git a/util/dockerfiles/ubuntu-20.04_gcc-version/Dockerfile b/util/dockerfiles/ubuntu-20.04_gcc-version/Dockerfile
index 0ec8083c53..7841635aef 100644
--- a/util/dockerfiles/ubuntu-20.04_gcc-version/Dockerfile
+++ b/util/dockerfiles/ubuntu-20.04_gcc-version/Dockerfile
@@ -23,12 +23,12 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:20.04
+FROM --platform=${BUILDPLATFORM} ubuntu:20.04
 
 # Valid version values:
 # 7
 # 8
-# 9
+# 9 # Not supported. See: https://github.com/gem5/gem5/issues/555.
 # 10
 ARG version
 
diff --git a/util/dockerfiles/ubuntu-22.04_all-dependencies/Dockerfile b/util/dockerfiles/ubuntu-22.04_all-dependencies/Dockerfile
index e5afc63be6..9e2580e642 100644
--- a/util/dockerfiles/ubuntu-22.04_all-dependencies/Dockerfile
+++ b/util/dockerfiles/ubuntu-22.04_all-dependencies/Dockerfile
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:22.04
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
diff --git a/util/dockerfiles/ubuntu-18.04_clang-version/Dockerfile b/util/dockerfiles/ubuntu-22.04_clang-16/Dockerfile
similarity index 71%
rename from util/dockerfiles/ubuntu-18.04_clang-version/Dockerfile
rename to util/dockerfiles/ubuntu-22.04_clang-16/Dockerfile
index 3d9c3a7c12..045efbd13a 100644
--- a/util/dockerfiles/ubuntu-18.04_clang-version/Dockerfile
+++ b/util/dockerfiles/ubuntu-22.04_clang-16/Dockerfile
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 The Regents of the University of California
+# Copyright (c) 2023 The Regents of the University of California
 # All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -23,31 +23,27 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
-FROM ubuntu:18.04
-
-# Valid version values:
-# 3.9
-# 4.0
-# 5.0
-# 6.0
-# 7
-# 8
-# 9
-ARG version
 
+ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
-    apt -y install git m4 scons zlib1g zlib1g-dev clang-${version} \
-    libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
-    python3-dev python3 doxygen make
+    apt -y install git m4 scons zlib1g zlib1g-dev libprotobuf-dev \
+    protobuf-compiler libprotoc-dev libgoogle-perftools-dev python3-dev \
+    python-is-python3 doxygen libboost-all-dev libhdf5-serial-dev \
+    python3-pydot libpng-dev make lsb-release wget \
+    software-properties-common gnupg
+
+COPY llvm.sh /llvm.sh
+RUN ./llvm.sh 16
 
 RUN apt-get --purge -y remove gcc
 
 RUN update-alternatives --install \
-    /usr/bin/clang++ clang++ /usr/bin/clang++-${version} 100
+    /usr/bin/clang++ clang++ /usr/bin/clang++-16 100
 RUN update-alternatives --install \
-    /usr/bin/clang clang /usr/bin/clang-${version} 100
+    /usr/bin/clang clang /usr/bin/clang-16 100
 RUN update-alternatives --install \
-    /usr/bin/c++ c++ /usr/bin/clang++-${version} 100
+    /usr/bin/c++ c++ /usr/bin/clang++-16 100
 RUN update-alternatives --install \
-    /usr/bin/cc cc /usr/bin/clang-${version} 100
+    /usr/bin/cc cc /usr/bin/clang-16 100
diff --git a/util/dockerfiles/ubuntu-22.04_clang-16/llvm.sh b/util/dockerfiles/ubuntu-22.04_clang-16/llvm.sh
new file mode 100755
index 0000000000..200c0390fe
--- /dev/null
+++ b/util/dockerfiles/ubuntu-22.04_clang-16/llvm.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+################################################################################
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+################################################################################
+#
+# This script will install the llvm toolchain on the different
+# Debian and Ubuntu versions
+
+set -eux
+
+usage() {
+    set +x
+    echo "Usage: $0 [llvm_major_version] [all] [OPTIONS]" 1>&2
+    echo -e "all\t\t\tInstall all packages." 1>&2
+    echo -e "-n=code_name\t\tSpecifies the distro codename, for example bionic" 1>&2
+    echo -e "-h\t\t\tPrints this help." 1>&2
+    echo -e "-m=repo_base_url\tSpecifies the base URL from which to download." 1>&2
+    exit 1;
+}
+
+CURRENT_LLVM_STABLE=17
+BASE_URL="http://apt.llvm.org"
+
+# Check for required tools
+needed_binaries=(lsb_release wget add-apt-repository gpg)
+missing_binaries=()
+for binary in "${needed_binaries[@]}"; do
+    if ! which $binary &>/dev/null ; then
+        missing_binaries+=($binary)
+    fi
+done
+if [[ ${#missing_binaries[@]} -gt 0 ]] ; then
+    echo "You are missing some tools this script requires: ${missing_binaries[@]}"
+    echo "(hint: apt install lsb-release wget software-properties-common gnupg)"
+    exit 4
+fi
+
+# Set default values for commandline arguments
+# We default to the current stable branch of LLVM
+LLVM_VERSION=$CURRENT_LLVM_STABLE
+ALL=0
+DISTRO=$(lsb_release -is)
+VERSION=$(lsb_release -sr)
+UBUNTU_CODENAME=""
+CODENAME_FROM_ARGUMENTS=""
+# Obtain VERSION_CODENAME and UBUNTU_CODENAME (for Ubuntu and its derivatives)
+source /etc/os-release
+DISTRO=${DISTRO,,}
+case ${DISTRO} in
+    debian)
+        # Debian Trixie has a workaround because of
+        # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1038383
+        if [[ "${VERSION}" == "unstable" ]] || [[ "${VERSION}" == "testing" ]] || [[ "${VERSION_CODENAME}" == "trixie" ]]; then
+            CODENAME=unstable
+            LINKNAME=
+        else
+            # "stable" Debian release
+            CODENAME=${VERSION_CODENAME}
+            LINKNAME=-${CODENAME}
+        fi
+        ;;
+    *)
+        # ubuntu and its derivatives
+        if [[ -n "${UBUNTU_CODENAME}" ]]; then
+            CODENAME=${UBUNTU_CODENAME}
+            if [[ -n "${CODENAME}" ]]; then
+                LINKNAME=-${CODENAME}
+            fi
+        fi
+        ;;
+esac
+
+# read optional command line arguments
+if [ "$#" -ge 1 ] && [ "${1::1}" != "-" ]; then
+    if [ "$1" != "all" ]; then
+        LLVM_VERSION=$1
+    else
+        # special case for ./llvm.sh all
+        ALL=1
+    fi
+    OPTIND=2
+    if [ "$#" -ge 2 ]; then
+      if [ "$2" == "all" ]; then
+          # Install all packages
+          ALL=1
+          OPTIND=3
+      fi
+    fi
+fi
+
+while getopts ":hm:n:" arg; do
+    case $arg in
+    h)
+        usage
+        ;;
+    m)
+        BASE_URL=${OPTARG}
+        ;;
+    n)
+        CODENAME=${OPTARG}
+        if [[ "${CODENAME}" == "unstable" ]]; then
+            # link name does not apply to unstable repository
+            LINKNAME=
+        else
+            LINKNAME=-${CODENAME}
+        fi
+        CODENAME_FROM_ARGUMENTS="true"
+        ;;
+    esac
+done
+
+if [[ $EUID -ne 0 ]]; then
+   echo "This script must be run as root!"
+   exit 1
+fi
+
+declare -A LLVM_VERSION_PATTERNS
+LLVM_VERSION_PATTERNS[9]="-9"
+LLVM_VERSION_PATTERNS[10]="-10"
+LLVM_VERSION_PATTERNS[11]="-11"
+LLVM_VERSION_PATTERNS[12]="-12"
+LLVM_VERSION_PATTERNS[13]="-13"
+LLVM_VERSION_PATTERNS[14]="-14"
+LLVM_VERSION_PATTERNS[15]="-15"
+LLVM_VERSION_PATTERNS[16]="-16"
+LLVM_VERSION_PATTERNS[17]="-17"
+LLVM_VERSION_PATTERNS[18]=""
+
+if [ ! ${LLVM_VERSION_PATTERNS[$LLVM_VERSION]+_} ]; then
+    echo "This script does not support LLVM version $LLVM_VERSION"
+    exit 3
+fi
+
+LLVM_VERSION_STRING=${LLVM_VERSION_PATTERNS[$LLVM_VERSION]}
+
+# join the repository name
+if [[ -n "${CODENAME}" ]]; then
+    REPO_NAME="deb ${BASE_URL}/${CODENAME}/  llvm-toolchain${LINKNAME}${LLVM_VERSION_STRING} main"
+
+    # check if the repository exists for the distro and version
+    if ! wget -q --method=HEAD ${BASE_URL}/${CODENAME} &> /dev/null; then
+        if [[ -n "${CODENAME_FROM_ARGUMENTS}" ]]; then
+            echo "Specified codename '${CODENAME}' is not supported by this script."
+        else
+            echo "Distribution '${DISTRO}' in version '${VERSION}' is not supported by this script."
+        fi
+        exit 2
+    fi
+fi
+
+
+# install everything
+
+if [[ ! -f /etc/apt/trusted.gpg.d/apt.llvm.org.asc ]]; then
+    # download GPG key once
+    wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
+fi
+
+if [[ -z "`apt-key list 2> /dev/null | grep -i llvm`" ]]; then
+    # Delete the key in the old format
+    apt-key del AF4F7421
+fi
+add-apt-repository "${REPO_NAME}"
+apt-get update
+PKG="clang-$LLVM_VERSION lldb-$LLVM_VERSION lld-$LLVM_VERSION clangd-$LLVM_VERSION"
+if [[ $ALL -eq 1 ]]; then
+    # same as in test-install.sh
+    # No worries if we have dups
+    PKG="$PKG clang-tidy-$LLVM_VERSION clang-format-$LLVM_VERSION clang-tools-$LLVM_VERSION llvm-$LLVM_VERSION-dev lld-$LLVM_VERSION lldb-$LLVM_VERSION llvm-$LLVM_VERSION-tools libomp-$LLVM_VERSION-dev libc++-$LLVM_VERSION-dev libc++abi-$LLVM_VERSION-dev libclang-common-$LLVM_VERSION-dev libclang-$LLVM_VERSION-dev libclang-cpp$LLVM_VERSION-dev libunwind-$LLVM_VERSION-dev"
+    if test $LLVM_VERSION -gt 14; then
+        PKG="$PKG libclang-rt-$LLVM_VERSION-dev libpolly-$LLVM_VERSION-dev"
+    fi
+fi
+apt-get install -y $PKG
diff --git a/util/dockerfiles/ubuntu-22.04_clang-version/Dockerfile b/util/dockerfiles/ubuntu-22.04_clang-version/Dockerfile
index 148b71dea3..7ddf7fe825 100644
--- a/util/dockerfiles/ubuntu-22.04_clang-version/Dockerfile
+++ b/util/dockerfiles/ubuntu-22.04_clang-version/Dockerfile
@@ -23,10 +23,11 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:22.04
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
 # Valid version values:
 # 13
+# 15
 ARG version
 
 ENV DEBIAN_FRONTEND=noninteractive
diff --git a/util/dockerfiles/ubuntu-22.04_gcc-version/Dockerfile b/util/dockerfiles/ubuntu-22.04_gcc-version/Dockerfile
index fcf909cec2..feac348780 100644
--- a/util/dockerfiles/ubuntu-22.04_gcc-version/Dockerfile
+++ b/util/dockerfiles/ubuntu-22.04_gcc-version/Dockerfile
@@ -23,7 +23,7 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:22.04
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
 # Valid version values:
 # 11
diff --git a/util/dockerfiles/ubuntu-22.04_min-dependencies/Dockerfile b/util/dockerfiles/ubuntu-22.04_min-dependencies/Dockerfile
index 978e2c6af5..690959da91 100644
--- a/util/dockerfiles/ubuntu-22.04_min-dependencies/Dockerfile
+++ b/util/dockerfiles/ubuntu-22.04_min-dependencies/Dockerfile
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:22.04
+FROM --platform=${BUILDPLATFORM} ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \
diff --git a/util/encode_inst_dep_trace.py b/util/encode_inst_dep_trace.py
index 9ab95bd7ed..5ca3cda79b 100755
--- a/util/encode_inst_dep_trace.py
+++ b/util/encode_inst_dep_trace.py
@@ -127,8 +127,8 @@ def main():
 
     # Open the file in read mode
     try:
-        ascii_in = open(sys.argv[1], "r")
-    except IOError:
+        ascii_in = open(sys.argv[1])
+    except OSError:
         print("Failed to open ", sys.argv[1], " for reading")
         exit(-1)
 
diff --git a/util/encode_packet_trace.py b/util/encode_packet_trace.py
index bdf1c3db06..5df3b21c7c 100755
--- a/util/encode_packet_trace.py
+++ b/util/encode_packet_trace.py
@@ -92,14 +92,14 @@ def main():
         exit(-1)
 
     try:
-        ascii_in = open(sys.argv[1], "r")
-    except IOError:
+        ascii_in = open(sys.argv[1])
+    except OSError:
         print("Failed to open ", sys.argv[1], " for reading")
         exit(-1)
 
     try:
         proto_out = open(sys.argv[2], "wb")
-    except IOError:
+    except OSError:
         print("Failed to open ", sys.argv[2], " for writing")
         exit(-1)
 
diff --git a/util/find_copyrights.py b/util/find_copyrights.py
index 0bd0ef3a51..28e3b4c66b 100644
--- a/util/find_copyrights.py
+++ b/util/find_copyrights.py
@@ -6,7 +6,7 @@
 
 from file_types import lang_type, find_files
 
-mode_line = re.compile("(-\*- *mode:.* *-\*-)")
+mode_line = re.compile(r"(-\*- *mode:.* *-\*-)")
 shell_comment = re.compile(r"^\s*#")
 lisp_comment = re.compile(r";")
 cpp_comment = re.compile(r"//")
@@ -116,7 +116,7 @@ def process_dates(dates):
     for date in dates:
         match = date_range_re.match(date)
         if match:
-            f, l = [int(d) for d in match.groups()]
+            f, l = (int(d) for d in match.groups())
             for i in range(f, l + 1):
                 output.add(i)
         else:
diff --git a/util/gem5-resources-manager/.gitignore b/util/gem5-resources-manager/.gitignore
new file mode 100644
index 0000000000..ce625cd446
--- /dev/null
+++ b/util/gem5-resources-manager/.gitignore
@@ -0,0 +1,12 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+
+# Unit test / coverage reports
+.coverage
+database/*
+instance
+instance/*
+
+# Environments
+.env
+.venv
diff --git a/util/gem5-resources-manager/README.md b/util/gem5-resources-manager/README.md
new file mode 100644
index 0000000000..efbbf97b16
--- /dev/null
+++ b/util/gem5-resources-manager/README.md
@@ -0,0 +1,216 @@
+# gem5 Resources Manager
+
+This directory contains the code to convert the JSON file to a MongoDB database. This also contains tools to manage the database as well as the JSON file.
+
+# Table of Contents
+- [gem5 Resources Manager](#gem5-resources-manager)
+- [Table of Contents](#table-of-contents)
+- [Resources Manager](#resources-manager)
+  - [Setup](#setup)
+  - [Selecting a Database](#selecting-a-database)
+    - [MongoDB](#mongodb)
+    - [JSON File](#json-file)
+  - [Adding a Resource](#adding-a-resource)
+  - [Updating a Resource](#updating-a-resource)
+  - [Deleting a Resource](#deleting-a-resource)
+  - [Adding a New Version](#adding-a-new-version)
+  - [Validation](#validation)
+- [CLI tool](#cli-tool)
+  - [create\_resources\_json](#create_resources_json)
+  - [restore\_backup](#restore_backup)
+  - [backup\_mongodb](#backup_mongodb)
+  - [get\_resource](#get_resource)
+- [Changes to Structure of JSON](#changes-to-structure-of-json)
+- [Testing](#testing)
+
+# Resources Manager
+
+This is a tool to manage the resources JSON file and the MongoDB database. This tool is used to add, delete, update, view, and search for resources.
+
+## Setup
+
+First, install the requirements:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+Then run the flask server:
+
+```bash
+python3 server.py
+```
+
+Then, you can access the server at `http://localhost:5000`.
+
+## Selecting a Database
+
+The Resource Manager currently supports 2 database options: MongoDB and JSON file.
+
+Select the database you want to use by clicking the corresponding button on the home page.
+
+### MongoDB
+
+The MongoDB database is hosted on MongoDB Atlas. To use this database, you need to have the MongoDB URI, collection name, and database name.  Once you have the information, enter it into the form and click "login" or "save and login" to login to the database.
+
+Another way to use the MongoDB database is to switch to the Generate URI tab and enter the information there. This would generate a URI that you can use to login to the database.
+
+### JSON File
+
+There are currently 3 ways to use the JSON file:
+
+1. Adding a URL to the JSON file
+2. Uploading a JSON file
+3. Using an existing JSON file
+
+## Adding a Resource
+
+Once you are logged in, you can use the search bar to search for resources. If the ID doesn't exist, the form will be prefilled with the required fields. You can then edit the fields and click "add" to add the resource to the database.
+
+## Updating a Resource
+
+If the ID exists, the form would be prefilled with the existing data. You can then edit the fields and click "update" to update the resource in the database.
+
+## Deleting a Resource
+
+If the ID exists, the form would be prefilled with the existing data. You can then click "delete" to delete the resource from the database.
+
+## Adding a New Version
+
+If the ID exists, the form would be prefilled with the existing data. Change the `resource_version` field to the new version and click "add" to add the new version to the database. You will only be able to add a new version if the `resource_version` field is different from any of the existing versions.
+
+## Validation
+
+The Resource Manager validates the data before adding it to the database. If the data is invalid, it would show an error message and not add the data to the database. The validation is done using the [schema](schema/schema.json) file. The Monaco editor automatically validates the data as you type and displays the errors in the editor.
+
+To view the schema, click on the "Show Schema" button on the left side of the page.
+
+# CLI tool
+
+```bash
+usage: gem5_resource_cli.py [-h] [-u URI] [-d DATABASE] [-c COLLECTION] {get_resource,backup_mongodb,restore_backup,create_resources_json} ...
+
+CLI for gem5-resources.
+
+positional arguments:
+  {get_resource,backup_mongodb,restore_backup,create_resources_json}
+                        The command to run.
+    get_resource        Retrieves a resource from the collection based on the given ID. if a resource version is provided, it will retrieve the resource
+                        with the given ID and version.
+    backup_mongodb      Backs up the MongoDB collection to a JSON file.
+    restore_backup      Restores a backup of the MongoDB collection from a JSON file.
+    create_resources_json
+                        Creates a JSON file of all the resources in the collection.
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -u URI, --uri URI     The URI of the MongoDB database. (default: None)
+  -d DATABASE, --database DATABASE
+                        The MongoDB database to use. (default: gem5-vision)
+  -c COLLECTION, --collection COLLECTION
+                        The MongoDB collection to use. (default: versions_test)
+```
+
+By default, the cli uses environment variables to get the URI. You can create a .env file with the `MONGO_URI` variable set to your URI. If you want to use a different URI, you can use the `-u` flag to specify the URI.
+
+## create_resources_json
+
+This command is used to create a new JSON file from the old JSON file. This is used to make the JSON file "parseable" by removing the nested JSON and adding the new fields.
+
+```bash
+usage: gem5_resource_cli.py create_resources_json [-h] [-v VERSION] [-o OUTPUT] [-s SOURCE]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -v VERSION, --version VERSION
+                        The version of the resources to create the JSON file for. (default: dev)
+  -o OUTPUT, --output OUTPUT
+                        The JSON file to create. (default: resources.json)
+  -s SOURCE, --source SOURCE
+                        The path to the gem5 source code. (default: )
+```
+
+A sample command to run this is:
+
+```bash
+python3 gem5_resource_cli.py create_resources_json -o resources_new.json -s ./gem5
+```
+
+## restore_backup
+
+This command restores the MongoDB collection from a JSON file, such as a backup created with `backup_mongodb`.
+
+```bash
+usage: gem5_resource_cli.py restore_backup [-h] [-f FILE]
+
+optional arguments:
+  -h, --help            show this help message and exit
+
+required arguments:
+  -f FILE, --file FILE  The JSON file to restore the MongoDB collection from.
+```
+
+A sample command to run this is:
+
+```bash
+python3 gem5_resource_cli.py restore_backup -f resources.json
+```
+
+## backup_mongodb
+
+This command backs up the MongoDB collection to a JSON file.
+
+```bash
+usage: gem5_resource_cli.py backup_mongodb [-h] -f FILE
+
+optional arguments:
+  -h, --help            show this help message and exit
+
+required arguments:
+  -f FILE, --file FILE  The JSON file to back up the MongoDB collection to.
+```
+
+A sample command to run this is:
+
+```bash
+python3 gem5_resource_cli.py backup_mongodb -f resources.json
+```
+
+## get_resource
+
+This command retrieves a resource from the MongoDB collection by its ID. If a version is given, the resource with that ID and version is retrieved.
+
+```bash
+usage: gem5_resource_cli.py get_resource [-h] -i ID [-v VERSION]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -v VERSION, --version VERSION
+                        The version of the resource to retrieve.
+
+required arguments:
+  -i ID, --id ID        The ID of the resource to retrieve.
+```
+
+A sample command to run this is:
+
+```bash
+python3 gem5_resource_cli.py get_resource -i x86-ubuntu-18.04-img -v 1.0.0
+```
+# Changes to Structure of JSON
+
+To view the new schema, see [schema.json](https://resources.gem5.org/gem5-resources-schema.json).
+
+# Testing
+
+To run the tests, run the following command:
+
+```bash
+coverage run -m unittest discover -s test -p '*_test.py'
+```
+
+To view the coverage report, run the following command:
+
+```bash
+coverage report
+```
diff --git a/util/gem5-resources-manager/api/client.py b/util/gem5-resources-manager/api/client.py
new file mode 100644
index 0000000000..20a91b50d2
--- /dev/null
+++ b/util/gem5-resources-manager/api/client.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from abc import ABC, abstractmethod
+from typing import Dict, List
+
+
+class Client(ABC):
+    """
+    Abstract resources database client with a bounded undo/redo history.
+
+    A history entry is a dictionary: "operation" is "insert", "delete" or
+    "update", and "resource" is the query passed to the storage method.
+    """
+
+    def __init__(self):
+        self.__undo_stack = []
+        self.__redo_stack = []
+        self.__undo_limit = 10  # maximum number of undo entries kept
+
+    @abstractmethod
+    def find_resource(self, query: Dict) -> Dict:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_versions(self, query: Dict) -> List[Dict]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def update_resource(self, query: Dict) -> Dict:
+        raise NotImplementedError
+
+    @abstractmethod
+    def check_resource_exists(self, query: Dict) -> Dict:
+        raise NotImplementedError
+
+    @abstractmethod
+    def insert_resource(self, query: Dict) -> Dict:
+        raise NotImplementedError
+
+    @abstractmethod
+    def delete_resource(self, query: Dict) -> Dict:
+        raise NotImplementedError
+
+    @abstractmethod
+    def save_session(self) -> Dict:
+        raise NotImplementedError
+
+    def __apply(self, operation: Dict, invert: bool) -> None:
+        """
+        Apply a history entry. With invert set (undo), an insert is applied
+        as a delete and a delete as an insert; an update always goes through
+        update_resource and then has its two versions exchanged.
+        """
+        kind = operation["operation"]
+        if kind not in ("insert", "delete", "update"):
+            raise Exception("Invalid Operation")
+        query = operation["resource"]
+        if kind == "update":
+            self.update_resource(query)
+            # Exchange the two versions so that the next time this entry is
+            # applied, update_resource receives them in the opposite roles.
+            query["resource"], query["original_resource"] = (
+                query["original_resource"],
+                query["resource"],
+            )
+        elif (kind == "insert") != invert:
+            self.insert_resource(query)
+        else:
+            self.delete_resource(query)
+
+    def undo_operation(self) -> Dict:
+        """
+        This function undoes the last operation performed on the database.
+        """
+        if not self.__undo_stack:
+            return {"status": "Nothing to undo"}
+        operation = self.__undo_stack.pop()
+        self.__apply(operation, invert=True)
+        self.__redo_stack.append(operation)
+        return {"status": "Undone"}
+
+    def redo_operation(self) -> Dict:
+        """
+        This function redoes the last operation that was undone.
+        """
+        if not self.__redo_stack:
+            return {"status": "No operations to redo"}
+        operation = self.__redo_stack.pop()
+        self.__apply(operation, invert=False)
+        self.__undo_stack.append(operation)
+        return {"status": "Redone"}
+
+    def _add_to_stack(self, operation: Dict) -> Dict:
+        """Record an operation for undo; the oldest is dropped when full."""
+        if len(self.__undo_stack) == self.__undo_limit:
+            self.__undo_stack.pop(0)
+        self.__undo_stack.append(operation)
+        self.__redo_stack.clear()  # a new operation discards pending redos
+        return {"status": "Added to stack"}
+
+    def get_revision_status(self) -> Dict:
+        """
+        Report which revision operations are currently unavailable.
+
+        :return: "undo" and "redo" flags: 1 if that stack is empty, else 0.
+        """
+        return {
+            "undo": 1 if len(self.__undo_stack) == 0 else 0,
+            "redo": 1 if len(self.__redo_stack) == 0 else 0,
+        }
diff --git a/util/gem5-resources-manager/api/create_resources_json.py b/util/gem5-resources-manager/api/create_resources_json.py
new file mode 100644
index 0000000000..3179142939
--- /dev/null
+++ b/util/gem5-resources-manager/api/create_resources_json.py
@@ -0,0 +1,331 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+import requests
+import base64
+import os
+from jsonschema import validate
+
+
+class ResourceJsonCreator:
+    """
+    This class generates the JSON which is pushed onto MongoDB.
+    On a high-level, it does the following:
+        - Adds certain fields to the JSON.
+        - Populates those fields.
+        - Makes sure the JSON follows the schema.
+    """
+
+    base_url = "https://github.com/gem5/gem5/tree/develop"  # gem5 GitHub URL
+    # Maps each supported gem5 version to the URL of its resources.json.
+    resource_url_map = {
+        "dev": (
+            "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/"
+            "develop/resources.json?format=TEXT"
+        ),
+        "22.1": (
+            "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/"
+            "stable/resources.json?format=TEXT"
+        ),
+        "22.0": (
+            "http://resources.gem5.org/prev-resources-json/"
+            "resources-22-0.json"
+        ),
+        "21.2": (
+            "http://resources.gem5.org/prev-resources-json/"
+            "resources-21-2.json"
+        ),
+    }
+
+    def __init__(self):
+        # The schema path is resolved against the current working directory.
+        with open("schema/schema.json") as schema_file:
+            self.schema = json.load(schema_file)
+
+    def _get_file_data(self, url):
+        """Fetch url and return its JSON content, base64-decoding the body
+        first when it decodes cleanly (presumably the case for the
+        "?format=TEXT" URLs); otherwise the body is parsed as plain JSON."""
+        response = requests.get(url)
+        try:
+            return json.loads(base64.b64decode(response.text).decode("utf-8"))
+        except ValueError:
+            return response.json()
+
+    def _get_size(self, url):
+        """
+        Look up the size of a download from its Content-Length header.
+
+        :param url: Download URL
+
+        :returns: The size reported by a HEAD request on url, or 0 if the
+        header is missing or the request or the conversion fails.
+        """
+        try:
+            return int(requests.head(url).headers.get("content-length", 0))
+        except Exception:
+            return 0
+
+    def _search_folder(self, folder_path, id):
+        """
+        Recursively collect the files under a folder that contain a string.
+
+        :param folder_path: Path to the folder to begin searching
+        :param id: Phrase to search in the folder
+
+        :returns: List of the paths, written with "/" separators, of the
+        files whose contents include id
+        """
+        hits = []
+        for entry in os.listdir(folder_path):
+            candidate = os.path.join(folder_path, entry)
+            if os.path.isdir(candidate):
+                hits.extend(self._search_folder(candidate, id))
+            elif os.path.isfile(candidate):
+                with open(candidate, encoding="utf-8", errors="ignore") as f:
+                    found = id in f.read()
+                if found:
+                    # Record the path with forward slashes only.
+                    hits.append(candidate.replace("\\", "/"))
+        return hits
+
+    def _change_type(self, resource):
+        """
+        Assign a more specific "type" to a resource, in place.
+
+        Workloads keep their type and gain an "architecture" taken from the
+        first "-"-separated part of their name. Any other resource is
+        classified from its name, documentation and URL; disk images also
+        gain a "root_partition" field. A resource that matches no rule keeps
+        its current type.
+
+        :param resource: The resource dictionary to update.
+
+        :returns: The dictionary that was passed in.
+        """
+        if resource["type"] == "workload":
+            # Drop "64" and upper-case, e.g. "arm64-..." gives "ARM".
+            arch = resource["name"].split("-")[0]
+            resource["architecture"] = arch.replace("64", "").upper()
+            return resource
+
+        is_disk_image = False
+        if "kernel" in resource["name"]:
+            resource["type"] = "kernel"
+        elif "bootloader" in resource["name"]:
+            resource["type"] = "bootloader"
+        elif "benchmark" in resource["documentation"]:
+            is_disk_image = True
+            resource["type"] = "disk-image"
+            resource.setdefault("tags", []).append("benchmark")
+        elif resource["url"] is not None and ".img.gz" in resource["url"]:
+            is_disk_image = True
+            resource["type"] = "disk-image"
+        else:
+            # Fall back to keywords found in the documentation.
+            for keyword in ("binary", "checkpoint", "simpoint"):
+                if keyword in resource["documentation"]:
+                    resource["type"] = keyword
+                    break
+
+        if is_disk_image:
+            # Use the root partition from the metadata when one is given.
+            metadata = resource.get("additional_metadata", {})
+            partition = None
+            if "root_partition" in metadata:
+                partition = metadata["root_partition"]
+            resource["root_partition"] = "" if partition is None else partition
+
+        return resource
+
+    def _extract_code_examples(self, resource, source):
+        """
+        Find the gem5 config scripts that use a resource.
+
+        Every file under <source>/configs that contains the resource ID in
+        double quotes is reported as a code example. Each example is given
+        as a URL, built by replacing the leading source path of the file
+        with base_url, together with a flag telling whether the example is
+        tested. An example counts as tested when its file name occurs in at
+        least one file under <source>/tests/gem5.
+
+        :param resource: The resource dictionary; only its "id" is read.
+        :param source: Path to gem5. It is used as given, both to build the
+        searched paths and as the prefix replaced by base_url.
+
+        :returns: A list with one {"example": <URL>, "tested": <bool>}
+        dictionary per matching file; empty if no file uses the resource.
+        """
+        resource_id = resource["id"]
+        # Search for the ID wrapped in double quotes.
+        matching_files = self._search_folder(
+            source + "/configs", '"' + resource_id + '"'
+        )
+
+        tests_dir = source + "/tests/gem5"
+        code_examples = []
+        for path in matching_files:
+            # A file counts as tested if its base name is mentioned anywhere
+            # under the gem5 tests directory.
+            tested = bool(
+                self._search_folder(tests_dir, os.path.basename(path))
+            )
+            # Every path returned by the search starts with source, so only
+            # that leading part is swapped for base_url; str.replace() would
+            # also rewrite any later occurrence of source inside the path.
+            example = self.base_url + path[len(source) :]
+            json_obj = {
+                "example": example,
+                "tested": tested,
+            }
+            code_examples.append(json_obj)
+
+        return code_examples
+
+
+    def unwrap_resources(self, ver):
+        """
+        Gets the resource data registered for ``ver`` and flattens it one
+        level: every "group" entry is replaced by the items of its "contents".
+        """
+        listing = self._get_file_data(self.resource_url_map[ver])
+        flattened = []
+        for entry in listing["resources"]:
+            is_group = entry["type"] == "group"
+            flattened.extend(entry["contents"] if is_group else [entry])
+        return flattened
+
+    def _get_example_usage(self, resource):
+        """Returns the Python snippet that obtains ``resource`` by its ID."""
+        if resource["category"] != "workload":
+            return f"obtain_resource(resource_id=\"{resource['id']}\")"
+        return f"Workload(\"{resource['id']}\")"
+
+    def _parse_readme(self, url):
+        """
+        Best-effort extraction of "tags", "author" and "license" from the
+        front matter (the text between the first two "---" markers) of the
+        README at ``url``. If the README cannot be fetched or is not laid
+        out as expected, whatever was parsed up to then is returned.
+        """
+        metadata = {"tags": [], "author": [], "license": ""}
+        if not url:
+            # Nothing to fetch: skip a request that is bound to fail.
+            return metadata
+        try:
+            content = requests.get(url).text.split("---")[1]
+            if "tags:" in content:
+                # The tag lines run up to the next "key:"; the last line
+                # of that span is the name of that key, so it is dropped.
+                tags = content.split("tags:\n")[1].split(":")[0]
+                tags = tags.split("\n")[:-1]
+                tags = [tag.strip().replace("- ", "") for tag in tags]
+                metadata["tags"] = [] if tags == [""] else tags
+            if "author:" in content:
+                author = content.split("author:")[1].split("\n")[0]
+                for char in '[]"':
+                    author = author.replace(char, "")
+                metadata["author"] = [a.strip() for a in author.split(",")]
+            if "license:" in content:
+                license_text = content.split("license:")[1].split("\n")[0]
+                # Stripped like the author names; otherwise the space after
+                # "license:" would be part of the stored value.
+                metadata["license"] = license_text.strip()
+        except (requests.RequestException, IndexError):
+            # Unreachable README or unexpected layout: keep what we have.
+            pass
+        return metadata
+
+    def _add_fields(self, resources, source):
+        """
+        Converts old-format resources to the new schema and returns them.
+
+        type, name and documentation become category, id and description,
+        "additional_metadata" is flattened into the resource, and version,
+        usage, source URL, README, code example and size fields are added.
+        Keys whose value is None are left out of the result.
+        :param resources: Resource dictionaries in the old format.
+        :param source: Path to gem5
+        :returns: A list with one converted dictionary per resource.
+        """
+        converted = []
+        for resource in resources:
+            res = self._change_type(resource)
+            res["gem5_versions"] = ["23.0"]
+            res["resource_version"] = "1.0.0"
+            res["category"] = res.pop("type")
+            res["id"] = res.pop("name")
+            res["description"] = res.pop("documentation")
+            if "additional_metadata" in res:
+                for key, value in res["additional_metadata"].items():
+                    res[key] = value
+                del res["additional_metadata"]
+            res["example_usage"] = self._get_example_usage(res)
+            readme_url = ""
+            res["source_url"] = ""
+            if "source" in res:
+                source_path = str(res["source"])
+                readme_url = (
+                    "https://raw.githubusercontent.com/gem5/"
+                    "gem5-resources/develop/" + source_path + "/README.md"
+                )
+                res["source_url"] = (
+                    "https://github.com/gem5/gem5-resources/tree/develop/"
+                    + source_path
+                )
+            metadata = self._parse_readme(readme_url)
+            if "tags" not in res:
+                res["tags"] = metadata["tags"]
+            else:
+                res["tags"].extend(metadata["tags"])
+            res["author"] = metadata["author"]
+            res["license"] = metadata["license"]
+            res["code_examples"] = self._extract_code_examples(res, source)
+            if "url" in resource:
+                res["url"] = res["url"].replace(
+                    "{url_base}", "http://dist.gem5.org/dist/develop"
+                )
+                res["size"] = self._get_size(res["url"])
+            else:
+                res["size"] = 0
+            converted.append({k: v for k, v in res.items() if v is not None})
+        return converted
+
+    def _validate_schema(self, resources):
+        for resource in resources:  # stops at the first invalid resource
+            try:
+                validate(resource, schema=self.schema)
+            except Exception as e:
+                print(resource)  # show which resource failed before re-raising
+                raise e
+
+    def create_json(self, version, source, output):
+        """Builds, validates and writes the resources JSON to ``output``."""
+        converted = self._add_fields(self.unwrap_resources(version), source)
+        self._validate_schema(converted)
+        with open(output, "w") as out_file:
+            json.dump(converted, out_file, indent=4)
diff --git a/util/gem5-resources-manager/api/json_client.py b/util/gem5-resources-manager/api/json_client.py
new file mode 100644
index 0000000000..24cfaee88c
--- /dev/null
+++ b/util/gem5-resources-manager/api/json_client.py
@@ -0,0 +1,217 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pathlib import Path
+import json
+from api.client import Client
+from typing import Dict, List
+
+
+class JSONClient(Client):
+    """
+    Client that keeps the resources in a JSON file under "database/".
+
+    The whole file is loaded into ``self.resources`` on construction and
+    written back whenever a resource is inserted, updated or deleted.
+    """
+
+    def __init__(self, file_path):
+        super().__init__()
+        self.file_path = Path("database/") / file_path
+        self.resources = self._get_resources(self.file_path)
+
+    @staticmethod
+    def _version_key(resource: Dict):
+        """
+        Sort key for resource versions: "1.10.0" becomes (1, 10, 0), so
+        that versions compare numerically, not as strings.
+        """
+        return tuple(map(int, resource["resource_version"].split(".")))
+
+    def _get_resources(self, path: Path) -> List[Dict]:
+        """
+        Retrieves the resources from the JSON file.
+        :param path: The path to the JSON file.
+        :return: The resources parsed from the file.
+        """
+        with open(path) as f:
+            return json.load(f)
+
+    def find_resource(self, query: Dict) -> Dict:
+        """
+        Finds a resource within the list of resources based on the
+        provided query.
+
+        Without a "resource_version" in the query (or when it is "" or
+        "Latest") the highest version with the given "id" is returned;
+        otherwise the first resource with that "id" and version is.
+
+        :param query: The query object containing the search criteria.
+        :return: The matching resource, or {"exists": False} if none.
+        """
+        version = query.get("resource_version", "")
+        matches = [r for r in self.resources if r["id"] == query["id"]]
+        if version not in ("", "Latest"):
+            # An exact version was requested: the first match wins.
+            matches = [r for r in matches if r["resource_version"] == version]
+            return matches[0] if matches else {"exists": False}
+        if not matches:
+            return {"exists": False}
+        return max(matches, key=self._version_key)
+
+    def get_versions(self, query: Dict) -> List[Dict]:
+        """
+        Retrieves all versions of a resource with the given ID from the
+        list of resources.
+
+        :param query: The query object containing the search criteria.
+        :return: A list of {"resource_version": ...} dictionaries, one
+        per version of the resource, highest version first.
+        """
+        versions = [
+            {"resource_version": resource["resource_version"]}
+            for resource in self.resources
+            if resource["id"] == query["id"]
+        ]
+        versions.sort(key=self._version_key, reverse=True)
+        return versions
+
+    def update_resource(self, query: Dict) -> Dict:
+        """
+        Updates a resource within the list of resources based on the
+        provided query.
+
+        The resource whose "id" and "resource_version" match those of
+        query["original_resource"] is removed from the list and
+        query["resource"] is appended to it. If no resource matches, the
+        list is left as it is.
+
+        After updating the resources, the function saves the list to the
+        file at self.file_path.
+
+        :param query: The query object containing the stored resource
+        ("original_resource") and its replacement ("resource").
+        :return: A dictionary with the status of the update.
+        """
+        original_resource = query["original_resource"]
+        modified_resource = query["resource"]
+        # The "id" is what ties the versions of a resource together, so an
+        # update must not alter it, whether or not the version changes too.
+        if original_resource["id"] != modified_resource["id"]:
+            return {"status": "Cannot change resource id"}
+        # Build the new list instead of removing entries while iterating,
+        # which makes the loop skip the element after each removal.
+        remaining = [
+            resource
+            for resource in self.resources
+            if resource["id"] != original_resource["id"]
+            or resource["resource_version"]
+            != original_resource["resource_version"]
+        ]
+        if len(remaining) != len(self.resources):
+            self.resources[:] = remaining + [modified_resource]
+
+        self.write_to_file()
+        return {"status": "Updated"}
+
+    def check_resource_exists(self, query: Dict) -> Dict:
+        """
+        Checks if a resource exists within the list of resources based on
+        the provided query.
+
+        A resource exists if both its "id" and its "resource_version"
+        match the values in the query.
+
+        :param query: The query object containing the resource
+        identification criteria.
+        :return: {"exists": True} if a matching resource is found,
+        {"exists": False} otherwise.
+        """
+        exists = any(
+            resource["id"] == query["id"]
+            and resource["resource_version"] == query["resource_version"]
+            for resource in self.resources
+        )
+        return {"exists": exists}
+
+    def insert_resource(self, query: Dict) -> Dict:
+        """
+        Inserts a new resource into the list of resources.
+
+        Unless a resource with the same "id" and "resource_version" is
+        already present, the function appends the query (the new resource)
+        to the resources list and writes the updated list to the file.
+
+        :param query: The new resource to insert.
+        :return: A dictionary with the status of the insertion.
+        """
+        if self.check_resource_exists(query)["exists"]:
+            return {"status": "Resource already exists"}
+        self.resources.append(query)
+        self.write_to_file()
+        return {"status": "Inserted"}
+
+    def delete_resource(self, query: Dict) -> Dict:
+        """
+        This function deletes every resource that matches the "id" and
+        "resource_version" of the query from the list of resources and
+        saves the list.
+
+        :param query: The query object containing the resource
+        identification criteria.
+        :return: A dictionary indicating that the resource was deleted.
+        """
+        # Filter into a new list: removing entries while iterating over
+        # the same list would skip the element after each removal.
+        self.resources[:] = [
+            resource
+            for resource in self.resources
+            if resource["id"] != query["id"]
+            or resource["resource_version"] != query["resource_version"]
+        ]
+        self.write_to_file()
+        return {"status": "Deleted"}
+
+    def write_to_file(self) -> None:
+        """
+        This function writes the list of resources to the file at
+        self.file_path as JSON, replacing its previous content.
+
+        :return: None
+        """
+        with Path(self.file_path).open("w") as outfile:
+            json.dump(self.resources, outfile, indent=4)
+
+    def save_session(self) -> Dict:
+        """
+        This function saves the client session to a dictionary.
+        :return: A dictionary containing the client type and the name of
+        the JSON file.
+        """
+        return {
+            "client": "json",
+            "filename": self.file_path.name,
+        }
diff --git a/util/gem5-resources-manager/api/mongo_client.py b/util/gem5-resources-manager/api/mongo_client.py
new file mode 100644
index 0000000000..845524b886
--- /dev/null
+++ b/util/gem5-resources-manager/api/mongo_client.py
@@ -0,0 +1,237 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+from bson import json_util
+from api.client import Client
+from pymongo.errors import ConnectionFailure, ConfigurationError
+from pymongo import MongoClient
+from typing import Dict, List
+import pymongo
+
+
+class DatabaseConnectionError(Exception):
+    """Raised when the MongoDB server, database or collection cannot be
+    reached."""
+
+
+class MongoDBClient(Client):
+    """Client that keeps the resources in a MongoDB collection."""
+
+    def __init__(self, mongo_uri, database_name, collection_name):
+        super().__init__()
+        self.mongo_uri = mongo_uri
+        self.collection_name = collection_name
+        self.database_name = database_name
+        self.collection = self._get_database(
+            mongo_uri, database_name, collection_name
+        )
+
+    @staticmethod
+    def _version_key(resource: Dict):
+        """
+        Sort key for resource versions: "1.10.0" becomes (1, 10, 0), so
+        that versions compare numerically, not as strings.
+        """
+        return tuple(map(int, resource["resource_version"].split(".")))
+
+    def _get_database(
+        self,
+        mongo_uri: str,
+        database_name: str,
+        collection_name: str,
+    ) -> pymongo.collection.Collection:
+        """
+        This function returns the MongoDB collection object for the
+        specified database and collection, after checking that the
+        server can be reached and that both of them exist.
+
+        :param: mongo_uri: URI of the MongoDB instance
+        :param: database_name: Name of the database
+        :param: collection_name: Name of the collection
+        :return: collection: MongoDB collection object
+        :raises DatabaseConnectionError: if the connection cannot be set
+        up, or if the database or the collection does not exist
+        """
+        # Bound before the try block so the handlers can always close it.
+        client = None
+        try:
+            client = MongoClient(mongo_uri)
+            client.admin.command("ping")
+        except ConnectionFailure as e:
+            if client is not None:
+                client.close()
+            raise DatabaseConnectionError(
+                "Could not connect to MongoClient with given URI!"
+            ) from e
+        except ConfigurationError as e:
+            if client is not None:
+                client.close()
+            raise DatabaseConnectionError(e) from e
+
+        database = client[database_name]
+        if database.name not in client.list_database_names():
+            client.close()
+            raise DatabaseConnectionError("Database Does not Exist!")
+
+        collection = database[collection_name]
+        if collection.name not in database.list_collection_names():
+            client.close()
+            raise DatabaseConnectionError("Collection Does not Exist!")
+
+        return collection
+
+    def find_resource(self, query: Dict) -> Dict:
+        """
+        Find a resource in the database
+
+        Without a "resource_version" in the query (or when it is "" or
+        "Latest") the highest version with the given "id" is returned.
+
+        :param query: JSON object with id and resource_version
+        :return: json_resource: JSON object with request resource, or
+        {"exists": False} if there is no such resource
+        """
+        version = query.get("resource_version", "")
+        exact = version not in ("", "Latest")
+        criteria = {"id": query["id"]}
+        if exact:
+            criteria["resource_version"] = version
+        cursor = self.collection.find(criteria, {"_id": 0})
+        # json_util renders BSON-specific values in their JSON form.
+        resources = json.loads(json_util.dumps(cursor))
+        if not resources:
+            return {"exists": False}
+        if exact:
+            return resources[0]
+        # The latest version is chosen here rather than by sorting in
+        # the query: as strings, "1.10.0" would sort below "1.9.0".
+        return max(resources, key=self._version_key)
+
+    def update_resource(self, query: Dict) -> Dict[str, str]:
+        """
+        This function replaces the resource whose "id" and
+        "resource_version" match those of query["original_resource"] with
+        query["resource"].
+        If no resource matches, nothing is replaced.
+
+        :param: query: JSON object with original_resource and the
+        updated resource
+        :return: json_response: JSON object with status message
+        """
+        original_resource = query["original_resource"]
+        modified_resource = query["resource"]
+        try:
+            self.collection.replace_one(
+                {
+                    "id": original_resource["id"],
+                    "resource_version": original_resource["resource_version"],
+                },
+                modified_resource,
+            )
+        except Exception as e:
+            print(e)
+            return {"status": "Resource does not exist"}
+        return {"status": "Updated"}
+
+    def get_versions(self, query: Dict) -> List[Dict]:
+        """
+        This function retrieves all versions of a resource with the given
+        ID from the database.
+
+        :param: query: JSON object with id
+        :return: json_resource: list of {"resource_version": ...} objects,
+        highest version first
+        """
+        cursor = self.collection.find(
+            {"id": query["id"]}, {"resource_version": 1, "_id": 0}
+        )
+        versions = json_util.loads(json_util.dumps(cursor))
+        # Sorted numerically: as strings, "1.10.0" sorts below "1.9.0".
+        versions.sort(key=self._version_key, reverse=True)
+        return versions
+
+    def delete_resource(self, query: Dict) -> Dict[str, str]:
+        """
+        This function deletes the resource with the given "id" and
+        "resource_version" from the database, if there is one.
+
+        :param: query: JSON object with id and resource_version
+        :return: json_response: JSON object with status message
+        """
+        self.collection.delete_one(
+            {"id": query["id"], "resource_version": query["resource_version"]}
+        )
+        return {"status": "Deleted"}
+
+    def insert_resource(self, query: Dict) -> Dict[str, str]:
+        """
+        This function inserts a new resource into the database using the
+        'insert_one' method of the MongoDB client, unless a resource with
+        the same "id" and "resource_version" is already stored.
+
+        :param: query: JSON object representing the new resource
+        :return: json_response: JSON object with status message
+        """
+        # Checked explicitly: insert_one only fails on its own for a
+        # duplicate "_id" or unique index, not for a repeated id/version.
+        if self.check_resource_exists(query)["exists"]:
+            return {"status": "Resource already exists"}
+        try:
+            self.collection.insert_one(query)
+        except Exception:
+            return {"status": "Resource already exists"}
+        return {"status": "Inserted"}
+
+    def check_resource_exists(self, query: Dict) -> Dict:
+        """
+        This function checks if a resource exists in the database by
+        searching for a resource with a matching 'id' and
+        'resource_version' in the database.
+
+        :param: query: JSON object with id and resource_version
+        :return: json_response: JSON object with boolean 'exists' key
+        """
+        resource = self.collection.find_one(
+            {
+                "id": query["id"],
+                "resource_version": query["resource_version"],
+            },
+            {"_id": 0},
+        )
+        return {"exists": resource is not None}
+
+    def save_session(self) -> Dict:
+        """
+        This function saves the client session to a dictionary.
+        :return: A dictionary containing the client session.
+        """
+        return {
+            "client": "mongodb",
+            "uri": self.mongo_uri,
+            "database": self.database_name,
+            "collection": self.collection_name,
+        }
diff --git a/util/gem5-resources-manager/gem5_resource_cli.py b/util/gem5-resources-manager/gem5_resource_cli.py
new file mode 100644
index 0000000000..28528bec92
--- /dev/null
+++ b/util/gem5-resources-manager/gem5_resource_cli.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+from pymongo import MongoClient
+from api.create_resources_json import ResourceJsonCreator
+import os
+from dotenv import load_dotenv
+import argparse
+from itertools import cycle
+from shutil import get_terminal_size
+from threading import Thread
+from time import sleep
+
+# Load variables from a .env file, if present, into the environment.
+load_dotenv()
+# Default MongoDB URI of the CLI; None if MONGO_URI is not set.
+MONGO_URI = os.environ.get("MONGO_URI")
+
+
+class Loader:
+    def __init__(self, desc="Loading...", end="Done!", timeout=0.1):
+        """
+        A loader-like context manager that animates a spinner in a thread.
+
+        Args:
+            desc (str, optional): Text shown next to the spinner.
+            end (str, optional): Text printed when the loader stops.
+            timeout (float, optional): Sleep time between prints.
+        """
+        self.desc = desc
+        self.end = end
+        self.timeout = timeout
+        self._thread = Thread(target=self._animate, daemon=True)
+        self.steps = ["⢿", "⣻", "⣽", "⣾", "⣷", "⣯", "⣟", "⡿"]
+        self.done = False
+
+    def start(self):
+        self._thread.start()
+        return self
+
+    def _animate(self):
+        for c in cycle(self.steps):
+            if self.done:
+                break
+            print(f"\r{self.desc} {c}", flush=True, end="")
+            sleep(self.timeout)
+
+    def __enter__(self):
+        return self.start()
+
+    def stop(self, show_end=True):
+        self.done = True
+        # Let the animation thread finish so no frame follows the end text.
+        if self._thread.is_alive():
+            self._thread.join()
+        cols = get_terminal_size((80, 20)).columns
+        print("\r" + " " * cols, end="", flush=True)
+        print(f"\r{self.end if show_end else ''}", flush=True)
+
+    def __exit__(self, exc_type, exc_value, tb):
+        # The end text reports success; leave it out if the body raised.
+        self.stop(show_end=exc_type is None)
+
+
+def get_database(collection="versions_test", uri=MONGO_URI, db="gem5-vision"):
+    """
+    Connects to MongoDB at ``uri`` and returns collection ``collection`` of
+    database ``db``. Exits with status 1 if the connection attempt fails.
+    """
+    try:
+        client = MongoClient(uri)
+        client.server_info()  # raises if the server cannot be reached
+    except Exception:
+        print("\nCould not connect to MongoDB")
+        raise SystemExit(1)
+    return client[db][collection]
+
+
+collection = None  # MongoDB collection of the commands; assigned in cli()
+
+
+def cli():
+    """
+    Entry point of the CLI: parses the command line, connects to the
+    MongoDB collection if the chosen command works on it, and runs the
+    handler of that command.
+
+    Commands: get_resource, backup_mongodb, restore_backup and
+    create_resources_json.
+    """
+    parser = argparse.ArgumentParser(
+        description="CLI for gem5-resources.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-u",
+        "--uri",
+        help="The URI of the MongoDB database.",
+        default=MONGO_URI,
+    )
+    parser.add_argument(
+        "-d",
+        "--database",
+        help="The MongoDB database to use.",
+        default="gem5-vision",
+    )
+    parser.add_argument(
+        "-c",
+        "--collection",
+        help="The MongoDB collection to use.",
+        default="versions_test",
+    )
+
+    subparsers = parser.add_subparsers(
+        help="The command to run.", dest="command", required=True
+    )
+
+    parser_get_resource = subparsers.add_parser(
+        "get_resource",
+        help=(
+            "Retrieves a resource from the collection based on the given ID."
+            "\n if a resource version is provided, it will retrieve the "
+            "resource with the given ID and version."
+        ),
+    )
+    req_group = parser_get_resource.add_argument_group(
+        title="required arguments"
+    )
+    req_group.add_argument(
+        "-i",
+        "--id",
+        help="The ID of the resource to retrieve.",
+        required=True,
+    )
+    parser_get_resource.add_argument(
+        "-v",
+        "--version",
+        help="The version of the resource to retrieve.",
+    )
+    parser_get_resource.set_defaults(func=get_resource)
+
+    parser_backup_mongodb = subparsers.add_parser(
+        "backup_mongodb",
+        help="Backs up the MongoDB collection to a JSON file.",
+    )
+    req_group = parser_backup_mongodb.add_argument_group(
+        title="required arguments"
+    )
+    req_group.add_argument(
+        "-f",
+        "--file",
+        help="The JSON file to back up the MongoDB collection to.",
+        required=True,
+    )
+    parser_backup_mongodb.set_defaults(func=backup_mongodb)
+
+    parser_restore_backup = subparsers.add_parser(
+        "restore_backup",
+        help="Restores a backup of the MongoDB collection from a JSON file.",
+    )
+    req_group = parser_restore_backup.add_argument_group(
+        title="required arguments"
+    )
+    req_group.add_argument(
+        "-f",
+        "--file",
+        help="The JSON file to restore the MongoDB collection from.",
+        required=True,
+    )
+    parser_restore_backup.set_defaults(func=restore_backup)
+
+    parser_create_resources_json = subparsers.add_parser(
+        "create_resources_json",
+        help="Creates a JSON file of all the resources in the collection.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser_create_resources_json.add_argument(
+        "-v",
+        "--version",
+        help="The version of the resources to create the JSON file for.",
+        default="dev",
+    )
+    parser_create_resources_json.add_argument(
+        "-o",
+        "--output",
+        help="The JSON file to create.",
+        default="resources.json",
+    )
+    parser_create_resources_json.add_argument(
+        "-s",
+        "--source",
+        help="The path to the gem5 source code.",
+        default="",
+    )
+    parser_create_resources_json.set_defaults(func=create_resources_json)
+
+    args = parser.parse_args()
+    # create_resources_json does not use the collection, so it must not
+    # fail (or wait) on a MongoDB connection that it never needs.
+    if args.collection and args.func is not create_resources_json:
+        global collection
+        with Loader("Connecting to MongoDB...", end="Connected to MongoDB"):
+            collection = get_database(args.collection, args.uri, args.database)
+    args.func(args)
+
+
+def get_resource(args):
+    """
+    Prints the resource with ID ``args.id`` from the collection as JSON: the
+    version ``args.version`` if given, otherwise a list of all its versions.
+    """
+    # The text to finish with is only known once the lookup is done.
+    loader = Loader("Retrieving resource...").start()
+    if args.version:
+        criteria = {"id": args.id, "resource_version": args.version}
+        found = collection.find_one(criteria, {"_id": 0})
+    else:
+        found = list(collection.find({"id": args.id}, {"_id": 0}))
+    if found:
+        loader.end = json.dumps(found, indent=4)
+    else:
+        loader.end = "Resource not found"
+    loader.stop()
+
+
+def backup_mongodb(args):
+    """
+    Backs up the MongoDB collection to a JSON file.
+
+    :param args: Parsed CLI arguments; ``args.file`` is the JSON file to
+    back up the MongoDB collection to.
+    """
+    finished = "Backed up the database to " + args.file
+    with Loader("Backing up the database...", end=finished):
+        # query for all documents of the collection, leaving out their
+        # "_id" field
+        cursor = collection.find({}, {"_id": 0})
+        # the documents are written to the file as one JSON list
+        with open(args.file, "w") as backup_file:
+            json.dump(list(cursor), backup_file, indent=4)
+
+
+def restore_backup(args):
+    """Replaces the collection's content with the JSON backup args.file."""
+    with Loader("Restoring backup...", end="Updated the database\n"):
+        with open(args.file) as f:
+            resources = json.load(f)
+        collection.delete_many({})
+        if resources:  # insert_many() rejects an empty list of documents
+            collection.insert_many(resources)
+
+
+def create_resources_json(args):  # handler of the command of the same name
+    with Loader("Creating resources JSON...", end="Created " + args.output):
+        creator = ResourceJsonCreator()
+        creator.create_json(args.version, args.source, args.output)
+
+
+if __name__ == "__main__":
+    cli()  # run the command-line interface when executed as a script
diff --git a/util/gem5-resources-manager/requirements.txt b/util/gem5-resources-manager/requirements.txt
new file mode 100644
index 0000000000..5ffd51deb2
--- /dev/null
+++ b/util/gem5-resources-manager/requirements.txt
@@ -0,0 +1,29 @@
+attrs==23.1.0
+blinker==1.6.2
+certifi==2023.5.7
+cffi==1.15.1
+charset-normalizer==3.1.0
+click==8.1.3
+colorama==0.4.6
+coverage==7.2.7
+cryptography==39.0.2
+dnspython==2.3.0
+Flask==2.3.2
+idna==3.4
+importlib-metadata==6.6.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+jsonschema==4.17.3
+Markdown==3.4.3
+MarkupSafe==2.1.3
+mongomock==4.1.2
+packaging==23.1
+pycparser==2.21
+pymongo==4.3.3
+pyrsistent==0.19.3
+python-dotenv==1.0.0
+requests==2.31.0
+sentinels==1.0.0
+urllib3==2.0.2
+Werkzeug==2.3.4
+zipp==3.15.0
diff --git a/util/gem5-resources-manager/server.py b/util/gem5-resources-manager/server.py
new file mode 100644
index 0000000000..ec298d6c70
--- /dev/null
+++ b/util/gem5-resources-manager/server.py
@@ -0,0 +1,884 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from flask import (
+    render_template,
+    Flask,
+    request,
+    redirect,
+    url_for,
+    make_response,
+)
+from bson import json_util
+import json
+import jsonschema
+import requests
+import markdown
+import base64
+import secrets
+from pathlib import Path
+from werkzeug.utils import secure_filename
+from cryptography.fernet import Fernet, InvalidToken
+from cryptography.hazmat.primitives.kdf.scrypt import Scrypt
+from cryptography.exceptions import InvalidSignature
+from api.json_client import JSONClient
+from api.mongo_client import MongoDBClient
+
+databases = {}
+
+response = requests.get(
+    "https://resources.gem5.org/gem5-resources-schema.json"
+)
+schema = json.loads(response.content)
+
+
+# Directory holding the JSON database files handled by this application.
+UPLOAD_FOLDER = Path("database/")
+# Holding area for uploads whose name collides with a file in UPLOAD_FOLDER,
+# kept there until the conflict is resolved.
+TEMP_UPLOAD_FOLDER = Path("database/.tmp/")
+# Config file providing SECRET_KEY; generated on first start if missing.
+CONFIG_FILE = Path("instance/config.py")
+# NOTE(review): not referenced in the visible part of this file; presumably
+# the key under which saved sessions are stored client-side. Confirm.
+SESSIONS_COOKIE_KEY = "sessions"
+# NOTE(review): not referenced by the visible routes; uploads are not checked
+# against this set there.
+ALLOWED_EXTENSIONS = {"json"}
+# Supported client kinds; the routes refer to them by position
+# (index 0 is MongoDB, index 1 is JSON).
+CLIENT_TYPES = ["mongodb", "json"]
+
+
+app = Flask(__name__, instance_relative_config=True)
+
+
+if not CONFIG_FILE.exists():
+    CONFIG_FILE.parent.mkdir()
+    with CONFIG_FILE.open("w+") as f:
+        f.write(f"SECRET_KEY = {secrets.token_bytes(32)}")
+
+
+app.config.from_pyfile(CONFIG_FILE.name)
+
+
+# Sorts keys in any serialized dict
+# Default = True
+# Set False to persevere JSON key order
+app.json.sort_keys = False
+
+
+def startup_config_validation():
+    """
+    Validates the startup configuration.
+
+    Raises:
+        ValueError: If the 'SECRET_KEY' is not set or is not of type 'bytes'.
+    """
+    if not app.secret_key:
+        raise ValueError("SECRET_KEY not set")
+    if not isinstance(app.secret_key, bytes):
+        raise ValueError("SECRET_KEY must be of type 'bytes'")
+
+
+def startup_dir_file_validation():
+    """
+    Validates the startup directory and file configuration.
+
+    Creates the required directories if they do not exist.
+    """
+    for dir in [UPLOAD_FOLDER, TEMP_UPLOAD_FOLDER]:
+        if not dir.is_dir():
+            dir.mkdir()
+
+
+# Validate the configuration and create the upload directories at import
+# time, inside an application context.
+with app.app_context():
+    startup_config_validation()
+    startup_dir_file_validation()
+
+
+@app.route("/")
+def index():
+    """
+    Renders the index HTML template.
+
+    :return: The rendered index HTML template.
+    """
+    return render_template("index.html")
+
+
+@app.route("/login/mongodb")
+def login_mongodb():
+    """
+    Renders the MongoDB login HTML template.
+
+    :return: The rendered MongoDB login HTML template.
+    """
+    return render_template("login/login_mongodb.html")
+
+
+@app.route("/login/json")
+def login_json():
+    """
+    Renders the JSON login HTML template.
+
+    :return: The rendered JSON login HTML template.
+    """
+    return render_template("login/login_json.html")
+
+
+@app.route("/validateMongoDB", methods=["POST"])
+def validate_mongodb():
+    """
+    Validates the MongoDB connection parameters and redirects to the editor route if successful.
+
+    This route expects a POST request with a JSON payload containing an alias for the session and the listed parameters in order to validate the MongoDB instance.
+
+    This route expects the following JSON payload parameters:
+    - uri: The MongoDB connection URI.
+    - collection: The name of the collection in the MongoDB database.
+    - database: The name of the MongoDB database.
+    - alias: The value by which the session will be keyed in `databases`.
+
+    If the 'uri' parameter is empty, a JSON response with an error message and status code 400 (Bad Request) is returned.
+    If the connection parameters are valid, the route redirects to the 'editor' route with the appropriate query parameters.
+
+    :return: A redirect response to the 'editor' route or a JSON response with an error message and status code 400.
+    """
+    global databases
+    try:
+        databases[request.json["alias"]] = MongoDBClient(
+            mongo_uri=request.json["uri"],
+            database_name=request.json["database"],
+            collection_name=request.json["collection"],
+        )
+    except Exception as e:
+        return {"error": str(e)}, 400
+    return redirect(
+        url_for("editor", alias=request.json["alias"]),
+        302,
+    )
+
+
+@app.route("/validateJSON", methods=["GET"])
+def validate_json_get():
+    """
+    Validates the provided JSON URL and redirects to the editor route if successful.
+
+    This route expects the following query parameters:
+    - q: The URL of the JSON file.
+    - filename: An optional filename for the uploaded JSON file.
+
+    If the 'q' parameter is empty, a JSON response with an error message and status code 400 (Bad Request) is returned.
+    If the JSON URL is valid, the function retrieves the JSON content, saves it to a file, and redirects to the 'editor'
+    route with the appropriate query parameters.
+
+    :return: A redirect response to the 'editor' route or a JSON response with an error message and status code 400.
+    """
+    filename = request.args.get("filename")
+    url = request.args.get("q")
+    if not url:
+        return {"error": "empty"}, 400
+    response = requests.get(url)
+    if response.status_code != 200:
+        return {"error": "invalid status"}, response.status_code
+    filename = secure_filename(request.args.get("filename"))
+    path = UPLOAD_FOLDER / filename
+    if (UPLOAD_FOLDER / filename).is_file():
+        temp_path = TEMP_UPLOAD_FOLDER / filename
+        with temp_path.open("wb") as f:
+            f.write(response.content)
+        return {"conflict": "existing file in server"}, 409
+    with path.open("wb") as f:
+        f.write(response.content)
+    global databases
+    if filename in databases:
+        return {"error": "alias already exists"}, 409
+    try:
+        databases[filename] = JSONClient(filename)
+    except Exception as e:
+        return {"error": str(e)}, 400
+    return redirect(
+        url_for("editor", alias=filename),
+        302,
+    )
+
+
+@app.route("/validateJSON", methods=["POST"])
+def validate_json_post():
+    """
+    Validates and processes the uploaded JSON file.
+
+    This route expects a file with the key 'file' in the request files.
+    If the file is not present, a JSON response with an error message
+    and status code 400 (Bad Request) is returned.
+    If the file already exists in the server, a JSON response with a
+    conflict error message and status code 409 (Conflict) is returned.
+    If the file's filename conflicts with an existing alias, a JSON
+    response with an error message and status code 409 (Conflict) is returned.
+    If there is an error while processing the JSON file, a JSON response
+    with the error message and status code 400 (Bad Request) is returned.
+    If the file is successfully processed, a redirect response to the
+    'editor' route with the appropriate query parameters is returned.
+
+    :return: A JSON response with an error message and
+    status code 400 or 409, or a redirect response to the 'editor' route.
+    """
+    temp_path = None
+    if "file" not in request.files:
+        return {"error": "empty"}, 400
+    file = request.files["file"]
+    filename = secure_filename(file.filename)
+    path = UPLOAD_FOLDER / filename
+    if path.is_file():
+        temp_path = TEMP_UPLOAD_FOLDER / filename
+        file.save(temp_path)
+        return {"conflict": "existing file in server"}, 409
+    file.save(path)
+    global databases
+    if filename in databases:
+        return {"error": "alias already exists"}, 409
+    try:
+        databases[filename] = JSONClient(filename)
+    except Exception as e:
+        return {"error": str(e)}, 400
+    return redirect(
+        url_for("editor", alias=filename),
+        302,
+    )
+
+
+@app.route("/existingJSON", methods=["GET"])
+def existing_json():
+    """
+    Handles the request for an existing JSON file.
+
+    This route expects a query parameter 'filename'
+    specifying the name of the JSON file.
+    If the file is not present in the 'databases',
+    it tries to create a 'JSONClient' instance for the file.
+    If there is an error while creating the 'JSONClient'
+    instance, a JSON response with the error message
+    and status code 400 (Bad Request) is returned.
+    If the file is present in the 'databases', a redirect
+    response to the 'editor' route with the appropriate
+    query parameters is returned.
+
+    :return: A JSON response with an error message
+    and status code 400, or a redirect response to the 'editor' route.
+    """
+    filename = request.args.get("filename")
+    global databases
+    if filename not in databases:
+        try:
+            databases[filename] = JSONClient(filename)
+        except Exception as e:
+            return {"error": str(e)}, 400
+    return redirect(
+        url_for("editor", alias=filename),
+        302,
+    )
+
+
+@app.route("/existingFiles", methods=["GET"])
+def get_existing_files():
+    """
+    Retrieves the list of existing files in the upload folder.
+
+    This route returns a JSON response containing the names of the existing files in the upload folder configured in the
+    Flask application.
+
+    :return: A JSON response with the list of existing files.
+    """
+    files = [f.name for f in UPLOAD_FOLDER.iterdir() if f.is_file()]
+    return json.dumps(files)
+
+
+@app.route("/resolveConflict", methods=["GET"])
+def resolve_conflict():
+    """
+    Resolves file conflict with JSON files.
+
+    This route expects the following query parameters:
+    - filename: The name of the file that is conflicting or an updated name for it to resolve the name conflict
+    - resolution: A resolution option, defined as follows:
+        - clearInput: Deletes the conflicting file and does not proceed with login
+        - openExisting: Opens the existing file in `UPLOAD_FOLDER`
+        - overwrite: Overwrites the existing file with the conflicting file
+        - newFilename: Renames conflicting file, moving it to `UPLOAD_FOLDER`
+
+    If the resolution parameter is not from the list given, an error is returned.
+
+    The conflicting file in `TEMP_UPLOAD_FOLDER` is deleted.
+
+    :return: A JSON response containing an error, or a success response, or a redirect to the editor.
+    """
+    filename = secure_filename(request.args.get("filename"))
+    resolution = request.args.get("resolution")
+    resolution_options = [
+        "clearInput",
+        "openExisting",
+        "overwrite",
+        "newFilename",
+    ]
+    temp_path = TEMP_UPLOAD_FOLDER / filename
+    if not resolution:
+        return {"error": "empty"}, 400
+    if resolution not in resolution_options:
+        return {"error": "invalid resolution"}, 400
+    if resolution == resolution_options[0]:
+        temp_path.unlink()
+        return {"success": "input cleared"}, 204
+    if resolution in resolution_options[-2:]:
+        next(TEMP_UPLOAD_FOLDER.glob("*")).replace(UPLOAD_FOLDER / filename)
+    if temp_path.is_file():
+        temp_path.unlink()
+    global databases
+    if filename in databases:
+        return {"error": "alias already exists"}, 409
+    try:
+        databases[filename] = JSONClient(filename)
+    except Exception as e:
+        return {"error": str(e)}, 400
+    return redirect(
+        url_for("editor", alias=filename),
+        302,
+    )
+
+
+@app.route("/editor")
+def editor():
+    """
+    Renders the editor page based on the specified database type.
+
+    This route expects a GET request with specific query parameters:
+    - "alias": An optional alias for the MongoDB database.
+
+    The function checks if the query parameters are present. If not, it returns a 404 error.
+
+    The function determines the database type based on the instance of the client object stored in the databases['alias']. If the type is not in the
+    "CLIENT_TYPES" configuration, it returns a 404 error.
+
+    :return: The rendered editor template based on the specified database type.
+    """
+    global databases
+    if not request.args:
+        return render_template("404.html"), 404
+    alias = request.args.get("alias")
+    if alias not in databases:
+        return render_template("404.html"), 404
+
+    client_type = ""
+    if isinstance(databases[alias], JSONClient):
+        client_type = CLIENT_TYPES[1]
+    elif isinstance(databases[alias], MongoDBClient):
+        client_type = CLIENT_TYPES[0]
+    else:
+        return render_template("404.html"), 404
+
+    response = make_response(
+        render_template("editor.html", client_type=client_type, alias=alias)
+    )
+
+    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+    response.headers["Pragma"] = "no-cache"
+    response.headers["Expires"] = "0"
+
+    return response
+
+
+@app.route("/help")
+def help():
+    """
+    Renders the help page.
+
+    This route reads the contents of the "help.md" file located in the "static" folder and renders it as HTML using the
+    Markdown syntax. The rendered HTML is then passed to the "help.html" template for displaying the help page.
+
+    :return: The rendered help page HTML.
+    """
+    with Path("static/help.md").open("r") as f:
+        return render_template(
+            "help.html", rendered_html=markdown.markdown(f.read())
+        )
+
+
+@app.route("/find", methods=["POST"])
+def find():
+    """
+    Finds a resource based on the provided search criteria.
+
+    This route expects a POST request with a JSON payload containing the alias of the session which is to be searched for the
+    resource and the search criteria.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to find the resource by calling `find_resource()` on the session where the operation is
+    accomplished by the concrete client class.
+
+    The result of the `find_resource` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `find_resource` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.find_resource(request.json)
+
+
+@app.route("/update", methods=["POST"])
+def update():
+    """
+    Updates a resource with provided changes.
+
+    This route expects a POST request with a JSON payload containing the alias of the session which contains the resource
+    that is to be updated and the data for updating the resource.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to update the resource by calling `update_resource()` on the session where the operation is
+    accomplished by the concrete client class.
+
+    The `_add_to_stack` function of the session is called to insert the operation, update, and necessary data onto the revision
+    operations stack.
+
+    The result of the `update_resource` operation is returned as a JSON response. It contains the original and the modified resources.
+
+    :return: A JSON response containing the result of the `update_resource` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    original_resource = request.json["original_resource"]
+    modified_resource = request.json["resource"]
+    status = database.update_resource(
+        {
+            "original_resource": original_resource,
+            "resource": modified_resource,
+        }
+    )
+    database._add_to_stack(
+        {
+            "operation": "update",
+            "resource": {
+                "original_resource": modified_resource,
+                "resource": original_resource,
+            },
+        }
+    )
+    return status
+
+
+@app.route("/versions", methods=["POST"])
+def getVersions():
+    """
+    Retrieves the versions of a resource based on the provided search criteria.
+
+    This route expects a POST request with a JSON payload containing the alias of the session which contains the resource
+    whose versions are to be retrieved and the search criteria.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to get the versions of a resource by calling `get_versions()` on the session where the operation is
+    accomplished by the concrete client class.
+
+    The result of the `get_versions` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `get_versions` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.get_versions(request.json)
+
+
+@app.route("/categories", methods=["GET"])
+def getCategories():
+    """
+    Retrieves the categories of the resources.
+
+    This route returns a JSON response containing the categories of the resources. The categories are obtained from the
+    "enum" property of the "category" field in the schema.
+
+    :return: A JSON response with the categories of the resources.
+    """
+    return json.dumps(schema["properties"]["category"]["enum"])
+
+
+@app.route("/schema", methods=["GET"])
+def getSchema():
+    """
+    Retrieves the schema definition of the resources.
+
+    This route returns a JSON response containing the schema definition of the resources. The schema is obtained from the
+    `schema` variable.
+
+    :return: A JSON response with the schema definition of the resources.
+    """
+    return json_util.dumps(schema)
+
+
+@app.route("/keys", methods=["POST"])
+def getFields():
+    """
+    Retrieves the required fields for a specific category based on the provided data.
+
+    This route expects a POST request with a JSON payload containing the data for retrieving the required fields.
+    The function constructs an empty object `empty_object` with the "category" and "id" values from the request payload.
+
+    The function then uses the JSONSchema validator to validate the `empty_object` against the `schema`. It iterates
+    through the validation errors and handles two types of errors:
+
+    1. "is a required property" error: If a required property is missing in the `empty_object`, the function retrieves
+       the default value for that property from the schema and sets it in the `empty_object`.
+
+    2. "is not valid under any of the given schemas" error: If a property is not valid under the current schema, the
+       function evolves the validator to use the schema corresponding to the requested category. It then iterates
+       through the validation errors again and handles any missing required properties as described in the previous
+       step.
+
+    Finally, the `empty_object` with the required fields populated (including default values if applicable) is returned
+    as a JSON response.
+
+    :return: A JSON response containing the `empty_object` with the required fields for the specified category.
+    """
+    empty_object = {
+        "category": request.json["category"],
+        "id": request.json["id"],
+    }
+    validator = jsonschema.Draft7Validator(schema)
+    errors = list(validator.iter_errors(empty_object))
+    for error in errors:
+        if "is a required property" in error.message:
+            required = error.message.split("'")[1]
+            empty_object[required] = error.schema["properties"][required][
+                "default"
+            ]
+        if "is not valid under any of the given schemas" in error.message:
+            validator = validator.evolve(
+                schema=error.schema["definitions"][request.json["category"]]
+            )
+            for e in validator.iter_errors(empty_object):
+                if "is a required property" in e.message:
+                    required = e.message.split("'")[1]
+                    if "default" in e.schema["properties"][required]:
+                        empty_object[required] = e.schema["properties"][
+                            required
+                        ]["default"]
+                    else:
+                        empty_object[required] = ""
+    return json.dumps(empty_object)
+
+
+@app.route("/delete", methods=["POST"])
+def delete():
+    """
+    Deletes a resource.
+
+    This route expects a POST request with a JSON payload containing the alias of the session from which a resource is to be
+    deleted and the data for deleting the resource.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to delete the resource by calling `delete_resource()` on the session where the operation is
+    accomplished by the concrete client class.
+
+    The `_add_to_stack` function of the session is called to insert the operation, delete, and necessary data onto the revision
+    operations stack.
+
+    The result of the `delete` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `delete` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    resource = request.json["resource"]
+    status = database.delete_resource(resource)
+    database._add_to_stack({"operation": "delete", "resource": resource})
+    return status
+
+
+@app.route("/insert", methods=["POST"])
+def insert():
+    """
+    Inserts a new resource.
+
+    This route expects a POST request with a JSON payload containing the alias of the session to which the data
+    is to be inserted and the data for inserting the resource.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to insert the new resource by calling `insert_resource()` on the session where the operation is
+    accomplished by the concrete client class.
+
+    The `_add_to_stack` function of the session is called to insert the operation, insert, and necessary data onto the revision
+    operations stack.
+
+    The result of the `insert` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `insert` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    resource = request.json["resource"]
+    status = database.insert_resource(resource)
+    database._add_to_stack({"operation": "insert", "resource": resource})
+    return status
+
+
+@app.route("/undo", methods=["POST"])
+def undo():
+    """
+    Undoes last operation performed on the session.
+
+    This route expects a POST request with a JSON payload containing the alias of the session whose last operation
+    is to be undone.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to undo the last operation performed on the session by calling `undo_operation()` on the
+    session where the operation is accomplished by the concrete client class.
+
+    The result of the `undo_operation` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `undo_operation` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.undo_operation()
+
+
+@app.route("/redo", methods=["POST"])
+def redo():
+    """
+    Redoes last operation performed on the session.
+
+    This route expects a POST request with a JSON payload containing the alias of the session whose last operation
+    is to be redone.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is returned.
+
+    The Client API is used to redo the last operation performed on the session by calling `redo_operation()` on the
+    session where the operation is accomplished by the concrete client class.
+
+    The result of the `redo_operation` operation is returned as a JSON response.
+
+    :return: A JSON response containing the result of the `redo_operation` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.redo_operation()
+
+
+@app.route("/getRevisionStatus", methods=["POST"])
+def get_revision_status():
+    """
+    Gets the status of revision operations.
+
+    This route expects a POST request with a JSON payload containing the alias of the session whose revision operations
+    statuses is being requested.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is
+    returned.
+
+    The Client API is used to get the status of the revision operations by calling `get_revision_status()` on the
+    session where the operation is accomplished by the concrete client class.
+
+    The result of the `get_revision_status` is returned as a JSON response.
+
+    :return: A JSON response contain the result of the `get_revision_status` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.get_revision_status()
+
+
+def fernet_instance_generation(password):
+    """
+    Generates Fernet instance for use in Saving and Loading Session.
+
+    Utilizes Scrypt Key Derivation Function with `SECRET_KEY` as salt value and recommended
+    values for `length`, `n`, `r`, and `p` parameters. Derives key using `password`. Derived
+    key is then used to initialize Fernet instance.
+
+    :param password: User provided password
+    :return: Fernet instance
+    """
+    return Fernet(
+        base64.urlsafe_b64encode(
+            Scrypt(salt=app.secret_key, length=32, n=2**16, r=8, p=1).derive(
+                password.encode()
+            )
+        )
+    )
+
+
+@app.route("/saveSession", methods=["POST"])
+def save_session():
+    """
+    Generates ciphertext of session that is to be saved.
+
+    This route expects a POST request with a JSON payload containing the alias of the session that is to be
+    saved and a password to be used in encrypting the session data.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is
+    returned.
+
+    The `save_session()` method is called to get the necessary session data from the corresponding `Client`
+    as a dictionary.
+
+    A Fernet instance, using the user provided password, is instantiated. The session data is encrypted using this
+    instance. If an Exception is raised, an error response is returned.
+
+    The result of the save_session operation is returned as a JSON response. The ciphertext is returned or an error
+    message if an error occurred.
+
+    :return: A JSON response containing the result of the save_session operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    session = databases[alias].save_session()
+    try:
+        fernet_instance = fernet_instance_generation(request.json["password"])
+        ciphertext = fernet_instance.encrypt(json.dumps(session).encode())
+    except (TypeError, ValueError):
+        return {"error": "Failed to Encrypt Session!"}, 400
+    return {"ciphertext": ciphertext.decode()}, 200
+
+
+@app.route("/loadSession", methods=["POST"])
+def load_session():
+    """
+    Loads session from data specified in user request.
+
+    This route expects a POST request with a JSON payload containing the encrypted ciphertext containing the session
+    data, the alias of the session that is to be restored, and the password associated with it.
+
+    A Fernet instance, using the user provided password, is instantiated. The session data is decrypted using this
+    instance. If an Exception is raised, an error response is returned.
+
+    The `Client` type is retrieved from the session data and a redirect to the appropriate login with the stored
+    parameters from the session data is applied.
+
+    The result of the load_session operation is returned either as a JSON response containing the error message
+    or a redirect.
+
+    :return: A JSON response containing the error of the load_session operation or a redirect.
+    """
+    alias = request.json["alias"]
+    session = request.json["session"]
+    try:
+        fernet_instance = fernet_instance_generation(request.json["password"])
+        session_data = json.loads(fernet_instance.decrypt(session))
+    except (InvalidSignature, InvalidToken):
+        return {"error": "Incorrect Password! Please Try Again!"}, 400
+    client_type = session_data["client"]
+    if client_type == CLIENT_TYPES[0]:
+        try:
+            databases[alias] = MongoDBClient(
+                mongo_uri=session_data["uri"],
+                database_name=session_data["database"],
+                collection_name=session_data["collection"],
+            )
+        except Exception as e:
+            return {"error": str(e)}, 400
+
+        return redirect(
+            url_for("editor", type=CLIENT_TYPES[0], alias=alias),
+            302,
+        )
+    elif client_type == CLIENT_TYPES[1]:
+        return redirect(
+            url_for("existing_json", filename=session_data["filename"]),
+            302,
+        )
+    else:
+        return {"error": "Invalid Client Type!"}, 409
+
+
+@app.errorhandler(404)
+def handle404(error):
+    """
+    Error handler for 404 (Not Found) errors.
+
+    This function is called when a 404 error occurs. It renders the "404.html" template and returns it as a response with
+    a status code of 404.
+
+    :param error: The error object representing the 404 error.
+    :return: A response containing the rendered "404.html" template with a status code of 404.
+    """
+    return render_template("404.html"), 404
+
+
+@app.route("/checkExists", methods=["POST"])
+def checkExists():
+    """
+    Checks if a resource exists based on the provided data.
+
+    This route expects a POST request with a JSON payload containing the alias of the session in which it is to be
+    determined whether a given resource exists and the necessary data for checking the existence of the resource.
+
+    The alias is used in retrieving the session from `databases`. If the session is not found, an error is
+    returned.
+
+    The Client API is used to check the existence of the resource by calling `check_resource_exists()` on the
+    session where the operation is accomplished by the concrete client class.
+
+    The result of the `check_resource_exists` is returned as a JSON response.
+
+    :return: A JSON response contain the result of the `check_resource_exists` operation.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    database = databases[alias]
+    return database.check_resource_exists(request.json)
+
+
+@app.route("/logout", methods=["POST"])
+def logout():
+    """
+    Logs the user out of the application.
+
+    Deletes the alias from the `databases` dictionary.
+
+    :param alias: The alias of the database to logout from.
+
+    :return: A redirect to the index page.
+    """
+    alias = request.json["alias"]
+    if alias not in databases:
+        return {"error": "database not found"}, 400
+    databases.pop(alias)
+    return (redirect(url_for("index")), 302)
+
+
+# Start the Flask development server when this file is executed as a script.
+# NOTE(review): `debug=True` turns on Flask's debug mode; confirm this entry
+# point is only ever used for local development.
+if __name__ == "__main__":
+    app.run(debug=True)
diff --git a/util/gem5-resources-manager/static/help.md b/util/gem5-resources-manager/static/help.md
new file mode 100644
index 0000000000..c79d26dea4
--- /dev/null
+++ b/util/gem5-resources-manager/static/help.md
@@ -0,0 +1,65 @@
+# Help
+
+## Load Previous Session
+Retrieves list of saved sessions from browser localStorage.
+If any are found, they are displayed in a list; select a session to restore, and if the entered password is correct, the session is restored and the page redirects to the editor.
+
+## MongoDB
+Set up editor view for MongoDB Instance.
+
+### Login: Enter URI
+Utilize if the MongoDB connection string is known.
+
+#### Fields:
+  - URI: [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/)
+
+#### Additional Fields:
+  - Collection: Specify collection in MongoDB instance to retrieve
+  - Database: Specify database in MongoDB instance to retrieve
+  - Alias: Optional. Provide a display alias to show on editor view instead of URI
+
+### Login: Generate URI
+Provides method to generate MongoDB URI connection string if it is not known or to supply with additional parameters.
+
+#### Fields:
+
+  - Connection: Specify connection mode, Standard or DNS Seed List, as defined by [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/)
+  - Username: Optional.
+  - Password: Optional.
+  - Host: Specify host/list of hosts for instance
+  - Retry Writes: Allow MongoDB to retry a write to the database once if it fails the first time
+  - Write Concern: Determines level of acknowledgement required from database for write operations, specifies how many nodes must acknowledge the operation before it is considered successful. (Currently set to majority)
+  - Options: Optional. Additional parameters that can be set when connecting to the instance
+
+#### Additional Fields:
+  - Collection: Specify collection in MongoDB instance to retrieve
+  - Database: Specify database in MongoDB instance to retrieve
+  - Alias: Optional field to provide a display alias to show on editor view instead of URI
+
+## JSON
+Set up editor view for JSON file. Can specify a URL to a remote JSON file to be imported
+or select a local JSON file.
+
+
+## Editor
+Page containing Monaco VSCode Diff Editor to allow editing of database entries.
+
+### Database Actions:
+Actions that can be performed on database currently in use.
+
+- Search: Search for resource in database with exact Resource ID
+- Version: Dropdown that allows for selection of a particular resource version of resource currently in view
+- Category: Specify category of resource to be viewed as defined by schema
+- Undo: Undoes last edit to database
+- Redo: Redoes last undone change to database
+- Show Schema: Sets view for schema of current database (read only)
+- Save Session: Save session in encrypted format to browser localStorage
+- Logout: Removes the current session from the list of active sessions
+
+### Editing Actions:
+Actions that can be performed on resource currently in view.
+
+- Add New Resource: Add a new resource to database
+- Add New Version: Insert a new version of current resource
+- Delete: Permanently delete resource
+- Update: Update resource with edits made
diff --git a/util/gem5-resources-manager/static/images/favicon.png b/util/gem5-resources-manager/static/images/favicon.png
new file mode 100644
index 0000000000..d0103efa4d
Binary files /dev/null and b/util/gem5-resources-manager/static/images/favicon.png differ
diff --git a/util/gem5-resources-manager/static/images/gem5ColorLong.gif b/util/gem5-resources-manager/static/images/gem5ColorLong.gif
new file mode 100644
index 0000000000..552e4d12fc
Binary files /dev/null and b/util/gem5-resources-manager/static/images/gem5ColorLong.gif differ
diff --git a/util/gem5-resources-manager/static/images/gem5ResourcesManager.png b/util/gem5-resources-manager/static/images/gem5ResourcesManager.png
new file mode 100644
index 0000000000..dac4cb595e
Binary files /dev/null and b/util/gem5-resources-manager/static/images/gem5ResourcesManager.png differ
diff --git a/util/gem5-resources-manager/static/js/app.js b/util/gem5-resources-manager/static/js/app.js
new file mode 100644
index 0000000000..ed5025a2f2
--- /dev/null
+++ b/util/gem5-resources-manager/static/js/app.js
@@ -0,0 +1,135 @@
+const loadingContainer = document.getElementById("loading-container");
+const alertPlaceholder = document.getElementById('liveAlertPlaceholder');
+const interactiveElems = document.querySelectorAll('button, input, select');
+
+const appendAlert = (errorHeader, id, message, type) => {
+  // Shows a dismissible Bootstrap alert styled `alert-${type}` in the placeholder; it closes itself after 5 seconds.
+  const alertDiv = document.createElement('div');
+  alertDiv.classList.add("alert", `alert-${type}`, "alert-dismissible", "fade", "show", "d-flex", "flex-column", "shadow-sm");
+  alertDiv.setAttribute("role", "alert");
+  alertDiv.setAttribute("id", id);
+  alertDiv.style.maxWidth = "320px";
+  // NOTE: errorHeader and message are interpolated as raw HTML below.
+  alertDiv.innerHTML = [
+    `  <div class="d-flex align-items-center main-text-semi">`,
+    `    <svg xmlns="http://www.w3.org/2000/svg" fill="currentColor" height="1.5rem" class="bi bi-exclamation-octagon-fill me-3" viewBox="0 0 16 16">`,
+    `      <path d="M11.46.146A.5.5 0 0 0 11.107 0H4.893a.5.5 0 0 0-.353.146L.146 4.54A.5.5 0 0 0 0 4.893v6.214a.5.5 0 0 0 .146.353l4.394 4.394a.5.5 0 0 0
+              .353.146h6.214a.5.5 0 0 0 .353-.146l4.394-4.394a.5.5 0 0 0 .146-.353V4.893a.5.5 0 0 0-.146-.353L11.46.146zM8 4c.535 0 .954.462.9.995l-.35
+              3.507a.552.552 0 0 1-1.1 0L7.1 4.995A.905.905 0 0 1 8 4zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z"/>`,
+    `    </svg>`,
+    `    <span class="main-text-regular">${errorHeader}</span>`,
+    `      <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>`,
+    `    </div>`,
+    `  <hr />`,
+    `  <div>${message}</div>`,
+  ].join('');
+
+  window.scrollTo(0, 0);
+  alertPlaceholder.append(alertDiv);
+  // Close this exact element: an id lookup can match another alert with the same id, or nothing once dismissed.
+  setTimeout(function () {
+    bootstrap.Alert.getOrCreateInstance(alertDiv).close();
+  }, 5000);
+}
+
+function toggleInteractables(isBlocking, excludedOnNotBlockingIds = [], otherBlockingUpdates = () => {}) {
+  if (isBlocking) {
+    loadingContainer.classList.add("d-flex");
+    interactiveElems.forEach(elems => {
+      elems.disabled = true;
+    });
+    window.scrollTo(0, 0);
+    otherBlockingUpdates();
+    return;
+  }
+
+  setTimeout(() => {
+    loadingContainer.classList.remove("d-flex");
+    interactiveElems.forEach(elems => {
+      !excludedOnNotBlockingIds.includes(elems.id) ? elems.disabled = false : null;
+    });
+    otherBlockingUpdates();
+  }, 250);
+}
+
+function showResetSavedSessionsModal() {
+  let sessions = localStorage.getItem("sessions");
+  if (sessions === null) {
+    appendAlert('Error!', 'noSavedSessions', `No Saved Sessions Exist!`, 'danger');
+    return;
+  }
+  sessions = JSON.parse(sessions);
+
+  const resetSavedSessionsModal = new bootstrap.Modal(document.getElementById('resetSavedSessionsModal'), {
+    focus: true, keyboard: false
+  });
+
+
+  let select = document.getElementById("delete-session-dropdown");
+  select.innerHTML = "";
+  Object.keys(sessions).forEach((alias) => {
+    let option = document.createElement("option");
+    option.value = alias;
+    option.innerHTML = alias;
+    select.appendChild(option);
+  });
+
+  document.getElementById("selected-session").innerText = `"${document.getElementById("delete-session-dropdown").value}"`;
+
+  resetSavedSessionsModal.show();
+}
+
+function resetSavedSessions() {
+  bootstrap.Modal.getInstance(document.getElementById("resetSavedSessionsModal")).hide();
+
+  const sessions = JSON.parse(localStorage.getItem("sessions"));
+  if (sessions === null) {
+    appendAlert('Error!', 'noSavedSessions', `No Saved Sessions Exist!`, 'danger');
+    return;
+  }
+
+  const activeTab = document.getElementById("reset-tabs").querySelector(".nav-link.active").getAttribute("id");
+  if (activeTab === "delete-one-tab") {
+    const deleteOneConfirmation = document.getElementById("delete-one-confirmation").value;
+    if (deleteOneConfirmation !== document.getElementById("delete-session-dropdown").value) {
+      document.getElementById("resetSavedSessionsModal").querySelectorAll("form").forEach(form => {
+        form.reset();
+      })
+      appendAlert('Error!', 'noSavedSessions', `Invalid Confirmation Entry!`, 'danger');
+      return;
+    }
+
+    delete sessions[document.getElementById("delete-session-dropdown").value];
+    Object.keys(sessions).length === 0
+      ? localStorage.removeItem("sessions")
+      : localStorage.setItem("sessions", JSON.stringify(sessions));
+
+    } else {
+    const deleteAllConfirmation = document.getElementById("delete-all-confirmation").value;
+    if (deleteAllConfirmation !== "Delete All") {
+      document.getElementById("resetSavedSessionsModal").querySelectorAll("form").forEach(form => {
+        form.reset();
+      })
+      appendAlert('Error!', 'noSavedSessions', `Invalid Confirmation Entry!`, 'danger');
+      return;
+    }
+
+    localStorage.removeItem("sessions");
+  }
+
+  appendAlert('Success!', 'resetCookies', `Saved Session Reset Successful!`, 'success');
+  setTimeout(() => {
+    location.reload();
+  }, 750);
+}
+
+document.getElementById("close-reset-modal").addEventListener("click", () => {
+  document.getElementById("resetSavedSessionsModal").querySelectorAll("form").forEach(form => {
+    form.reset();
+  })
+});
+
+document.getElementById("delete-session-dropdown").addEventListener("change", () => {
+  document.getElementById("selected-session").innerText =
+    `"${document.getElementById("delete-session-dropdown").value}"`;
+});
diff --git a/util/gem5-resources-manager/static/js/editor.js b/util/gem5-resources-manager/static/js/editor.js
new file mode 100644
index 0000000000..64786da0bd
--- /dev/null
+++ b/util/gem5-resources-manager/static/js/editor.js
@@ -0,0 +1,589 @@
+const diffEditorContainer = document.getElementById("diff-editor");
+var diffEditor;
+var originalModel;
+var modifiedModel;
+
+const schemaEditorContainer = document.getElementById("schema-editor");
+var schemaEditor;
+var schemaModel;
+
+const schemaButton = document.getElementById("schema-toggle");
+const editingActionsButtons = Array.from(
+  document.querySelectorAll("#editing-actions button")
+);
+var editingActionsState;
+
+const tooltipTriggerList = document.querySelectorAll('[data-bs-toggle="tooltip"]');
+tooltipTriggerList.forEach(tooltip => {
+  tooltip.setAttribute("data-bs-trigger", "hover");
+});
+const tooltipList = [...tooltipTriggerList].map(tooltipTriggerEl => new bootstrap.Tooltip(tooltipTriggerEl));
+
+require.config({
+  paths: {
+    vs: "https://cdnjs.cloudflare.com/ajax/libs/monaco-editor/0.26.1/min/vs",
+  },
+});
+require(["vs/editor/editor.main"], () => {
+  originalModel = monaco.editor.createModel(`{\n}`, "json");
+  modifiedModel = monaco.editor.createModel(`{\n}`, "json");
+  diffEditor = monaco.editor.createDiffEditor(diffEditorContainer, {
+    theme: "vs-dark",
+    language: "json",
+    automaticLayout: true,
+  });
+  diffEditor.setModel({
+    original: originalModel,
+    modified: modifiedModel,
+  });
+  fetch("/schema")
+    .then((res) => res.json())
+    .then((data) => {
+      monaco.languages.json.jsonDefaults.setDiagnosticsOptions({
+        trailingCommas: "error",
+        comments: "error",
+        validate: true,
+        schemas: [
+          {
+            uri: "http://json-schema.org/draft-07/schema",
+            fileMatch: ["*"],
+            schema: data,
+          },
+        ],
+      });
+
+      schemaEditor = monaco.editor.create(schemaEditorContainer, {
+        theme: "vs-dark",
+        language: "json",
+        automaticLayout: true,
+        readOnly: true,
+      });
+
+      schemaModel = monaco.editor.createModel(`{\n}`, "json");
+      schemaEditor.setModel(schemaModel);
+      schemaModel.setValue(JSON.stringify(data, null, 4));
+
+      schemaEditorContainer.style.display = "none";
+    });
+});
+
+let clientType = document.getElementById('client-type');
+clientType.textContent = clientType.textContent === "mongodb" ? "MongoDB" : clientType.textContent.toUpperCase();
+
+const revisionButtons = [document.getElementById("undo-operation"), document.getElementById("redo-operation")];
+revisionButtons.forEach(btn => {
+  btn.disabled = true;
+});
+
+const editorGroupIds = [];
+document.querySelectorAll(".editorButtonGroup button, .revisionButtonGroup button")
+  .forEach(btn => {
+    editorGroupIds.push(btn.id);
+  });
+
+function checkErrors() {
+  // Reads Monaco's validation markers for the edited model; alerts and returns true if any exist, else false.
+  const errors = monaco.editor.getModelMarkers({ resource: modifiedModel.uri });
+  if (errors.length === 0) {
+    return false;
+  }
+  console.log(errors);
+  // One marker message per line. Pass the string itself: the previous `{ str }` was an
+  // object, which the alert template interpolated as "[object Object]".
+  const str = errors.map((error) => error.message + "\n").join("");
+  appendAlert('Error!', 'schemaError', str, 'danger');
+  return true;
+}
+let didChange = false;
+
+function update(e) {
+  e.preventDefault();
+  if (checkErrors()) {
+    return;
+  }
+  let json = JSON.parse(modifiedModel.getValue());
+  let original_json = JSON.parse(originalModel.getValue());
+
+  console.log(json);
+  fetch("/update", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      resource: json,
+      original_resource: original_json,
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then(async (data) => {
+      console.log(data);
+      await addVersions();
+      //Select last option
+      document.getElementById("version-dropdown").value =
+        json["resource_version"];
+      console.log(document.getElementById("version-dropdown").value);
+      find(e);
+    });
+}
+
+function addNewResource(e) {
+  e.preventDefault();
+  if (checkErrors()) {
+    return;
+  }
+  let json = JSON.parse(modifiedModel.getValue());
+  console.log(json);
+  fetch("/insert", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      resource: json,
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then(async (data) => {
+      console.log(data);
+      await addVersions();
+      //Select last option
+      document.getElementById("version-dropdown").value =
+        json["resource_version"];
+      console.log(document.getElementById("version-dropdown").value);
+      find(e);
+    });
+}
+
+function addVersion(e) {
+  e.preventDefault();
+  console.log("add version");
+  if (checkErrors()) {
+    return;
+  }
+  let json = JSON.parse(modifiedModel.getValue());
+  console.log(json["resource_version"]);
+  fetch("/checkExists", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      id: json["id"],
+      resource_version: json["resource_version"],
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then((data) => {
+      console.log(data["exists"]);
+      if (data["exists"] == true) {
+        appendAlert("Error!", "existingResourceVersion", "Resource version already exists!", "danger");
+        return;
+      } else {
+        fetch("/insert", {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+          },
+          body: JSON.stringify({
+            resource: json,
+            alias: document.getElementById("alias").innerText,
+          }),
+        })
+          .then((res) => res.json())
+          .then(async (data) => {
+            console.log("added version");
+            console.log(data);
+            await addVersions();
+            //Select last option
+            document.getElementById("version-dropdown").value =
+              json["resource_version"];
+            console.log(document.getElementById("version-dropdown").value);
+            find(e);
+          });
+      }
+    });
+}
+
+function deleteRes(e) {
+  e.preventDefault();
+  console.log("delete");
+  let id = document.getElementById("id").value;
+  let resource_version = JSON.parse(originalModel.getValue())[
+    "resource_version"
+  ];
+  let json = JSON.parse(originalModel.getValue());
+  console.log(resource_version);
+  fetch("/delete", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      resource: json,
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then(async (data) => {
+      console.log(data);
+      await addVersions();
+      //Select first option
+      document.getElementById("version-dropdown").value =
+        document.getElementById("version-dropdown").options[0].value;
+      console.log(document.getElementById("version-dropdown").value);
+      find(e);
+    });
+}
+
+document.getElementById("id").onchange = function () {
+  console.log("id changed");
+  didChange = true;
+};
+
+async function addVersions() {
+  let select = document.getElementById("version-dropdown");
+  select.innerHTML = "Latest";
+  await fetch("/versions", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      id: document.getElementById("id").value,
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then((data) => {
+      let select = document.getElementById("version-dropdown");
+      if (data.length == 0) {
+        data = [{ resource_version: "Latest" }];
+      }
+      data.forEach((version) => {
+        let option = document.createElement("option");
+        option.value = version["resource_version"];
+        option.innerText = version["resource_version"];
+        select.appendChild(option);
+      });
+    });
+}
+
+function find(e) {
+  e.preventDefault();
+  if (didChange) {
+    addVersions();
+    didChange = false;
+  }
+
+  closeSchema();
+
+  toggleInteractables(true, editorGroupIds, () => {
+    diffEditor.updateOptions({ readOnly: true });
+    updateRevisionBtnsDisabledAttr();
+  });
+
+  fetch("/find", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      id: document.getElementById("id").value,
+      resource_version: document.getElementById("version-dropdown").value,
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then((data) => {
+      console.log(data);
+      toggleInteractables(false, editorGroupIds, () => {
+        diffEditor.updateOptions({ readOnly: false });
+        updateRevisionBtnsDisabledAttr();
+      });
+
+      if (data["exists"] == false) {
+        fetch("/keys", {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+          },
+          body: JSON.stringify({
+            category: document.getElementById("category").value,
+            id: document.getElementById("id").value,
+          }),
+        })
+          .then((res) => res.json())
+          .then((data) => {
+            console.log(data)
+            data["id"] = document.getElementById("id").value;
+            data["category"] = document.getElementById("category").value;
+            originalModel.setValue(JSON.stringify(data, null, 4));
+            modifiedModel.setValue(JSON.stringify(data, null, 4));
+
+            document.getElementById("add_new_resource").disabled = false;
+            document.getElementById("add_version").disabled = true;
+            document.getElementById("delete").disabled = true;
+            document.getElementById("update").disabled = true;
+          });
+      } else {
+        console.log(data);
+        originalModel.setValue(JSON.stringify(data, null, 4));
+        modifiedModel.setValue(JSON.stringify(data, null, 4));
+
+        document.getElementById("version-dropdown").value =
+          data.resource_version;
+        document.getElementById("category").value = data.category;
+
+        document.getElementById("add_new_resource").disabled = true;
+        document.getElementById("add_version").disabled = false;
+        document.getElementById("delete").disabled = false;
+        document.getElementById("update").disabled = false;
+      }
+    });
+}
+
+window.onload = () => {
+  let ver_dropdown = document.getElementById("version-dropdown");
+  let option = document.createElement("option");
+  option.value = "Latest";
+  option.innerHTML = "Latest";
+  ver_dropdown.appendChild(option);
+  fetch("/categories")
+    .then((res) => res.json())
+    .then((data) => {
+      console.log(data);
+      let select = document.getElementById("category");
+      data.forEach((category) => {
+        let option = document.createElement("option");
+        option.value = category;
+        option.innerHTML = category;
+        select.appendChild(option);
+      });
+      fetch("/keys", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          category: document.getElementById("category").value,
+          id: "",
+        }),
+      })
+        .then((res) => res.json())
+        .then((data) => {
+          data["id"] = "";
+          data["category"] = document.getElementById("category").value;
+          originalModel.setValue(JSON.stringify(data, null, 4));
+          modifiedModel.setValue(JSON.stringify(data, null, 4));
+          document.getElementById("add_new_resource").disabled = false;
+        });
+    });
+
+  checkExistingSavedSession();
+};
+
+const myModal = new bootstrap.Modal("#ConfirmModal", {
+  keyboard: false,
+});
+
+let confirmButton = document.getElementById("confirm");
+
+function showModal(event, callback) {
+  event.preventDefault();
+  myModal.show();
+  confirmButton.onclick = () => {
+    callback(event);
+    myModal.hide();
+  };
+}
+
+let editorTitle = document.getElementById("editor-title");
+
+function showSchema() {
+  if (diffEditorContainer.style.display !== "none") {
+    diffEditorContainer.style.display = "none";
+    schemaEditorContainer.classList.add("editor-sizing");
+    schemaEditor.setPosition({ column: 1, lineNumber: 1 });
+    schemaEditor.revealPosition({ column: 1, lineNumber: 1 });
+    schemaEditorContainer.style.display = "block";
+
+    editingActionsState = editingActionsButtons.map(
+      (button) => button.disabled
+    );
+
+    editingActionsButtons.forEach((btn) => {
+      btn.disabled = true;
+    });
+
+    editorTitle.children[0].style.display = "none";
+    editorTitle.children[1].textContent = "Schema (Read Only)";
+
+    schemaButton.textContent = "Close Schema";
+    schemaButton.onclick = closeSchema;
+  }
+}
+
+function closeSchema() {
+  if (schemaEditorContainer.style.display !== "none") {
+    schemaEditorContainer.style.display = "none";
+    diffEditorContainer.style.display = "block";
+
+    editingActionsButtons.forEach((btn, i) => {
+      btn.disabled = editingActionsState[i];
+    });
+
+    editorTitle.children[0].style.display = "unset";
+    editorTitle.children[1].textContent = "Edited";
+
+    schemaButton.textContent = "Show Schema";
+    schemaButton.onclick = showSchema;
+  }
+}
+
+const saveSessionBtn = document.getElementById("saveSession");
+saveSessionBtn.disabled = true;
+
+let password = document.getElementById("session-password");
+password.addEventListener("input", () => {
+  saveSessionBtn.disabled = password.value === "";
+});
+
+function showSaveSessionModal() {
+  const saveSessionModal = new bootstrap.Modal(document.getElementById('saveSessionModal'), {
+    focus: true, keyboard: false
+  });
+  saveSessionModal.show();
+}
+
+function saveSession() {
+  // Posts the alias and password to /saveSession and stores the returned ciphertext under that alias in localStorage.
+  const alias = document.getElementById("alias").innerText; // declared locally; was an implicit global assignment
+  bootstrap.Modal.getInstance(document.getElementById("saveSessionModal")).hide();
+  // Record which editor/revision buttons are already disabled so unblocking below does not re-enable them.
+  let preserveDisabled = [];
+  document.querySelectorAll(".editorButtonGroup button, .revisionButtonGroup button")
+    .forEach(btn => {
+      btn.disabled === true ? preserveDisabled.push(btn.id) : null;
+    });
+
+  toggleInteractables(true);
+
+  fetch("/saveSession", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      alias: alias,
+      password: document.getElementById("session-password").value
+    }),
+  })
+    .then((res) => {
+      document.getElementById("saveSessionForm").reset();
+
+      toggleInteractables(false, preserveDisabled);
+
+      res.json()
+        .then((data) => {
+          if (res.status === 400) {
+            appendAlert('Error!', 'saveSessionError', `${data["error"]}`, 'danger');
+            return;
+          }
+
+          let sessions = JSON.parse(localStorage.getItem("sessions")) || {};
+          sessions[alias] = data["ciphertext"];
+          localStorage.setItem("sessions", JSON.stringify(sessions));
+
+          document.getElementById("showSaveSessionModal").innerText = "Session Saved";
+          checkExistingSavedSession();
+        })
+    })
+}
+
+function executeRevision(event, operation) {
+  if (!["undo", "redo"].includes(operation)) {
+    appendAlert("Error!", "invalidRevOp", "Fatal! Invalid Revision Operation!", "danger");
+    return;
+  }
+
+  toggleInteractables(true, editorGroupIds, () => {
+    diffEditor.updateOptions({ readOnly: true });
+    updateRevisionBtnsDisabledAttr();
+  });
+  fetch(`/${operation}`, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then(() => {
+      toggleInteractables(false, editorGroupIds, () => {
+        diffEditor.updateOptions({ readOnly: false });
+        updateRevisionBtnsDisabledAttr();
+      });
+      find(event);
+    })
+}
+
+function updateRevisionBtnsDisabledAttr() {
+  fetch("/getRevisionStatus", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => res.json())
+    .then((data) => {
+      revisionButtons[0].disabled = data.undo;
+      revisionButtons[1].disabled = data.redo;
+    })
+}
+
+function logout() {
+  // Asks the server to drop this alias from its active sessions, then navigates to the page it redirects to.
+  toggleInteractables(true);
+  fetch("/logout", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      alias: document.getElementById("alias").innerText,
+    }),
+  })
+    .then((res) => {
+      toggleInteractables(false);
+      // fetch() follows the server's 302 itself, so the status seen here is never 302; test `ok` instead.
+      if (!res.ok) {
+        res.json()
+          .then((data) => {
+            appendAlert('Error!', 'logoutError', `${data["error"]}`, 'danger');
+          })
+        return; // stay on the page: res.url is the /logout endpoint itself on failure
+      }
+
+      window.location = res.url;
+    })
+}
+
+function checkExistingSavedSession() {
+  document.getElementById("existing-session-warning").style.display =
+    document.getElementById("alias").innerText in JSON.parse(localStorage.getItem("sessions") || "{}")
+      ? "flex"
+      : "none";
+}
+
+document.getElementById("close-save-session-modal").addEventListener("click", () => {
+  document.getElementById("saveSessionModal").querySelector("form").reset();
+  saveSessionBtn.disabled = password.value === "";
+});
diff --git a/util/gem5-resources-manager/static/js/index.js b/util/gem5-resources-manager/static/js/index.js
new file mode 100644
index 0000000000..1509d2d893
--- /dev/null
+++ b/util/gem5-resources-manager/static/js/index.js
@@ -0,0 +1,75 @@
+window.onload = () => {
+  let select = document.getElementById("sessions-dropdown");
+  const sessions = JSON.parse(localStorage.getItem("sessions"));
+
+  if (sessions === null) {
+    document.getElementById("showSavedSessionModal").disabled = true;
+    return;
+  }
+
+  Object.keys(sessions).forEach((alias) => {
+    let option = document.createElement("option");
+    option.value = alias;
+    option.innerHTML = alias;
+    select.appendChild(option);
+  });
+}
+
+const loadSessionBtn = document.getElementById("loadSession");
+loadSessionBtn.disabled = true;
+
+let password = document.getElementById("session-password");
+password.addEventListener("input", () => {
+  loadSessionBtn.disabled = password.value === "";
+});
+
+document.getElementById("close-load-session-modal").addEventListener("click", () => {
+  document.getElementById("savedSessionModal").querySelector("form").reset();
+})
+
+function showSavedSessionModal() {
+  const savedSessionModal = new bootstrap.Modal(document.getElementById('savedSessionModal'), { focus: true, keyboard: false });
+  savedSessionModal.show();
+}
+
+function loadSession() {
+  // Posts the selected saved session and the entered password to /loadSession; follows the redirect on success.
+  bootstrap.Modal.getInstance(document.getElementById("savedSessionModal")).hide();
+  const alias = document.getElementById("sessions-dropdown").value;
+  // Absent storage parses to null and an unknown alias reads as undefined; `|| {}` and `== null` cover both.
+  const session = (JSON.parse(localStorage.getItem("sessions")) || {})[alias];
+  if (session == null) {
+    appendAlert("Error!", "sessionNotFound", "Saved Session Not Found!", "danger");
+    return;
+  }
+
+  toggleInteractables(true);
+
+  fetch("/loadSession", {
+    method: "POST",
+    headers: {
+      'Content-Type': 'application/json'
+    },
+    body: JSON.stringify({
+      password: document.getElementById("session-password").value,
+      alias: alias,
+      session: session
+    })
+  })
+    .then((res) => {
+      toggleInteractables(false);
+
+      if (res.status !== 200) {
+        res.json()
+          .then((error) => {
+            document.getElementById("savedSessionModal").querySelector("form").reset();
+            appendAlert("Error!", "invalidStatus", `${error["error"]}`, "danger");
+            return;
+          })
+      }
+
+      if (res.redirected) {
+        window.location = res.url;
+      }
+    })
+}
diff --git a/util/gem5-resources-manager/static/js/login.js b/util/gem5-resources-manager/static/js/login.js
new file mode 100644
index 0000000000..b21ffeb458
--- /dev/null
+++ b/util/gem5-resources-manager/static/js/login.js
@@ -0,0 +1,330 @@
+function handleMongoDBLogin(event) {
+  event.preventDefault();
+  const activeTab = document.getElementById("mongodb-login-tabs").querySelector(".nav-link.active").getAttribute("id");
+
+  activeTab === "enter-uri-tab" ? handleEnteredURI() : handleGenerateURI();
+
+  return;
+}
+
+function handleEnteredURI() {
+  const uri = document.getElementById('uri').value;
+  const collection = document.getElementById('collection').value;
+  const database = document.getElementById('database').value;
+  const alias = document.getElementById('alias').value;
+  const emptyInputs = [{ type: "Alias", value: alias }, { type: "Collection", value: collection }, { type: "Database", value: database }, { type: "URI", value: uri }];
+  let error = false;
+
+  for (let i = 0; i < emptyInputs.length; i++) {
+    if (emptyInputs[i].value === "") {
+      appendAlert("Error", `${emptyInputs[i].type}`, `Cannot Proceed Without ${emptyInputs[i].type} Value!`, 'danger');
+      error = true;
+    }
+  }
+
+  if (error) {
+    return;
+  }
+
+  handleMongoURLFetch(uri, collection, database, alias);
+}
+
+function handleGenerateURI() {
+  const connection = document.getElementById('connection').checked;
+  const username = document.getElementById('username').value;
+  const password = document.getElementById('password').value;
+  const collection = document.getElementById('collectionGenerate').value;
+  const database = document.getElementById('databaseGenerate').value;
+  const host = document.getElementById('host').value;
+  const alias = document.getElementById('aliasGenerate').value;
+  const options = document.getElementById('options').value.split(",");
+  let generatedURI = "";
+  const emptyInputs = [{ type: "Alias", value: alias }, { type: "Host", value: host }, { type: "Collection", value: collection }, { type: "Database", value: database }];
+  let error = false;
+
+  for (let i = 0; i < emptyInputs.length; i++) {
+    if (emptyInputs[i].value === "") {
+      appendAlert("Error", `${emptyInputs[i].type}`, `Cannot Proceed Without ${emptyInputs[i].type} Value!`, 'danger');
+      error = true;
+    }
+  }
+
+  if (error) {
+    return;
+  }
+
+  generatedURI = connection ? "mongodb+srv://" : "mongodb://";
+  if (username && password) {
+    generatedURI += `${encodeURIComponent(username)}:${encodeURIComponent(password)}@`;
+  }
+
+  generatedURI += host;
+
+  if (options.length) {
+    generatedURI += `/?${options.join("&")}`;
+  }
+
+  handleMongoURLFetch(generatedURI, collection, database, alias);
+}
+
+function handleMongoURLFetch(uri, collection, database, alias) {
+  toggleInteractables(true);
+
+  fetch("/validateMongoDB",
+    {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({
+        uri: uri,
+        collection: collection,
+        database: database,
+        alias: alias
+      })
+    })
+    .then((res) => {
+      toggleInteractables(false);
+
+      if (!res.ok) {
+        res.json()
+          .then(error => {
+            appendAlert('Error!', 'mongodbValidationError', `${error.error}`, 'danger');
+          });
+        return;
+      }
+
+      res.redirected ? window.location = res.url : appendAlert('Error!', 'invalidRes', 'Invalid Server Response!', 'danger');
+    })
+}
+
+function handleJSONLogin(event) {
+  event.preventDefault();
+  const activeTab = document.getElementById("json-login-tabs").querySelector(".nav-link.active").getAttribute("id");
+  if (activeTab === "remote-tab") {
+    handleRemoteJSON();
+  } else if (activeTab === "existing-tab") {
+    const filename = document.getElementById("existing-dropdown").value;
+    if (filename !== "No Existing Files") {
+      toggleInteractables(true);
+
+      fetch(`/existingJSON?filename=${filename}`,
+        {
+          method: 'GET',
+          headers: {
+            'Content-Type': 'application/json'
+          }
+        })
+        .then((res) => {
+          toggleInteractables(false);
+
+          if (res.status !== 200) {
+            appendAlert('Error!', 'invalidURL', 'Invalid JSON File URL!', 'danger');
+          }
+          if (res.redirected) {
+            window.location = res.url;
+          }
+        })
+    }
+  } else {
+    handleUploadJSON();
+  }
+  return;
+}
+
+function handleRemoteJSON() {
+  const url = document.getElementById("jsonRemoteURL").value;
+  const filename = document.getElementById("remoteFilename").value;
+  const emptyInputs = [{ type: "URL", value: url }, { type: "Filename", value: filename }];
+  let error = false;
+
+  for (let i = 0; i < emptyInputs.length; i++) {
+    if (emptyInputs[i].value === "") {
+      appendAlert("Error", `${emptyInputs[i].type}`, `Cannot Proceed Without ${emptyInputs[i].type} Value!`, 'danger');
+      error = true;
+    }
+  }
+
+  if (error) {
+    return;
+  }
+
+  const params = new URLSearchParams();
+  params.append('filename', filename + ".json");
+  params.append('q', url);
+
+  const flask_url = `/validateJSON?${params.toString()}`;
+
+  toggleInteractables(true);
+
+  fetch(flask_url, {
+    method: 'GET',
+  })
+    .then((res) => {
+      toggleInteractables(false);
+
+      if (res.status === 400) {
+        appendAlert('Error!', 'invalidURL', 'Invalid JSON File URL!', 'danger');
+      }
+
+      if (res.status === 409) {
+        const myModal = new bootstrap.Modal(document.getElementById('conflictResolutionModal'), { focus: true, keyboard: false });
+        document.getElementById("header-filename").textContent = `"${filename}"`;
+        myModal.show();
+      }
+
+      if (res.redirected) {
+        window.location = res.url;
+      }
+    })
+}
+
+var filename;
+
+function handleUploadJSON() {
+  const jsonFile = document.getElementById("jsonFile");
+  const file = jsonFile.files[0];
+
+  if (jsonFile.value === "") {
+    appendAlert('Error!', 'emptyUpload', 'Cannot Proceed Without Uploading a File!', 'danger');
+    return;
+  }
+
+  filename = file.name;
+
+  const form = new FormData();
+  form.append("file", file);
+
+  toggleInteractables(true);
+
+  fetch("/validateJSON", {
+    method: 'POST',
+    body: form
+  })
+    .then((res) => {
+      toggleInteractables(false);
+
+      if (res.status === 400) {
+        appendAlert('Error!', 'invalidUpload', 'Invalid JSON File Upload!', 'danger');
+      }
+
+      if (res.status === 409) {
+        const myModal = new bootstrap.Modal(document.getElementById('conflictResolutionModal'), { focus: true, keyboard: false });
+        document.getElementById("header-filename").textContent = `"${filename}"`;
+        myModal.show();
+      }
+
+      if (res.redirected) {
+        window.location = res.url;
+      }
+    })
+}
+
+function saveConflictResolution() {
+  const conflictResolutionModal = bootstrap.Modal.getInstance(document.getElementById("conflictResolutionModal"));
+  const selectedValue = document.querySelector('input[name="conflictRadio"]:checked').id;
+  const activeTab = document.getElementById("json-login-tabs").querySelector(".nav-link.active").getAttribute("id");
+
+  if (selectedValue === null) {
+    appendAlert('Error!', 'nullRadio', 'Fatal! Null Radio!', 'danger');
+    return;
+  }
+
+  if (selectedValue === "clearInput") {
+    if (activeTab === "upload-tab") {
+      document.getElementById("jsonFile").value = '';
+    }
+
+    if (activeTab === "remote-tab") {
+      document.getElementById('remoteFilename').value = '';
+      document.getElementById('jsonRemoteURL').value = '';
+    }
+
+    conflictResolutionModal.hide();
+    handleConflictResolution("clearInput", filename.split(".")[0]);
+    return;
+  }
+
+  if (selectedValue === "openExisting") {
+    conflictResolutionModal.hide();
+    handleConflictResolution("openExisting", filename.split(".")[0]);
+    return;
+  }
+
+  if (selectedValue === "overwrite") {
+    conflictResolutionModal.hide();
+    handleConflictResolution("overwrite", filename.split(".")[0]);
+    return;
+  }
+
+  if (selectedValue === "newFilename") {
+    const updatedFilename = document.getElementById("updatedFilename").value;
+    if (updatedFilename === "") {
+      appendAlert('Error!', 'emptyFilename', 'Must Enter A New Name!', 'danger');
+      return;
+    }
+
+    if (`${updatedFilename}.json` === filename) {
+      appendAlert('Error!', 'sameFilenames', 'Cannot Have Same Name as Current!', 'danger');
+      return;
+    }
+
+    conflictResolutionModal.hide();
+    handleConflictResolution("newFilename", updatedFilename);
+    return;
+  }
+}
+
+function handleConflictResolution(resolution, filename) {
+  const params = new URLSearchParams();
+  params.append('resolution', resolution);
+  params.append('filename', filename !== "" ? filename + ".json" : "");
+
+  const flask_url = `/resolveConflict?${params.toString()}`;
+  toggleInteractables(true);
+
+  fetch(flask_url, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json'
+    }
+  })
+    .then((res) => {
+      toggleInteractables(false);
+
+      if (res.status === 204) {
+        console.log("Input Cleared, Cached File Deleted, Resources Unset");
+        return;
+      }
+
+      if (res.status !== 200) {
+        appendAlert('Error!', 'didNotRedirect', 'Server Did Not Redirect!', 'danger');
+        return;
+      }
+
+      if (res.redirected) {
+        window.location = res.url;
+      }
+    })
+}
+
+window.onload = () => {
+  if (window.location.pathname === "/login/json") {
+    fetch('/existingFiles', {
+      method: 'GET',
+    })
+      .then((res) => res.json())
+      .then((data) => {
+        let select = document.getElementById("existing-dropdown");
+        if (data.length === 0) {
+          data = ["No Existing Files"];
+        }
+        data.forEach((files) => {
+          let option = document.createElement("option");
+          option.value = files;
+          option.innerHTML = files;
+          select.appendChild(option);
+        });
+      });
+  }
+}
diff --git a/util/gem5-resources-manager/static/styles/global.css b/util/gem5-resources-manager/static/styles/global.css
new file mode 100644
index 0000000000..caa446a60b
--- /dev/null
+++ b/util/gem5-resources-manager/static/styles/global.css
@@ -0,0 +1,231 @@
+@import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap');
+@import url('https://fonts.googleapis.com/css2?family=Mulish:wght@700&display=swap');
+
+html,
+body {
+  min-height: 100vh;
+  margin: 0;
+}
+
+.btn-outline-primary {
+  --bs-btn-color: #0095AF;
+  --bs-btn-bg: #FFFFFF;
+  --bs-btn-border-color: #0095AF;
+  --bs-btn-hover-color: #fff;
+  --bs-btn-hover-bg: #0095AF;
+  --bs-btn-hover-border-color: #0095AF;
+  --bs-btn-focus-shadow-rgb: 13, 110, 253;
+  --bs-btn-active-color: #fff;
+  --bs-btn-active-bg: #0095AF;
+  --bs-btn-active-border-color: #0095AF;
+  --bs-btn-active-shadow: inset 0 3px 5px rgba(0, 0, 0, 0.125);
+  --bs-btn-disabled-color: white;
+  --bs-btn-disabled-bg: grey;
+  --bs-btn-disabled-border-color: grey;
+  --bs-gradient: none;
+}
+
+.btn-box-shadow {
+  box-shadow: rgba(0, 0, 0, 0.24) 0px 3px 8px;
+}
+
+.calc-main-height {
+  height: calc(100vh - 81px);
+}
+
+.main-text-semi {
+  font-family: 'Open Sans', sans-serif;
+  font-weight: 600;
+  font-size: 1rem;
+}
+
+.main-text-regular,
+.buttonGroup>button,
+#markdown-body-styling p,
+#markdown-body-styling li {
+  font-family: 'Open Sans', sans-serif;
+  font-weight: 400;
+  font-size: 1rem;
+}
+
+.secondary-text-semi {
+  font-family: 'Open Sans', sans-serif;
+  font-weight: 600;
+  font-size: 1.25rem;
+}
+
+.secondary-text-bold {
+  font-family: 'Open Sans', sans-serif;
+  font-weight: 700; /* bold weight, as in .main-text-bold (600 is the "semi" weight) */
+  font-size: 1.25rem;
+}
+
+.main-text-bold {
+  font-family: 'Open Sans', sans-serif;
+  font-weight: 700;
+  font-size: 1rem;
+}
+
+.page-title,
+#markdown-body-styling h1 {
+  color: #425469;
+  font-family: 'Mulish', sans-serif;
+  font-weight: 700;
+  font-size: 2.5rem;
+}
+
+.main-panel-container {
+  max-width: 530px;
+  padding-top: 5rem;
+  padding-bottom: 5rem;
+}
+
+.input-shadow,
+.form-input-shadow>input {
+  box-shadow: rgba(0, 0, 0, 0.35) 0px 5px 15px;
+}
+
+.panel-container {
+  background: rgba(0, 149, 175, 0.50);
+  border-radius: 1rem;
+  box-shadow: rgba(0, 0, 0, 0.24) 0px 3px 8px, rgba(0, 0, 0, 0.35) 0px 5px 15px;
+  height: 555px;
+  width: 530px;
+}
+
+.panel-text-styling,
+#generate-uri-form>label {
+  text-shadow: 0px 0px 5px rgba(0, 0, 0, 0.50);
+  color: white;
+}
+
+.editorContainer {
+  width: 80%;
+}
+
+.monaco-editor {
+  position: absolute !important;
+}
+
+.editor-sizing {
+  min-height: 650px;
+  height: 75%;
+  width: 100%;
+}
+
+#liveAlertPlaceholder {
+  position: absolute;
+  margin-top: 1rem;
+  right: 2rem;
+  margin-left: 2rem;
+  z-index: 1040;
+}
+
+.alert-dismissible {
+  padding-right: 1rem;
+}
+
+.reset-nav,
+.login-nav {
+  --bs-nav-link-color: #0095AF;
+  --bs-nav-link-hover-color: white;
+  --bs-nav-tabs-link-active-color: #0095AF;
+}
+
+.login-nav-link {
+  color: white;
+  text-shadow: 0px 0px 5px rgba(0, 0, 0, 0.50);
+}
+
+.login-nav-link.active {
+  text-shadow: none;
+}
+
+.navbar-nav>.nav-link:hover {
+  text-decoration: underline;
+}
+
+.reset-nav-link:hover,
+.login-nav-link:hover {
+  background-color: #0095AF;
+}
+
+.reset-nav-link {
+  color: black;
+}
+
+.form-check-input:checked {
+  background-color: #6c6c6c;
+  border-color: #6c6c6c;
+}
+
+#markdown-body-styling h1 {
+  color: #425469;
+}
+
+code {
+  display: inline-table;
+  overflow-x: auto;
+  padding: 2px;
+  color: #333;
+  background: #f8f8f8;
+  border: 1px solid #ccc;
+  border-radius: 3px;
+}
+
+.editor-tooltips {
+  --bs-tooltip-bg: #0095AF;
+  --bs-tooltip-opacity: 1;
+}
+
+#loading-container {
+  display: none;
+  position: absolute;
+  right: 2rem;
+  margin-top: 1rem;
+}
+
+.spinner {
+  --bs-spinner-width: 2.25rem;
+  --bs-spinner-height: 2.25rem;
+  --bs-spinner-border-width: 0.45em;
+  border-color: #0095AF;
+  border-right-color: transparent;
+}
+
+#saved-confirmation {
+  opacity: 0;
+  transition: opacity 0.5s;
+}
+
+@media (max-width: 991px) {
+  .editorContainer {
+    width: 95%;
+  }
+}
+
+@media (max-width: 425px) {
+
+  .main-text-regular,
+  .main-text-semi,
+  .main-text-bold,
+  .buttonGroup>button,
+  #markdown-body-styling p {
+    font-size: 0.875rem;
+  }
+
+  .secondary-text-semi {
+    font-size: 1rem;
+  }
+
+  .page-title,
+  #markdown-body-styling h1 {
+    font-size: 2.25rem;
+  }
+}
+
+@media (min-width: 425px) {
+  #databaseActions {
+    max-width: 375px;
+  }
+}
diff --git a/util/gem5-resources-manager/templates/404.html b/util/gem5-resources-manager/templates/404.html
new file mode 100644
index 0000000000..0a38326b2e
--- /dev/null
+++ b/util/gem5-resources-manager/templates/404.html
@@ -0,0 +1,20 @@
+{% extends 'base.html' %} {% block head %}
+<title>Page Not Found</title>
+{% endblock %} {% block body %}
+<main class="container-fluid calc-main-height">
+  <div
+    class="d-flex flex-column align-items-center justify-content-center"
+    style="height: inherit"
+  >
+    <h1 style="color: #0095af; font-size: 10rem">404</h1>
+    <p class="main-text-regular text-center">
+      The page you are looking for does not seem to exist.
+    </p>
+    <a
+      href="/"
+      class="btn btn-outline-primary main-text-regular btn-box-shadow mt-2 mb-2"
+      >Home</a
+    >
+  </div>
+</main>
+{% endblock %}
diff --git a/util/gem5-resources-manager/templates/base.html b/util/gem5-resources-manager/templates/base.html
new file mode 100644
index 0000000000..3b89f8f4c1
--- /dev/null
+++ b/util/gem5-resources-manager/templates/base.html
@@ -0,0 +1,96 @@
+<!DOCTYPE html><html lang="en">
+  <head>
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <link rel="icon" type="image/png" href="/static/images/favicon.png">
+    <script src="https://code.jquery.com/jquery-3.6.4.min.js" integrity="sha256-oP6HI9z1XaZNBrJURtCoUT5SUnxFr8s3BzRl+cbzUq8=" crossorigin="anonymous"></script>
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-KK94CHFLLe+nY2dmCWGMq91rCGa5gtU4mk92HdvYe+M/SXH301p5ILy+dN9+nJOZ" crossorigin="anonymous">
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ENjdO4Dr2bkBIFxQpeoTz1HIcje39Wm4jDKdf19U8gI4ddQ3GYNS7NTKfAdVQSZe" crossorigin="anonymous"></script>
+    <link rel="stylesheet" href="/static/styles/global.css">
+    {% block head %}{% endblock %}
+  </head>
+  <body>
+    <nav class="navbar bg-body-tertiary navbar-expand-lg shadow-sm base-nav">
+      <div class="container-fluid">
+        <a class="navbar-brand" href="/">
+          <img src="/static/images/gem5ColorLong.gif" alt="gem5" height="55">
+        </a>
+        <button class="navbar-toggler" type="button" data-bs-toggle="offcanvas" data-bs-target="#offcanvasNavbar" aria-controls="offcanvasNavbar" aria-label="Toggle navigation">
+          <span class="navbar-toggler-icon"></span>
+        </button>
+        <div class="offcanvas offcanvas-end" tabindex="-1" id="offcanvasNavbar" aria-labelledby="offcanvasNavbarLabel">
+          <div class="offcanvas-header">
+            <h5 class="offcanvas-title secondary-text-semi" id="offcanvasNavbarLabel">gem5 Resources Manager</h5>
+            <button type="button" class="btn-close" data-bs-dismiss="offcanvas" aria-label="Close"></button>
+          </div>
+          <div class="offcanvas-body">
+            <div class="navbar-nav justify-content-end flex-grow-1 pe-3">
+              <div class="navbar-nav main-text-regular">
+                <a class="nav-link" href="https://resources.gem5.org/">gem5 Resources</a>
+                <a class="nav-link" href="{{ url_for('help') }}">Help</a>
+                <a id="reset" class="nav-link" role="button" onclick="showResetSavedSessionsModal()">Reset</a>
+              </div>
+            </div></div>
+        </div>
+      </div>
+    </nav>
+    <div id="liveAlertPlaceholder"></div>
+    <div id="loading-container" class="align-items-center justify-content-center">
+      <span class="main-text-semi me-3">Processing...</span>
+      <div class="spinner-border spinner" role="status">
+        <span class="visually-hidden">Processing...</span>
+      </div>
+    </div>
+    <div class="modal fade" id="resetSavedSessionsModal" tabindex="-1" aria-labelledby="resetSavedSessionsLabel" aria-hidden="true" data-bs-backdrop="static">
+      <div class="modal-dialog">
+        <div class="modal-content">
+          <div class="modal-header secondary-text-semi">
+            <h5 class="modal-title secondary-text-semi" id="resetSavedSessionsLabel">Reset Saved Sessions</h5>
+            <button type="button" id="close-reset-modal" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+          </div>
+          <div class="modal-body">
+            <div class="container-fluid">
+              <h5 class="secondary-text-semi mb-3" style="text-align: center">Once You Delete Sessions, There is no Going Back. Please be Certain.</h5>
+              <ul class="nav nav-tabs nav-fill reset-nav main-text-semi panel-text-styling" id="reset-tabs" role="tablist">
+                <li class="nav-item" role="presentation">
+                  <button class="nav-link active reset-nav-link" id="delete-one-tab" data-bs-toggle="tab" data-bs-target="#delete-one-panel" type="button" role="tab">Delete One</button>
+                </li>
+                <li class="nav-item" role="presentation">
+                  <button class="nav-link reset-nav-link" id="delete-all-tab" data-bs-toggle="tab" data-bs-target="#delete-all-panel" type="button" role="tab">Delete All</button>
+                </li>
+              </ul>
+              <div class="tab-content mt-3" id="tabContent">
+                <div class="tab-pane fade show active" id="delete-one-panel" role="tabpanel">
+                  <div class="d-flex justify-content-center flex-column m-auto" style="width: 90%;">
+                    <h4 class="main-text-semi mt-3 mb-3" style="text-align: center;">Select One Saved Session to Delete.</h4>
+                    <form class="row mt-3">
+                      <label for="delete-session-dropdown" class="form-label main-text-regular ps-1">Saved Sessions</label>
+                      <select id="delete-session-dropdown" class="form-select input-shadow" aria-label="Select Session"></select>
+                      <label for="delete-one-confirmation" class="form-label main-text-regular ps-1 mt-3">
+                        To confirm, type <span id="selected-session"></span> below.
+                      </label>
+                      <input type="text" class="form-control input-shadow main-text-regular" id="delete-one-confirmation" placeholder="Enter Confirmation..." />
+                    </form>
+                  </div>
+                </div>
+                <div class="tab-pane fade" id="delete-all-panel" role="tabpanel">
+                  <div class="d-flex justify-content-center flex-column m-auto" style="width: 90%;">
+                    <h4 class="main-text-semi mt-3 mb-3" style="text-align: center;">All Saved Sessions Will be Deleted.</h4>
+                    <form class="d-flex flex-column mt-3">
+                      <label for="delete-all-confirmation" class="form-label main-text-regular ps-1">To confirm, type "Delete All" below.</label>
+                      <input type="text" class="form-control input-shadow main-text-regular" id="delete-all-confirmation" placeholder="Enter Confirmation..." />
+                    </form>
+                  </div>
+                </div>
+              </div>
+            </div>
+          </div>
+          <div class="modal-footer">
+            <button id="resetCookies" type="button" class="btn btn-outline-primary" onclick="resetSavedSessions()">Reset</button>
+          </div>
+        </div>
+      </div>
+    </div>
+    {% block body %}{% endblock %}
+    <script src="/static/js/app.js"></script>
+  </body>
+</html>
diff --git a/util/gem5-resources-manager/templates/editor.html b/util/gem5-resources-manager/templates/editor.html
new file mode 100644
index 0000000000..813a4d1a4b
--- /dev/null
+++ b/util/gem5-resources-manager/templates/editor.html
@@ -0,0 +1,355 @@
+{% extends 'base.html' %} {% block head %}
+<title>Editor</title>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/monaco-editor/0.36.1/min/vs/loader.min.js"></script>
+{% endblock %} {% block body %}
+<div
+  class="modal fade"
+  id="ConfirmModal"
+  tabindex="-1"
+  aria-labelledby="ConfirmModalLabel"
+  data-bs-backdrop="static"
+  aria-hidden="true"
+>
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <div class="modal-header secondary-text-semi">
+        <h5 class="modal-title" id="ConfirmModalLabel">Confirm Changes</h5>
+        <button
+          type="button"
+          class="btn-close"
+          data-bs-dismiss="modal"
+          aria-label="Close"
+        ></button>
+      </div>
+      <div
+        class="modal-body main-text-semi mt-3 mb-3"
+        style="text-align: center"
+      >
+        These changes may not be able to be undone. Are you sure you want to
+        continue?
+      </div>
+      <div class="modal-footer">
+        <button id="confirm" type="button" class="btn btn-outline-primary">
+          Save Changes
+        </button>
+      </div>
+    </div>
+  </div>
+</div>
+<div
+  class="modal fade"
+  id="saveSessionModal"
+  tabindex="-1"
+  aria-labelledby="saveSessionLabel"
+  aria-hidden="true"
+  data-bs-backdrop="static"
+>
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <div class="modal-header secondary-text-semi">
+        <h5 class="modal-title" id="saveSessionLabel">Save Session</h5>
+        <button
+          type="button"
+          id="close-save-session-modal"
+          class="btn-close"
+          data-bs-dismiss="modal"
+          aria-label="Close"
+        ></button>
+      </div>
+      <div class="modal-body">
+        <div class="container-fluid">
+          <div class="row">
+            <h4
+              id="existing-session-warning"
+              class="main-text-semi text-center flex-column mb-3"
+            >
+              <span>Warning!</span>
+              <span
+                >Existing Saved Session of Same Alias Will Be Overwritten!</span
+              >
+            </h4>
+            <h4 class="main-text-semi text-center">
+              Provide a Password to Secure and Save this Session With.
+            </h4>
+          </div>
+          <form id="saveSessionForm" class="row">
+            <label
+              for="session-password"
+              class="form-label main-text-regular ps-1 mt-3"
+              >Enter Password</label
+            >
+            <input
+              type="password"
+              class="form-control input-shadow main-text-regular"
+              id="session-password"
+              placeholder="Password..."
+            />
+          </form>
+        </div>
+      </div>
+      <div class="modal-footer">
+        <button
+          id="saveSession"
+          type="button"
+          class="btn btn-outline-primary"
+          onclick="saveSession()"
+        >
+          Save Session
+        </button>
+      </div>
+    </div>
+  </div>
+</div>
+<main class="container-fluid calc-main-height">
+  <div class="row" style="height: inherit">
+    <div
+      id="databaseActions"
+      class="col-lg-3 offcanvas-lg offcanvas-start shadow-sm overflow-y-auto"
+      style="background-color: #f8f9fa !important; height: initial"
+    >
+      <div class="d-flex flex-row justify-content-between mt-2">
+        <h5 class="secondary-text-bold mb-0" style="color: #0095af">
+          Database Actions
+        </h5>
+        <button
+          type="button"
+          class="btn-close d-lg-none"
+          data-bs-dismiss="offcanvas"
+          data-bs-target="#databaseActions"
+          aria-label="Close"
+        ></button>
+      </div>
+      <form class="form-outline d-flex flex-column mt-3">
+        <label for="id" class="main-text-regular">Resource ID</label>
+        <div class="d-flex flex-row align-items-center gap-1">
+          <input
+            class="form-control input-shadow"
+            type="text"
+            id="id"
+            placeholder="Enter ID..."
+          />
+          <select
+            id="version-dropdown"
+            class="form-select main-text-regular input-shadow w-auto"
+            aria-label="Resource version"
+          ></select>
+        </div>
+        <label for="category" class="main-text-regular mt-3">Category</label>
+        <select
+          id="category"
+          class="form-select mt-1 input-shadow"
+          aria-label="Resource category"
+        ></select>
+        <input
+          class="btn btn-outline-primary main-text-regular align-self-end btn-box-shadow mt-3"
+          type="submit"
+          onclick="find(event)"
+          value="Find"
+        />
+      </form>
+      <div class="d-flex flex-column align-items-start mt-3 mb-3 gap-3">
+        <h5 class="secondary-text-bold mb-0" style="color: #0095af">
+          Revision Actions
+        </h5>
+        <div
+          class="d-flex flex-column justify-content-center gap-3 main-text-regular revisionButtonGroup"
+        >
+          <span
+            class="d-inline-block"
+            tabindex="0"
+            data-bs-toggle="tooltip"
+            data-bs-placement="right"
+            data-bs-custom-class="editor-tooltips"
+            data-bs-title="Undoes Last Edit to Database"
+          >
+            <button
+              type="button"
+              class="btn btn-outline-primary btn-box-shadow"
+              id="undo-operation"
+              onclick="executeRevision(event, 'undo')"
+            >
+              Undo
+            </button>
+          </span>
+          <span
+            class="d-inline-block"
+            tabindex="0"
+            data-bs-toggle="tooltip"
+            data-bs-placement="right"
+            data-bs-custom-class="editor-tooltips"
+            data-bs-title="Restores Last Undone Change to Database"
+          >
+            <button
+              type="button"
+              class="btn btn-outline-primary btn-box-shadow"
+              id="redo-operation"
+              onclick="executeRevision(event, 'redo')"
+            >
+              Redo
+            </button>
+          </span>
+        </div>
+      </div>
+      <div
+        class="btn-group-vertical gap-3 mt-3 mb-3"
+        role="group"
+        aria-label="Other Database Actions"
+      >
+        <h5 class="secondary-text-bold mb-0" style="color: #0095af">
+          Other Actions
+        </h5>
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="View Schema Database Validated Against"
+        >
+          <button
+            type="button"
+            class="btn btn-outline-primary main-text-regular btn-box-shadow mt-1"
+            id="schema-toggle"
+            onclick="showSchema()"
+          >
+            Show Schema
+          </button>
+        </span>
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="Securely Save Session for Expedited Login"
+        >
+          <button
+            type="button"
+            class="btn btn-outline-primary main-text-regular btn-box-shadow mt-1"
+            id="showSaveSessionModal"
+            onclick="showSaveSessionModal()"
+          >
+            Save Session
+          </button>
+        </span>
+        <button
+          type="button"
+          class="btn btn-outline-primary main-text-regular btn-box-shadow mt-1 w-auto"
+          id="logout"
+          onclick="logout()"
+        >
+          Logout
+        </button>
+      </div>
+    </div>
+    <div class="col ms-auto me-auto" style="max-width: 1440px">
+      <button
+        class="btn btn-outline-primary d-lg-none align-self-start main-text-regular mt-2 ms-1"
+        type="button"
+        data-bs-toggle="offcanvas"
+        data-bs-target="#databaseActions"
+        aria-controls="databaseActions"
+      >
+        Database Actions
+      </button>
+      <div class="d-flex flex-column align-items-center">
+        <h2 id="client-type" class="page-title">{{ client_type }}</h2>
+        <h4
+          id="alias"
+          class="secondary-text-semi"
+          style="color: #425469; word-break: break-all; text-align: center"
+        >
+          {{ alias }}
+        </h4>
+      </div>
+      <div
+        class="d-flex flex-row justify-content-around mt-3"
+        id="editor-title"
+      >
+        <h4 class="secondary-text-semi" style="color: #425469">Original</h4>
+        <h4 class="secondary-text-semi" style="color: #425469">Modified</h4>
+      </div>
+      <div id="diff-editor" class="editor-sizing"></div>
+      <div id="schema-editor"></div>
+      <div
+        id="editing-actions"
+        class="d-flex flex-wrap editorButtonGroup justify-content-end pt-2 pb-2 gap-2 main-text-regular"
+      >
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="Add a New Resource to Database"
+        >
+          <button
+            type="button"
+            class="btn btn-primary btn-box-shadow"
+            id="add_new_resource"
+            onclick="showModal(event, addNewResource)"
+            disabled
+          >
+            Add New Resource
+          </button>
+        </span>
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="Create a New Version of Resource"
+        >
+          <button
+            type="button"
+            class="btn btn-primary btn-box-shadow"
+            id="add_version"
+            onclick="showModal(event, addVersion)"
+            disabled
+          >
+            Add New Version
+          </button>
+        </span>
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="Delete Selected Version of Resource"
+        >
+          <button
+            type="button"
+            class="btn btn-danger btn-box-shadow"
+            id="delete"
+            onclick="showModal(event, deleteRes)"
+            disabled
+          >
+            Delete
+          </button>
+        </span>
+        <span
+          class="d-inline-block"
+          tabindex="0"
+          data-bs-toggle="tooltip"
+          data-bs-placement="top"
+          data-bs-custom-class="editor-tooltips"
+          data-bs-title="Update Current Resource With Modifications"
+        >
+          <button
+            type="button"
+            class="btn btn-primary btn-box-shadow"
+            id="update"
+            onclick="showModal(event, update)"
+            disabled
+          >
+            Update
+          </button>
+        </span>
+      </div>
+    </div>
+  </div>
+</main>
+<script src="/static/js/editor.js"></script>
+{% endblock %}
diff --git a/util/gem5-resources-manager/templates/help.html b/util/gem5-resources-manager/templates/help.html
new file mode 100644
index 0000000000..957a87b59e
--- /dev/null
+++ b/util/gem5-resources-manager/templates/help.html
@@ -0,0 +1,20 @@
+{% extends 'base.html' %} {% block head %}
+<title>Help</title>
+<link
+  rel="stylesheet"
+  href="https://cdnjs.cloudflare.com/ajax/libs/github-markdown-css/5.2.0/github-markdown-light.css"
+  integrity="sha512-n5zPz6LZB0QV1eraRj4OOxRbsV7a12eAGfFcrJ4bBFxxAwwYDp542z5M0w24tKPEhKk2QzjjIpR5hpOjJtGGoA=="
+  crossorigin="anonymous"
+  referrerpolicy="no-referrer"
+/>
+{% endblock %} {% block body %}
+<main class="container d-flex justify-content-center w-100">
+  <div
+    id="markdown-body-styling"
+    class="markdown-body mt-5"
+    style="width: inherit; margin-bottom: 5rem"
+  >
+    {{ rendered_html|safe }}
+  </div>
+</main>
+{% endblock %}
diff --git a/util/gem5-resources-manager/templates/index.html b/util/gem5-resources-manager/templates/index.html
new file mode 100644
index 0000000000..6321a9e5b6
--- /dev/null
+++ b/util/gem5-resources-manager/templates/index.html
@@ -0,0 +1,116 @@
+{% extends 'base.html' %} {% block head %}
+<title>Resources Manager</title>
+{% endblock %} {% block body %}
+<div
+  class="modal fade"
+  id="savedSessionModal"
+  tabindex="-1"
+  aria-labelledby="savedSessionModalLabel"
+  aria-hidden="true"
+  data-bs-backdrop="static"
+>
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <div class="modal-header">
+        <h5 class="modal-title secondary-text-semi" id="savedSessionModalLabel">
+          Load Saved Session
+        </h5>
+        <button
+          type="button"
+          id="close-load-session-modal"
+          class="btn-close"
+          data-bs-dismiss="modal"
+          aria-label="Close"
+        ></button>
+      </div>
+      <div class="modal-body">
+        <div class="container-fluid">
+          <div class="row">
+            <h4 class="main-text-semi text-center">
+              Select Saved Session to Load & Enter Password.
+            </h4>
+          </div>
+          <form class="row mt-3">
+            <label
+              for="sessions-dropdown"
+              class="form-label main-text-regular ps-1"
+              >Saved Sessions</label
+            >
+            <select
+              id="sessions-dropdown"
+              class="form-select input-shadow"
+              aria-label="Select Session"
+            ></select>
+            <label
+              for="session-password"
+              class="form-label main-text-regular ps-1 mt-3"
+              >Enter Password</label
+            >
+            <input
+              type="password"
+              class="form-control input-shadow main-text-regular"
+              id="session-password"
+              placeholder="Password..."
+            />
+          </form>
+        </div>
+      </div>
+      <div class="modal-footer">
+        <button
+          id="loadSession"
+          type="button"
+          class="btn btn-outline-primary"
+          onclick="loadSession()"
+        >
+          Load Session
+        </button>
+      </div>
+    </div>
+  </div>
+</div>
+<main>
+  <div
+    class="container-fluid d-flex justify-content-center main-panel-container"
+  >
+    <div
+      class="d-flex flex-column align-items-center justify-content-center panel-container"
+    >
+      <div class="d-flex flex-column align-items-center mb-3">
+        <div style="width: 50%">
+          <img
+            id="gem5RMImg"
+            class="img-fluid"
+            src="/static/images/gem5ResourcesManager.png"
+            alt="gem5"
+          />
+        </div>
+      </div>
+      <div class="d-flex flex-column justify-content-center mb-3 buttonGroup">
+        <button
+          id="showSavedSessionModal"
+          type="button"
+          class="btn btn-outline-primary btn-box-shadow mt-2 mb-2"
+          onclick="showSavedSessionModal()"
+        >
+          Load Saved Session
+        </button>
+        <a href="{{ url_for('login_mongodb') }}">
+          <button
+            class="btn btn-outline-primary btn-box-shadow mt-2 mb-2 w-100"
+          >
+            MongoDB
+          </button>
+        </a>
+        <a href="{{ url_for('login_json') }}">
+          <button
+            class="btn btn-outline-primary btn-box-shadow mt-2 mb-2 w-100"
+          >
+            JSON
+          </button>
+        </a>
+      </div>
+    </div>
+  </div>
+</main>
+<script src="/static/js/index.js"></script>
+{% endblock %}
diff --git a/util/gem5-resources-manager/templates/login/login_json.html b/util/gem5-resources-manager/templates/login/login_json.html
new file mode 100644
index 0000000000..98663a304b
--- /dev/null
+++ b/util/gem5-resources-manager/templates/login/login_json.html
@@ -0,0 +1,242 @@
+{% extends 'base.html' %} {% block head %}
+<title>JSON Login</title>
+{% endblock %} {% block body %}
+<div
+  class="modal fade"
+  id="conflictResolutionModal"
+  tabindex="-1"
+  aria-labelledby="conflictResolutionModalLabel"
+  data-bs-backdrop="static"
+  aria-hidden="true"
+>
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <div class="modal-header justify-content-center">
+        <h5 class="modal-title" id="conflictResolutionModalLabel">
+          File Conflict
+        </h5>
+      </div>
+      <div class="modal-body">
+        <div class="container-fluid">
+          <div class="row">
+            <h4 class="main-text-semi">
+              <span id="header-filename">File</span>
+              <span
+                >already exists in the server. Select an option below to resolve
+                this conflict.</span
+              >
+            </h4>
+          </div>
+          <div class="row mt-1">
+            <div class="input-group flex-column main-text-regular">
+              <div class="form-check mt-1">
+                <input
+                  class="form-check-input"
+                  type="radio"
+                  name="conflictRadio"
+                  id="openExisting"
+                  checked
+                />
+                <label class="form-check-label" for="openExisting"
+                  >Open Existing</label
+                >
+              </div>
+              <div class="form-check mt-1">
+                <input
+                  class="form-check-input"
+                  type="radio"
+                  name="conflictRadio"
+                  id="clearInput"
+                />
+                <label class="form-check-label" for="clearInput"
+                  >Clear Input</label
+                >
+              </div>
+              <div class="form-check mt-1">
+                <input
+                  class="form-check-input"
+                  type="radio"
+                  name="conflictRadio"
+                  id="overwrite"
+                />
+                <label class="form-check-label" for="overwrite"
+                  >Overwrite Existing File</label
+                >
+              </div>
+              <div class="mt-1">
+                <div class="form-check">
+                  <input
+                    class="form-check-input"
+                    type="radio"
+                    name="conflictRadio"
+                    id="newFilename"
+                  />
+                  <label class="form-check-label" for="newFilename"
+                    >Enter New Filename</label
+                  >
+                </div>
+                <div class="d-flex flex-row align-items-center">
+                  <input
+                    class="form-control mt-1 main-text-regular"
+                    type="text"
+                    id="updatedFilename"
+                    name="updatedFilename"
+                    placeholder="Enter Filename..."
+                  />
+                  <span class="main-text-regular ms-3">.json</span>
+                </div>
+              </div>
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="modal-footer">
+        <button
+          id="confirm"
+          type="button"
+          class="btn btn-outline-primary"
+          onclick="saveConflictResolution()"
+        >
+          Save
+        </button>
+      </div>
+    </div>
+  </div>
+</div>
+<main>
+  <div
+    class="container-fluid d-flex justify-content-center main-panel-container"
+  >
+    <div
+      class="d-flex flex-column align-items-center justify-content-between panel-container h-auto"
+    >
+      <div
+        class="d-flex flex-column align-items-center"
+        style="width: -webkit-fill-available"
+      >
+        <h2 class="page-title panel-text-styling mt-5">JSON</h2>
+        <div class="mt-3" style="width: 75%; margin-bottom: 5rem">
+          <ul
+            class="nav nav-tabs nav-fill login-nav main-text-semi panel-text-styling"
+            id="json-login-tabs"
+            role="tablist"
+          >
+            <li class="nav-item" role="presentation">
+              <button
+                class="nav-link active login-nav-link"
+                id="remote-tab"
+                data-bs-toggle="tab"
+                data-bs-target="#remote-panel"
+                type="button"
+                role="tab"
+              >
+                Remote File
+              </button>
+            </li>
+            <li class="nav-item" role="presentation">
+              <button
+                class="nav-link login-nav-link"
+                id="existing-tab"
+                data-bs-toggle="tab"
+                data-bs-target="#existing-panel"
+                type="button"
+                role="tab"
+              >
+                Existing File
+              </button>
+            </li>
+            <li class="nav-item" role="presentation">
+              <button
+                class="nav-link login-nav-link"
+                id="upload-tab"
+                data-bs-toggle="tab"
+                data-bs-target="#upload-panel"
+                type="button"
+                role="tab"
+              >
+                Local File
+              </button>
+            </li>
+          </ul>
+          <div class="tab-content mt-5" id="tabContent">
+            <div
+              class="tab-pane fade show active"
+              id="remote-panel"
+              role="tabpanel"
+            >
+              <form class="form-outline d-flex flex-column mt-3">
+                <div class="d-flex flex-column">
+                  <label
+                    for="remoteFilename"
+                    class="main-text-semi panel-text-styling mt-3"
+                    >Filename</label
+                  >
+                  <div class="d-flex flex-row align-items-center">
+                    <input
+                      class="form-control mt-1 main-text-regular input-shadow"
+                      type="text"
+                      id="remoteFilename"
+                      name="remoteFilename"
+                      placeholder="Enter Filename..."
+                    />
+                    <span class="main-text-semi panel-text-styling ms-3"
+                      >.json</span
+                    >
+                  </div>
+                </div>
+                <label
+                  for="jsonRemoteURL"
+                  class="main-text-semi panel-text-styling mt-3"
+                  >URL to JSON File</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular input-shadow"
+                  type="text"
+                  id="jsonRemoteURL"
+                  name="jsonRemoteURL"
+                  placeholder="Enter URL..."
+                />
+              </form>
+            </div>
+            <div class="tab-pane fade" id="existing-panel" role="tabpanel">
+              <form class="form-outline d-flex flex-column mt-3">
+                <select
+                  id="existing-dropdown"
+                  class="form-select main-text-regular input-shadow"
+                  style="width: auto"
+                ></select>
+              </form>
+            </div>
+            <div class="tab-pane fade" id="upload-panel" role="tabpanel">
+              <form class="form-outline d-flex flex-column mt-3">
+                <label
+                  for="jsonFile"
+                  class="main-text-semi panel-text-styling mt-3"
+                  >Upload JSON File</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular input-shadow"
+                  type="file"
+                  id="jsonFile"
+                  accept=".json"
+                />
+              </form>
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="d-flex flex-row align-self-end me-3 mb-3 buttonGroup">
+        <button
+          type="button"
+          id="login"
+          class="btn btn-outline-primary btn-box-shadow mt-2 mb-2"
+          onclick="handleJSONLogin(event)"
+        >
+          Login
+        </button>
+      </div>
+    </div>
+  </div>
+</main>
+<script src="/static/js/login.js"></script>
+{% endblock %}
diff --git a/util/gem5-resources-manager/templates/login/login_mongodb.html b/util/gem5-resources-manager/templates/login/login_mongodb.html
new file mode 100644
index 0000000000..83361b578e
--- /dev/null
+++ b/util/gem5-resources-manager/templates/login/login_mongodb.html
@@ -0,0 +1,189 @@
+{% extends 'base.html' %} {% block head %}
+<title>MongoDB Login</title>
+{% endblock %} {% block body %}
+<main>
+  <div
+    class="container-fluid d-flex justify-content-center main-panel-container"
+  >
+    <div
+      class="d-flex flex-column align-items-center justify-content-around panel-container h-auto"
+    >
+      <div
+        class="d-flex flex-column align-items-center"
+        style="width: -webkit-fill-available"
+      >
+        <h2 class="page-title panel-text-styling mt-5">MongoDB</h2>
+        <div class="mt-3" style="width: 75%">
+          <ul
+            class="nav nav-tabs nav-fill login-nav main-text-semi"
+            id="mongodb-login-tabs"
+            role="tablist"
+          >
+            <li class="nav-item" role="presentation">
+              <button
+                class="nav-link active login-nav-link"
+                id="enter-uri-tab"
+                data-bs-toggle="tab"
+                data-bs-target="#enter-uri-panel"
+                type="button"
+                role="tab"
+              >
+                Enter URI
+              </button>
+            </li>
+            <li class="nav-item" role="presentation">
+              <button
+                class="nav-link login-nav-link"
+                id="generate-uri-tab"
+                data-bs-toggle="tab"
+                data-bs-target="#generate-uri-panel"
+                type="button"
+                role="tab"
+              >
+                Generate URI
+              </button>
+            </li>
+          </ul>
+          <div class="tab-content mt-5" id="tabContent">
+            <div
+              class="tab-pane fade show active"
+              id="enter-uri-panel"
+              role="tabpanel"
+            >
+              <form
+                class="form-outline d-flex flex-column mt-3 panel-text-styling form-input-shadow"
+              >
+                <label for="alias" class="main-text-semi">Alias</label>
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="alias"
+                  placeholder="Enter Alias..."
+                />
+                <label for="collection" class="main-text-semi mt-3"
+                  >Collection</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="collection"
+                  placeholder="Enter Collection Name..."
+                />
+                <label for="database" class="main-text-semi mt-3"
+                  >Database</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="database"
+                  placeholder="Enter Database Name..."
+                />
+                <label for="uri" class="main-text-semi mt-3">MongoDB URI</label>
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="uri"
+                  name="uri"
+                  placeholder="Enter URI..."
+                />
+              </form>
+            </div>
+            <div class="tab-pane fade" id="generate-uri-panel" role="tabpanel">
+              <form
+                id="generate-uri-form"
+                class="form-outline d-flex flex-column mt-3 form-input-shadow"
+              >
+                <div
+                  class="d-flex flex-row align-items-center justify-content-center main-text-semi panel-text-styling"
+                >
+                  <span class="me-2">Standard</span>
+                  <div class="form-check form-switch d-flex flex-row mb-0">
+                    <input
+                      class="form-check-input"
+                      type="checkbox"
+                      role="switch"
+                      id="connection"
+                      checked
+                    />
+                  </div>
+                  <span class="">DNS Seed List</span>
+                </div>
+                <label for="aliasGenerate" class="main-text-semi mt-3">Alias</label>
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="aliasGenerate"
+                  placeholder="Enter Alias..."
+                />
+                <label for="username" class="main-text-semi mt-3"
+                  >Username (Optional)</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="username"
+                  placeholder="Enter Username..."
+                />
+                <label for="password" class="main-text-semi mt-3"
+                  >Password (Optional)</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="password"
+                  id="password"
+                  placeholder="Enter Password..."
+                />
+                <label for="host" class="main-text-semi mt-3">Host</label>
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="host"
+                  placeholder="Enter Host..."
+                />
+                <label for="collectionGenerate" class="main-text-semi mt-3"
+                  >Collection</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="collectionGenerate"
+                  placeholder="Enter Collection..."
+                />
+                <label for="databaseGenerate" class="main-text-semi mt-3"
+                  >Database</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="databaseGenerate"
+                  placeholder="Enter Database..."
+                />
+                <label for="options" class="main-text-semi mt-3"
+                  >Options (Optional)</label
+                >
+                <input
+                  class="form-control mt-1 main-text-regular"
+                  type="text"
+                  id="options"
+                  value="retryWrites=true,w=majority"
+                />
+              </form>
+            </div>
+          </div>
+        </div>
+      </div>
+      <div class="d-flex flex-row align-self-end me-3 mt-5 mb-3 buttonGroup">
+        <button
+          type="button"
+          id="login"
+          class="btn btn-outline-primary btn-box-shadow mt-2 mb-2"
+          onclick="handleMongoDBLogin(event)"
+        >
+          Login
+        </button>
+      </div>
+    </div>
+  </div>
+</main>
+<script src="/static/js/login.js"></script>
+{% endblock %}
diff --git a/util/gem5-resources-manager/test/__init__.py b/util/gem5-resources-manager/test/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/util/gem5-resources-manager/test/api_test.py b/util/gem5-resources-manager/test/api_test.py
new file mode 100644
index 0000000000..0ff439cd2e
--- /dev/null
+++ b/util/gem5-resources-manager/test/api_test.py
@@ -0,0 +1,722 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import flask
+import contextlib
+import unittest
+from server import app
+import server
+import json
+from bson import json_util
+from unittest.mock import patch
+import mongomock
+from api.mongo_client import MongoDBClient
+import requests
+
+
+@contextlib.contextmanager
+def captured_templates(app):
+    """
+    This is a context manager that allows you to capture the templates
+    that are rendered during a test.
+    """
+    recorded = []
+
+    def record(sender, template, context, **extra):
+        recorded.append((template, context))
+
+    flask.template_rendered.connect(record, app)
+    try:
+        yield recorded
+    finally:
+        flask.template_rendered.disconnect(record, app)
+
+
+class TestAPI(unittest.TestCase):
+    @patch.object(
+        MongoDBClient,
+        "_get_database",
+        return_value=mongomock.MongoClient().db.collection,
+    )
+    def setUp(self, mock_get_database):
+        """This method sets up the test environment."""
+        self.ctx = app.app_context()
+        self.ctx.push()
+        self.app = app
+        self.test_client = app.test_client()
+        self.alias = "test"
+        objects = []
+        with open("./test/refs/resources.json", "rb") as f:
+            objects = json.loads(f.read(), object_hook=json_util.object_hook)
+        self.collection = mock_get_database()
+        for obj in objects:
+            self.collection.insert_one(obj)
+
+        self.test_client.post(
+            "/validateMongoDB",
+            json={
+                "uri": "mongodb://localhost:27017",
+                "database": "test",
+                "collection": "test",
+                "alias": self.alias,
+            },
+        )
+
+    def tearDown(self):
+        """Drop the mock collection and pop the app context from setUp."""
+        # The mongomock collection object is created once by the decorator
+        # on setUp, so it must be emptied between tests.
+        self.collection.drop()
+        self.ctx.pop()
+
+    def test_get_helppage(self):
+        """
+        This method tests the call to the help page.
+        It checks if the call is GET, status code is 200 and if the template
+        rendered is help.html.
+        """
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/help")
+            self.assertEqual(response.status_code, 200)
+            self.assertTrue(templates[0][0].name == "help.html")
+
+    def test_get_mongodb_loginpage(self):
+        """
+        This method tests the call to the MongoDB login page.
+        It checks if the call is GET, status code is 200 and if the template
+        rendered is mongoDBLogin.html.
+        """
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/login/mongodb")
+            self.assertEqual(response.status_code, 200)
+            self.assertTrue(templates[0][0].name == "login/login_mongodb.html")
+
+    def test_get_json_loginpage(self):
+        """
+        This method tests the call to the JSON login page.
+        It checks if the call is GET, status code is 200 and if the template
+        rendered is jsonLogin.html.
+        """
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/login/json")
+            self.assertEqual(response.status_code, 200)
+            self.assertTrue(templates[0][0].name == "login/login_json.html")
+
+    def test_get_editorpage(self):
+        """This method tests the call to the editor page.
+        It checks if the call is GET, status code is 200 and if the template
+        rendered is editor.html.
+        """
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/editor?alias=test")
+            self.assertEqual(response.status_code, 200)
+            self.assertTrue(templates[0][0].name == "editor.html")
+
+    def test_get_editorpage_invalid(self):
+        """This method tests the call to the editor page without required
+        query parameters.
+        It checks if the call is GET, status code is 404 and if the template
+        rendered is 404.html.
+        """
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/editor")
+            self.assertEqual(response.status_code, 404)
+            self.assertTrue(templates[0][0].name == "404.html")
+            response = self.test_client.get("/editor?alias=invalid")
+            self.assertEqual(response.status_code, 404)
+            self.assertTrue(templates[0][0].name == "404.html")
+
+    def test_default_call(self):
+        """This method tests the default call to the API."""
+        with captured_templates(self.app) as templates:
+            response = self.test_client.get("/")
+            self.assertEqual(response.status_code, 200)
+            self.assertTrue(templates[0][0].name == "index.html")
+
+    def test_default_call_is_not_post(self):
+        """This method tests that the default call is not a POST."""
+
+        response = self.test_client.post("/")
+        self.assertEqual(response.status_code, 405)
+
+    def test_get_categories(self):
+        """
+        The methods tests if the category call returns the same categories as
+        the schema.
+        """
+
+        response = self.test_client.get("/categories")
+        post_response = self.test_client.post("/categories")
+        categories = [
+            "workload",
+            "disk-image",
+            "binary",
+            "kernel",
+            "checkpoint",
+            "git",
+            "bootloader",
+            "file",
+            "directory",
+            "simpoint",
+            "simpoint-directory",
+            "resource",
+            "looppoint-pinpoint-csv",
+            "looppoint-json",
+        ]
+        self.assertEqual(post_response.status_code, 405)
+        self.assertEqual(response.status_code, 200)
+        returnedData = json.loads(response.data)
+        self.assertTrue(returnedData == categories)
+
+    def test_get_schema(self):
+        """
+        The methods tests if the schema call returns the same schema as the
+        schema file.
+        """
+
+        response = self.test_client.get("/schema")
+        post_response = self.test_client.post("/schema")
+        self.assertEqual(post_response.status_code, 405)
+        self.assertEqual(response.status_code, 200)
+        returnedData = json.loads(response.data)
+        schema = {}
+        schema = requests.get(
+            "https://resources.gem5.org/gem5-resources-schema.json"
+        ).json()
+        self.assertTrue(returnedData == schema)
+
+    def test_insert(self):
+        """This method tests the insert method of the API."""
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        resource = self.collection.find({"id": "test-resource"}, {"_id": 0})
+
+        json_resource = json.loads(json_util.dumps(resource[0]))
+        self.assertTrue(json_resource == test_resource)
+
+    def test_find_no_version(self):
+        """This method tests the find method of the API."""
+        test_id = "test-resource"
+        test_resource_version = "1.0.0"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        response = self.test_client.post(
+            "/find",
+            json={"id": test_id, "resource_version": "", "alias": self.alias},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+    def test_find_not_exist(self):
+        """This method tests the find method of the API."""
+        test_id = "test-resource"
+        response = self.test_client.post(
+            "/find",
+            json={"id": test_id, "resource_version": "", "alias": self.alias},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == {"exists": False})
+
+    def test_find_with_version(self):
+        """This method tests the find method of the API."""
+        test_id = "test-resource"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        test_resource["resource_version"] = "1.0.1"
+        test_resource["description"] = "test-description2"
+        self.collection.insert_one(test_resource.copy())
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": "1.0.1",
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        return_json = response.json
+        self.assertTrue(return_json["description"] == "test-description2")
+        self.assertTrue(return_json["resource_version"] == "1.0.1")
+        self.assertTrue(return_json == test_resource)
+
+    def test_delete(self):
+        """This method tests the delete method of the API."""
+        test_id = "test-resource"
+        test_version = "1.0.0"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        response = self.test_client.post(
+            "/delete", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Deleted"})
+        resource = self.collection.find({"id": "test-resource"}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [])
+
+    def test_if_resource_exists_true(self):
+        """This method tests checkExists for an existing resource."""
+        test_id = "test-resource"
+        test_version = "1.0.0"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        response = self.test_client.post(
+            "/checkExists",
+            json={
+                "id": test_id,
+                "resource_version": test_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"exists": True})
+
+    def test_if_resource_exists_false(self):
+        """This method tests checkExists for a missing resource."""
+        test_id = "test-resource"
+        test_version = "1.0.0"
+        response = self.test_client.post(
+            "/checkExists",
+            json={
+                "id": test_id,
+                "resource_version": test_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"exists": False})
+
+    def test_get_resource_versions(self):
+        """This method tests the getResourceVersions method of the API."""
+        test_id = "test-resource"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        test_resource["resource_version"] = "1.0.1"
+        test_resource["description"] = "test-description2"
+        self.collection.insert_one(test_resource.copy())
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": self.alias}
+        )
+        return_json = json.loads(response.data)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            return_json,
+            [{"resource_version": "1.0.1"}, {"resource_version": "1.0.0"}],
+        )
+
+    def test_update_resource(self):
+        """This method tests the updateResource method of the API."""
+        test_id = "test-resource"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        original_resource = test_resource.copy()
+        self.collection.insert_one(test_resource.copy())
+        test_resource["description"] = "test-description2"
+        test_resource["example_usage"] = "test-usage2"
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": original_resource,
+                "resource": test_resource,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Updated"})
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [test_resource])
+
+    def test_keys_1(self):
+        """This method tests the keys method for the simpoint category."""
+        response = self.test_client.post(
+            "/keys", json={"category": "simpoint", "id": "test-resource"}
+        )
+        test_response = {
+            "category": "simpoint",
+            "id": "test-resource",
+            "author": [],
+            "description": "",
+            "license": "",
+            "source_url": "",
+            "tags": [],
+            "example_usage": "",
+            "gem5_versions": [],
+            "resource_version": "1.0.0",
+            "simpoint_interval": 0,
+            "warmup_interval": 0,
+        }
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(json.loads(response.data), test_response)
+
+    def test_keys_2(self):
+        """This method tests the keys method for the disk-image category."""
+        response = self.test_client.post(
+            "/keys", json={"category": "disk-image", "id": "test-resource"}
+        )
+        test_response = {
+            "category": "disk-image",
+            "id": "test-resource",
+            "author": [],
+            "description": "",
+            "license": "",
+            "source_url": "",
+            "tags": [],
+            "example_usage": "",
+            "gem5_versions": [],
+            "resource_version": "1.0.0",
+        }
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(json.loads(response.data), test_response)
+
+    def test_undo(self):
+        """This method tests the undo method of the API."""
+        test_id = "test-resource"
+        test_resource = {
+            "category": "disk-image",
+            "id": "test-resource",
+            "author": [],
+            "description": "",
+            "license": "",
+            "source_url": "",
+            "tags": [],
+            "example_usage": "",
+            "gem5_versions": [],
+            "resource_version": "1.0.0",
+        }
+        original_resource = test_resource.copy()
+        # insert resource
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        # update resource
+        test_resource["description"] = "test-description2"
+        test_resource["example_usage"] = "test-usage2"
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": original_resource,
+                "resource": test_resource,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Updated"})
+        # check if resource is updated
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [test_resource])
+        # undo update
+        response = self.test_client.post("/undo", json={"alias": self.alias})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Undone"})
+        # check if resource is back to original
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [original_resource])
+
+    def test_redo(self):
+        """This method tests the redo method of the API."""
+        test_id = "test-resource"
+        test_resource = {
+            "category": "disk-image",
+            "id": "test-resource",
+            "author": [],
+            "description": "",
+            "license": "",
+            "source_url": "",
+            "tags": [],
+            "example_usage": "",
+            "gem5_versions": [],
+            "resource_version": "1.0.0",
+        }
+        original_resource = test_resource.copy()
+        # insert resource
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        # update resource
+        test_resource["description"] = "test-description2"
+        test_resource["example_usage"] = "test-usage2"
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": original_resource,
+                "resource": test_resource,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Updated"})
+        # check if resource is updated
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [test_resource])
+        # undo update
+        response = self.test_client.post("/undo", json={"alias": self.alias})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Undone"})
+        # check if resource is back to original
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [original_resource])
+        # redo update
+        response = self.test_client.post("/redo", json={"alias": self.alias})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Redone"})
+        # check if resource is updated again
+        resource = self.collection.find({"id": test_id}, {"_id": 0})
+        json_resource = json.loads(json_util.dumps(resource))
+        self.assertTrue(json_resource == [test_resource])
+
+    def test_invalid_alias(self):
+        test_id = "test-resource"
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        alias = "invalid"
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": alias}
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/find",
+            json={"id": test_id, "resource_version": "", "alias": alias},
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/delete", json={"resource": test_resource, "alias": alias}
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/checkExists",
+            json={"id": test_id, "resource_version": "", "alias": alias},
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": alias}
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": test_resource,
+                "resource": test_resource,
+                "alias": alias,
+            },
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post("/undo", json={"alias": alias})
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post("/redo", json={"alias": alias})
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post(
+            "/getRevisionStatus", json={"alias": alias}
+        )
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+        response = self.test_client.post("/saveSession", json={"alias": alias})
+        self.assertEqual(response.status_code, 400)
+        self.assertEqual(response.json, {"error": "database not found"})
+
+    def test_get_revision_status_valid(self):
+        response = self.test_client.post(
+            "/getRevisionStatus", json={"alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"undo": 1, "redo": 1})
+
+    @patch.object(
+        MongoDBClient,
+        "_get_database",
+        return_value=mongomock.MongoClient().db.collection,
+    )
+    def test_save_session_load_session(self, mock_get_database):
+        password = "test"
+        expected_session = server.databases["test"].save_session()
+        response = self.test_client.post(
+            "/saveSession", json={"alias": self.alias, "password": password}
+        )
+        self.assertEqual(response.status_code, 200)
+
+        response = self.test_client.post(
+            "/loadSession",
+            json={
+                "alias": self.alias,
+                "session": response.json["ciphertext"],
+                "password": password,
+            },
+        )
+        self.assertEqual(response.status_code, 302)
+        self.assertEqual(
+            expected_session, server.databases[self.alias].save_session()
+        )
+
+    def test_logout(self):
+        response = self.test_client.post("/logout", json={"alias": self.alias})
+        self.assertEqual(response.status_code, 302)
+        self.assertNotIn(self.alias, server.databases)
diff --git a/util/gem5-resources-manager/test/comprehensive_test.py b/util/gem5-resources-manager/test/comprehensive_test.py
new file mode 100644
index 0000000000..4c32087324
--- /dev/null
+++ b/util/gem5-resources-manager/test/comprehensive_test.py
@@ -0,0 +1,407 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from server import app
+import json
+from bson import json_util
+import copy
+import mongomock
+from unittest.mock import patch
+from api.mongo_client import MongoDBClient
+
+
+class TestComprehensive(unittest.TestCase):
+    @patch.object(
+        MongoDBClient,
+        "_get_database",
+        return_value=mongomock.MongoClient().db.collection,
+    )
+    def setUp(self, mock_get_database):
+        """This method sets up the test environment."""
+        self.ctx = app.app_context()
+        self.ctx.push()
+        self.app = app
+        self.test_client = app.test_client()
+        self.alias = "test"
+        objects = []
+        with open("./test/refs/resources.json", "rb") as f:
+            objects = json.loads(f.read(), object_hook=json_util.object_hook)
+        self.collection = mock_get_database()
+        for obj in objects:
+            self.collection.insert_one(obj)
+
+        self.test_client.post(
+            "/validateMongoDB",
+            json={
+                "uri": "mongodb://localhost:27017",
+                "database": "test",
+                "collection": "test",
+                "alias": self.alias,
+            },
+        )
+
+    def tearDown(self):
+        """This method tears down the test environment."""
+        self.collection.drop()
+        self.ctx.pop()
+
+    def test_insert_find_update_find(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        original_resource = test_resource.copy()
+        test_id = test_resource["id"]
+        test_resource_version = test_resource["resource_version"]
+        # insert resource
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+        # update resource
+        test_resource["description"] = "test-description-2"
+        test_resource["author"].append("test-author-2")
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": original_resource,
+                "resource": test_resource,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Updated"})
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+    def test_find_new_insert(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        test_id = test_resource["id"]
+        test_resource_version = test_resource["resource_version"]
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"exists": False})
+        # insert resource
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+    def test_insert_find_new_version_find_older(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        test_id = test_resource["id"]
+        test_resource_version = test_resource["resource_version"]
+        # insert resource
+        response = self.test_client.post(
+            "/insert", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+        # add new version
+        test_resource_new_version = copy.deepcopy(test_resource)
+        test_resource_new_version["description"] = "test-description-2"
+        test_resource_new_version["author"].append("test-author-2")
+        test_resource_new_version["resource_version"] = "1.0.1"
+
+        response = self.test_client.post(
+            "/insert",
+            json={"resource": test_resource_new_version, "alias": self.alias},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+
+        # get resource versions
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": self.alias}
+        )
+        return_json = json.loads(response.data)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            return_json,
+            [{"resource_version": "1.0.1"}, {"resource_version": "1.0.0"}],
+        )
+
+        resource_version = return_json[1]["resource_version"]
+        # find older version
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+    def test_find_add_new_version_delete_older(self):
+        test_resource = {
+            "category": "binary",
+            "id": "binary-example",
+            "description": "binary-example documentation.",
+            "architecture": "ARM",
+            "is_zipped": False,
+            "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+            "url": (
+                "http://dist.gem5.org/dist/develop/"
+                "test-progs/hello/bin/arm/linux/hello64-static"
+            ),
+            "source": "src/simple",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        test_id = test_resource["id"]
+        test_resource_version = test_resource["resource_version"]
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+        # add new version
+        test_resource_new_version = copy.deepcopy(test_resource)
+        test_resource_new_version["description"] = "test-description-2"
+        test_resource_new_version["resource_version"] = "1.0.1"
+
+        response = self.test_client.post(
+            "/insert",
+            json={"resource": test_resource_new_version, "alias": self.alias},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+
+        # get resource versions
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": self.alias}
+        )
+        return_json = json.loads(response.data)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            return_json,
+            [{"resource_version": "1.0.1"}, {"resource_version": "1.0.0"}],
+        )
+        # delete older version
+        response = self.test_client.post(
+            "/delete", json={"resource": test_resource, "alias": self.alias}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Deleted"})
+
+        # get resource versions
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": self.alias}
+        )
+        return_json = json.loads(response.data)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(return_json, [{"resource_version": "1.0.1"}])
+
+    def test_find_add_new_version_update_older(self):
+        test_resource = {
+            "category": "binary",
+            "id": "binary-example",
+            "description": "binary-example documentation.",
+            "architecture": "ARM",
+            "is_zipped": False,
+            "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+            "url": (
+                "http://dist.gem5.org/dist/develop/"
+                "test-progs/hello/bin/arm/linux/hello64-static"
+            ),
+            "source": "src/simple",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        original_resource = test_resource.copy()
+        test_id = test_resource["id"]
+        test_resource_version = test_resource["resource_version"]
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": test_resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
+
+        # add new version
+        test_resource_new_version = copy.deepcopy(test_resource)
+        test_resource_new_version["description"] = "test-description-2"
+        test_resource_new_version["resource_version"] = "1.0.1"
+
+        response = self.test_client.post(
+            "/insert",
+            json={"resource": test_resource_new_version, "alias": self.alias},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Inserted"})
+
+        # get resource versions
+        response = self.test_client.post(
+            "/versions", json={"id": test_id, "alias": self.alias}
+        )
+        return_json = json.loads(response.data)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(
+            return_json,
+            [{"resource_version": "1.0.1"}, {"resource_version": "1.0.0"}],
+        )
+
+        resource_version = return_json[1]["resource_version"]
+
+        # update older version
+        test_resource["description"] = "test-description-3"
+
+        response = self.test_client.post(
+            "/update",
+            json={
+                "original_resource": original_resource,
+                "resource": test_resource,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json, {"status": "Updated"})
+
+        # find resource
+        response = self.test_client.post(
+            "/find",
+            json={
+                "id": test_id,
+                "resource_version": resource_version,
+                "alias": self.alias,
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertTrue(response.json == test_resource)
diff --git a/util/gem5-resources-manager/test/json_client_test.py b/util/gem5-resources-manager/test/json_client_test.py
new file mode 100644
index 0000000000..0168d475ac
--- /dev/null
+++ b/util/gem5-resources-manager/test/json_client_test.py
@@ -0,0 +1,262 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from api.json_client import JSONClient
+from server import app
+import json
+from bson import json_util
+from unittest.mock import patch
+from pathlib import Path
+from api.json_client import JSONClient
+
+
+def get_json():
+    with open("test/refs/test_json.json") as f:
+        jsonFile = f.read()
+        return json.loads(jsonFile)
+
+
+def mockinit(self, file_path):
+    self.file_path = Path("test/refs/") / file_path
+    with open(self.file_path) as f:
+        self.resources = json.load(f)
+
+
+class TestJson(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        with open("./test/refs/resources.json", "rb") as f:
+            jsonFile = f.read()
+            with open("./test/refs/test_json.json", "wb") as f:
+                f.write(jsonFile)
+
+    @classmethod
+    def tearDownClass(cls):
+        Path("./test/refs/test_json.json").unlink()
+
+    @patch.object(JSONClient, "__init__", mockinit)
+    def setUp(self):
+        """This method sets up the test environment."""
+        with open("./test/refs/test_json.json", "rb") as f:
+            jsonFile = f.read()
+            self.original_json = json.loads(jsonFile)
+        self.json_client = JSONClient("test_json.json")
+
+    def tearDown(self):
+        """This method tears down the test environment."""
+        with open("./test/refs/test_json.json", "w") as f:
+            json.dump(self.original_json, f, indent=4)
+
+    def test_insertResource(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        response = self.json_client.insert_resource(test_resource)
+        self.assertEqual(response, {"status": "Inserted"})
+        json_data = get_json()
+        self.assertNotEqual(json_data, self.original_json)
+        self.assertIn(test_resource, json_data)
+
+    def test_insertResource_duplicate(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "disk-image-example",
+            "description": "disk-image documentation.",
+            "architecture": "X86",
+            "is_zipped": True,
+            "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url": (
+                "http://dist.gem5.org/dist/develop/images"
+                "/x86/ubuntu-18-04/x86-ubuntu.img.gz"
+            ),
+            "source": "src/x86-ubuntu",
+            "root_partition": "1",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        response = self.json_client.insert_resource(test_resource)
+        self.assertEqual(response, {"status": "Resource already exists"})
+
+    def test_find_no_version(self):
+        expected_response = {
+            "category": "diskimage",
+            "id": "disk-image-example",
+            "description": "disk-image documentation.",
+            "architecture": "X86",
+            "is_zipped": True,
+            "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url": (
+                "http://dist.gem5.org/dist/develop/images"
+                "/x86/ubuntu-18-04/x86-ubuntu.img.gz"
+            ),
+            "source": "src/x86-ubuntu",
+            "root_partition": "1",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        response = self.json_client.find_resource(
+            {"id": expected_response["id"]}
+        )
+        self.assertEqual(response, expected_response)
+
+    def test_find_with_version(self):
+        expected_response = {
+            "category": "kernel",
+            "id": "kernel-example",
+            "description": "kernel-example documentation.",
+            "architecture": "RISCV",
+            "is_zipped": False,
+            "md5sum": "60a53c7d47d7057436bf4b9df707a841",
+            "url": (
+                "http://dist.gem5.org/dist/develop"
+                "/kernels/x86/static/vmlinux-5.4.49"
+            ),
+            "source": "src/linux-kernel",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        response = self.json_client.find_resource(
+            {
+                "id": expected_response["id"],
+                "resource_version": expected_response["resource_version"],
+            }
+        )
+        self.assertEqual(response, expected_response)
+
+    def test_find_not_found(self):
+        response = self.json_client.find_resource({"id": "not-found"})
+        self.assertEqual(response, {"exists": False})
+
+    def test_deleteResource(self):
+        deleted_resource = {
+            "category": "diskimage",
+            "id": "disk-image-example",
+            "description": "disk-image documentation.",
+            "architecture": "X86",
+            "is_zipped": True,
+            "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url": (
+                "http://dist.gem5.org/dist/develop/"
+                "images/x86/ubuntu-18-04/x86-ubuntu.img.gz"
+            ),
+            "source": "src/x86-ubuntu",
+            "root_partition": "1",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        response = self.json_client.delete_resource(
+            {
+                "id": deleted_resource["id"],
+                "resource_version": deleted_resource["resource_version"],
+            }
+        )
+        self.assertEqual(response, {"status": "Deleted"})
+        json_data = get_json()
+        self.assertNotEqual(json_data, self.original_json)
+        self.assertNotIn(deleted_resource, json_data)
+
+    def test_updateResource(self):
+        updated_resource = {
+            "category": "diskimage",
+            "id": "disk-image-example",
+            "description": "disk-image documentation.",
+            "architecture": "X86",
+            "is_zipped": True,
+            "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url": (
+                "http://dist.gem5.org/dist/develop/images"
+                "/x86/ubuntu-18-04/x86-ubuntu.img.gz"
+            ),
+            "source": "src/x86-ubuntu",
+            "root_partition": "1",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        original_resource = {
+            "category": "diskimage",
+            "id": "disk-image-example",
+            "description": "disk-image documentation.",
+            "architecture": "X86",
+            "is_zipped": True,
+            "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url": (
+                "http://dist.gem5.org/dist/develop/"
+                "images/x86/ubuntu-18-04/x86-ubuntu.img.gz"
+            ),
+            "source": "src/x86-ubuntu",
+            "root_partition": "1",
+            "resource_version": "1.0.0",
+            "gem5_versions": ["23.0"],
+        }
+        response = self.json_client.update_resource(
+            {
+                "original_resource": original_resource,
+                "resource": updated_resource,
+            }
+        )
+        self.assertEqual(response, {"status": "Updated"})
+        json_data = get_json()
+        self.assertNotEqual(json_data, self.original_json)
+        self.assertIn(updated_resource, json_data)
+
+    def test_getVersions(self):
+        resource_id = "kernel-example"
+        response = self.json_client.get_versions({"id": resource_id})
+        self.assertEqual(
+            response,
+            [{"resource_version": "2.0.0"}, {"resource_version": "1.0.0"}],
+        )
+
+    def test_checkResourceExists_True(self):
+        resource_id = "kernel-example"
+        resource_version = "1.0.0"
+        response = self.json_client.check_resource_exists(
+            {"id": resource_id, "resource_version": resource_version}
+        )
+        self.assertEqual(response, {"exists": True})
+
+    def test_checkResourceExists_False(self):
+        resource_id = "kernel-example"
+        resource_version = "3.0.0"
+        response = self.json_client.check_resource_exists(
+            {"id": resource_id, "resource_version": resource_version}
+        )
+        self.assertEqual(response, {"exists": False})
diff --git a/util/gem5-resources-manager/test/mongo_client_test.py b/util/gem5-resources-manager/test/mongo_client_test.py
new file mode 100644
index 0000000000..761475ead8
--- /dev/null
+++ b/util/gem5-resources-manager/test/mongo_client_test.py
@@ -0,0 +1,281 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from server import app, databases
+import json
+from bson import json_util
+import mongomock
+from unittest.mock import patch
+from api.mongo_client import MongoDBClient
+
+
+class TestApi(unittest.TestCase):
+    """This is a test class that tests the API."""
+
+    API_URL = "http://127.0.0.1:5000"
+
+    @patch.object(
+        MongoDBClient,
+        "_get_database",
+        return_value=mongomock.MongoClient().db.collection,
+    )
+    def setUp(self, mock_get_database):
+        """This method sets up the test environment."""
+        objects = []
+        with open("./test/refs/resources.json", "rb") as f:
+            objects = json.loads(f.read(), object_hook=json_util.object_hook)
+        self.collection = mock_get_database()
+        for obj in objects:
+            self.collection.insert_one(obj)
+        self.mongo_client = MongoDBClient(
+            "mongodb://localhost:27017", "test", "test"
+        )
+
+    def tearDown(self):
+        """This method tears down the test environment."""
+        self.collection.drop()
+
+    def test_insertResource(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        ret_value = self.mongo_client.insert_resource(test_resource)
+        self.assertEqual(ret_value, {"status": "Inserted"})
+        self.assertEqual(
+            self.collection.find({"id": "test-resource"})[0], test_resource
+        )
+        self.collection.delete_one({"id": "test-resource"})
+
+    def test_insertResource_duplicate(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources/"
+                "tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource)
+        ret_value = self.mongo_client.insert_resource(test_resource)
+        self.assertEqual(ret_value, {"status": "Resource already exists"})
+
+    def test_findResource_no_version(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        ret_value = self.mongo_client.find_resource({"id": "test-resource"})
+        self.assertEqual(ret_value, test_resource)
+        self.collection.delete_one({"id": "test-resource"})
+
+    def test_findResource_with_version(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        test_resource["resource_version"] = "2.0.0"
+        test_resource["description"] = "test-description2"
+        self.collection.insert_one(test_resource.copy())
+        ret_value = self.mongo_client.find_resource(
+            {"id": "test-resource", "resource_version": "2.0.0"}
+        )
+        self.assertEqual(ret_value, test_resource)
+
+    def test_findResource_not_found(self):
+        ret_value = self.mongo_client.find_resource({"id": "test-resource"})
+        self.assertEqual(ret_value, {"exists": False})
+
+    def test_deleteResource(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        ret_value = self.mongo_client.delete_resource(
+            {"id": "test-resource", "resource_version": "1.0.0"}
+        )
+        self.assertEqual(ret_value, {"status": "Deleted"})
+
+        self.assertEqual(
+            json.loads(
+                json_util.dumps(self.collection.find({"id": "test-resource"}))
+            ),
+            [],
+        )
+
+    def test_updateResource(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        original_resource = test_resource.copy()
+        self.collection.insert_one(test_resource.copy())
+        test_resource["author"].append("test-author2")
+        test_resource["description"] = "test-description2"
+        ret_value = self.mongo_client.update_resource(
+            {"original_resource": original_resource, "resource": test_resource}
+        )
+        self.assertEqual(ret_value, {"status": "Updated"})
+        self.assertEqual(
+            self.collection.find({"id": "test-resource"}, {"_id": 0})[0],
+            test_resource,
+        )
+
+    def test_checkResourceExists(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        ret_value = self.mongo_client.check_resource_exists(
+            {"id": "test-resource", "resource_version": "1.0.0"}
+        )
+        self.assertEqual(ret_value, {"exists": True})
+
+    def test_checkResourceExists_not_found(self):
+        ret_value = self.mongo_client.check_resource_exists(
+            {"id": "test-resource", "resource_version": "1.0.0"}
+        )
+        self.assertEqual(ret_value, {"exists": False})
+
+    def test_getVersion(self):
+        test_resource = {
+            "category": "diskimage",
+            "id": "test-resource",
+            "author": ["test-author"],
+            "description": "test-description",
+            "license": "test-license",
+            "source_url": (
+                "https://github.com/gem5/gem5-resources"
+                "/tree/develop/src/x86-ubuntu"
+            ),
+            "tags": ["test-tag", "test-tag2"],
+            "example_usage": " test-usage",
+            "gem5_versions": [
+                "22.1",
+            ],
+            "resource_version": "1.0.0",
+        }
+        self.collection.insert_one(test_resource.copy())
+        test_resource["resource_version"] = "2.0.0"
+        test_resource["description"] = "test-description2"
+        self.collection.insert_one(test_resource.copy())
+        ret_value = self.mongo_client.get_versions({"id": "test-resource"})
+        self.assertEqual(
+            ret_value,
+            [{"resource_version": "2.0.0"}, {"resource_version": "1.0.0"}],
+        )
diff --git a/util/gem5-resources-manager/test/refs/resources.json b/util/gem5-resources-manager/test/refs/resources.json
new file mode 100644
index 0000000000..614f8dc764
--- /dev/null
+++ b/util/gem5-resources-manager/test/refs/resources.json
@@ -0,0 +1,196 @@
+[
+    {
+        "category": "kernel",
+        "id": "kernel-example",
+        "description": "kernel-example documentation.",
+        "architecture": "RISCV",
+        "is_zipped": false,
+        "md5sum": "60a53c7d47d7057436bf4b9df707a841",
+        "url": "http://dist.gem5.org/dist/develop/kernels/x86/static/vmlinux-5.4.49",
+        "source": "src/linux-kernel",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "kernel",
+        "id": "kernel-example",
+        "description": "kernel-example documentation 2.",
+        "architecture": "RISCV",
+        "is_zipped": false,
+        "md5sum": "60a53c7d47d7057436bf4b9df707a841",
+        "url": "http://dist.gem5.org/dist/develop/kernels/x86/static/vmlinux-5.4.49",
+        "source": "src/linux-kernel",
+        "resource_version": "2.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "diskimage",
+        "id": "disk-image-example",
+        "description": "disk-image documentation.",
+        "architecture": "X86",
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "source": "src/x86-ubuntu",
+        "root_partition": "1",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "binary-example",
+        "description": "binary-example documentation.",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/simple",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+
+    },
+    {
+        "category": "bootloader",
+        "id": "bootloader-example",
+        "description": "bootloader documentation.",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "checkpoint",
+        "id": "checkpoint-example",
+        "description": "checkpoint-example documentation.",
+        "architecture": "RISCV",
+        "is_zipped": false,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "git",
+        "id": "git-example",
+        "description": null,
+        "is_zipped": false,
+        "is_tar_archive": true,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "file-example",
+        "description": null,
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "directory",
+        "id": "directory-example",
+        "description": "directory-example documentation.",
+        "is_zipped": false,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "simpoint-directory",
+        "id": "simpoint-directory-example",
+        "description": "simpoint directory documentation.",
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "simpoint_file": "simpoint.simpt",
+        "weight_file": "simpoint.weight",
+        "workload_name": "Example Workload",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "simpoint",
+        "id": "simpoint-example",
+        "description": "simpoint documentation.",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 23445,
+        "simpoint_list": [
+            2,
+            3,
+            4,
+            15
+        ],
+        "weight_list": [
+            0.1,
+            0.2,
+            0.4,
+            0.3
+        ],
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "looppoint-pinpoint-csv",
+        "id": "looppoint-pinpoint-csv-resource",
+        "description": "A looppoint pinpoints csv file.",
+        "is_zipped": false,
+        "md5sum": "199ab22dd463dc70ee2d034bfe045082",
+        "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "looppoint-json",
+        "id": "looppoint-json-restore-resource-region-1",
+        "description": "A looppoint json file resource.",
+        "is_zipped": false,
+        "region_id": "1",
+        "md5sum": "a71ed64908b082ea619b26b940a643c1",
+        "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    }
+]
diff --git a/tests/configs/t1000-simple-atomic.py b/util/gem5-stubgen.py
similarity index 58%
rename from tests/configs/t1000-simple-atomic.py
rename to util/gem5-stubgen.py
index 76d39327d2..d003e2212e 100644
--- a/tests/configs/t1000-simple-atomic.py
+++ b/util/gem5-stubgen.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2007 The Regents of The University of Michigan
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -24,36 +24,40 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import m5
-from m5.objects import *
-
-m5.util.addToPath("../configs/")
-from common import FSConfig
-
-try:
-    system = FSConfig.makeSparcSystem("atomic")
-except IOError as e:
-    skip_test(reason=str(e))
-
-system.voltage_domain = VoltageDomain()
-system.clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-system.cpu_clk_domain = SrcClockDomain(
-    clock="1GHz", voltage_domain=system.voltage_domain
-)
-cpu = AtomicSimpleCPU(cpu_id=0, clk_domain=system.cpu_clk_domain)
-system.cpu = cpu
-# create the interrupt controller
-cpu.createInterruptController()
-cpu.connectBus(system.membus)
-
-# create the memory controllers and connect them, stick with
-# the physmem name to avoid bumping all the reference stats
-system.physmem = [SimpleMemory(range=r) for r in system.mem_ranges]
-for i in range(len(system.physmem)):
-    system.physmem[i].port = system.membus.mem_side_ports
-
-root = Root(full_system=True, system=system)
-
-m5.ticks.setGlobalFrequency("2GHz")
+from mypy.stubgen import generate_stubs, parse_options
+
+"""
+This allows us to generate stubs for the modules in gem5. The output will be
+a "typings" directory which can be used by Pylance (Python IntelliSense) to
+infer typings in Visual Studio Code.
+
+Note: A "typings" directory in the root of the workspace is the default
+location for Pylance to look for typings. This can be changed via
+`python.analysis.stubPath` in "settings.json".
+
+Usage
+=====
+
+```sh
+pip3 install -r requirements.txt
+scons build/ALL/gem5.opt -j$(nproc)
+./build/ALL/gem5.opt util/gem5-stubgen.py
+```
+
+"""
+
+if __name__ == "__m5_main__":
+    import m5
+
+    # get a list of all modules exported by gem5
+    modules = m5.__spec__.loader_state
+
+    options = parse_options(
+        ("--module " + " --module ".join(modules)).split(" ")
+        + ["--output", "typings"]
+    )
+    generate_stubs(options)
+
+if __name__ == "__main__":
+    print("Error: This script is meant to be run with the gem5 binary")
+    exit(1)
diff --git a/util/gem5art/artifact/README.md b/util/gem5art/artifact/README.md
index aae297332a..86e30abcd0 100644
--- a/util/gem5art/artifact/README.md
+++ b/util/gem5art/artifact/README.md
@@ -51,7 +51,7 @@ gem5_binary = Artifact.registerArtifact(
     inputs = [gem5_repo,],
     documentation = '''
       Default gem5 binary compiled for the X86 ISA.
-      This was built from the main gem5 repo (gem5.googlesource.com) without
+      This was built from the main gem5 repo (https://github.com/gem5/gem5/) without
       any modifications. We recently updated to the current gem5 master
       which has a fix for memory channel address striping.
     '''
diff --git a/util/gem5art/artifact/gem5art/artifact/_artifactdb.py b/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
index 16d35e86e8..a18f47d7d0 100644
--- a/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
+++ b/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
@@ -185,22 +185,21 @@ def downloadFile(self, key: UUID, path: Path) -> None:
     def searchByName(self, name: str, limit: int) -> Iterable[Dict[str, Any]]:
         """Returns an iterable of all artifacts in the database that match
         some name."""
-        for d in self.artifacts.find({"name": name}, limit=limit):
-            yield d
+        yield from self.artifacts.find({"name": name}, limit=limit)
 
     def searchByType(self, typ: str, limit: int) -> Iterable[Dict[str, Any]]:
         """Returns an iterable of all artifacts in the database that match
         some type."""
-        for d in self.artifacts.find({"type": typ}, limit=limit):
-            yield d
+        yield from self.artifacts.find({"type": typ}, limit=limit)
 
     def searchByNameType(
         self, name: str, typ: str, limit: int
     ) -> Iterable[Dict[str, Any]]:
         """Returns an iterable of all artifacts in the database that match
         some name and type."""
-        for d in self.artifacts.find({"type": typ, "name": name}, limit=limit):
-            yield d
+        yield from self.artifacts.find(
+            {"type": typ, "name": name}, limit=limit
+        )
 
     def searchByLikeNameType(
         self, name: str, typ: str, limit: int
@@ -211,8 +210,7 @@ def searchByLikeNameType(
         data = self.artifacts.find(
             {"type": typ, "name": {"$regex": f"{name}"}}, limit=limit
         )
-        for d in data:
-            yield d
+        yield from data
 
 
 class ArtifactFileDB(ArtifactDB):
@@ -318,7 +316,7 @@ def _load_from_file(
         uuid_mapping: Dict[str, Dict[str, str]] = {}
         hash_mapping: Dict[str, List[str]] = {}
         if json_file.exists():
-            with open(json_file, "r") as f:
+            with open(json_file) as f:
                 j = json.load(f)
                 for an_artifact in j:
                     the_uuid = an_artifact["_id"]
diff --git a/util/gem5art/artifact/gem5art/artifact/artifact.py b/util/gem5art/artifact/gem5art/artifact/artifact.py
index b71369c689..d178b218dd 100644
--- a/util/gem5art/artifact/gem5art/artifact/artifact.py
+++ b/util/gem5art/artifact/gem5art/artifact/artifact.py
@@ -166,7 +166,6 @@ def createArtifact(
         version: str = "",
         **kwargs: str,
     ) -> "Artifact":
-
         """Constructs a new artifact without using the database.
 
         Different from registerArtifact(), this method won't use database.
diff --git a/util/gem5art/artifact/setup.py b/util/gem5art/artifact/setup.py
index 869603db4b..78247eb16d 100755
--- a/util/gem5art/artifact/setup.py
+++ b/util/gem5art/artifact/setup.py
@@ -56,8 +56,8 @@
     install_requires=["pymongo"],
     python_requires=">=3.6",
     project_urls={
-        "Bug Reports": "https://gem5.atlassian.net/",
-        "Source": "https://gem5.googlesource.com/",
+        "Bug Reports": "https://github.com/gem5/gem5/issues/",
+        "Source": "https://github.com/gem5/gem5/",
         "Documentation": "https://www.gem5.org/documentation/gem5art",
     },
 )
diff --git a/util/gem5art/artifact/tests/test_artifact.py b/util/gem5art/artifact/tests/test_artifact.py
index af6f8ae75b..a12cc8f028 100644
--- a/util/gem5art/artifact/tests/test_artifact.py
+++ b/util/gem5art/artifact/tests/test_artifact.py
@@ -85,7 +85,7 @@ class TestGit(unittest.TestCase):
     def test_keys(self):
         git = artifact.artifact.getGit(Path("."))
         self.assertSetEqual(
-            set(git.keys()), set(["origin", "hash", "name"]), "git keys wrong"
+            set(git.keys()), {"origin", "hash", "name"}, "git keys wrong"
         )
 
     def test_origin(self):
@@ -205,7 +205,6 @@ def test_similar(self):
 
 class TestRegisterArtifact(unittest.TestCase):
     def setUp(self):
-
         # Create and register an artifact
         self.testArtifactA = artifact.Artifact.registerArtifact(
             name="artifact-A",
diff --git a/util/gem5art/artifact/tests/test_filedb.py b/util/gem5art/artifact/tests/test_filedb.py
index 9b5cd02d52..b7144ffcf3 100644
--- a/util/gem5art/artifact/tests/test_filedb.py
+++ b/util/gem5art/artifact/tests/test_filedb.py
@@ -62,7 +62,7 @@ def test_init_function(self):
         self.assertTrue(Path("test.json").exists())
 
     def test_json_content(self):
-        with open("test.json", "r") as f:
+        with open("test.json") as f:
             artifacts = json.load(f)
         self.assertTrue(len(artifacts) == 1)
         artifact = artifacts[0]
diff --git a/util/gem5art/run/bin/gem5art-getruns b/util/gem5art/run/bin/gem5art-getruns
index 5474dd68d7..da8d2f0835 100755
--- a/util/gem5art/run/bin/gem5art-getruns
+++ b/util/gem5art/run/bin/gem5art-getruns
@@ -71,7 +71,6 @@ def parseArgs():
 
 
 if __name__ == "__main__":
-
     args = parseArgs()
 
     db = getDBConnection(args.db_uri)
diff --git a/util/gem5art/run/gem5art/run.py b/util/gem5art/run/gem5art/run.py
index 12e4b3e208..7b7b823200 100644
--- a/util/gem5art/run/gem5art/run.py
+++ b/util/gem5art/run/gem5art/run.py
@@ -672,7 +672,6 @@ def getRunsByNameLike(
 def getRerunnableRunsByNameLike(
     db: ArtifactDB, name: str, fs_only: bool = False, limit: int = 0
 ) -> Iterable[gem5Run]:
-
     """Returns a generator of gem5Run objects having rerunnable as true
     and the object "name" containing the name parameter as a substring. The
     parameter is case sensitive.
diff --git a/util/gem5art/run/setup.py b/util/gem5art/run/setup.py
index d17124bd1f..1ab51b5c2d 100755
--- a/util/gem5art/run/setup.py
+++ b/util/gem5art/run/setup.py
@@ -57,7 +57,7 @@
     python_requires=">=3.6",
     project_urls={
         "Bug Reports": "https://gem5.atlassian.net/",
-        "Source": "https://gem5.googlesource.com/",
+        "Source": "https://github.com/gem5/gem5/",
         "Documentation": "https://www.gem5.org/documentation/gem5art",
     },
     scripts=["bin/gem5art-getruns"],
diff --git a/util/gem5art/tasks/setup.py b/util/gem5art/tasks/setup.py
index 7bcfc642ae..290c68a17a 100755
--- a/util/gem5art/tasks/setup.py
+++ b/util/gem5art/tasks/setup.py
@@ -58,7 +58,7 @@
     python_requires=">=3.6",
     project_urls={
         "Bug Reports": "https://gem5.atlassian.net/",
-        "Source": "https://gem5.googlesource.com/",
+        "Source": "https://github.com/gem5/gem5/",
         "Documentation": "https://www.gem5.org/documentation/gem5art",
     },
 )
diff --git a/util/gem5img.py b/util/gem5img.py
index 8eb0965c9e..dcb66e0bc9 100755
--- a/util/gem5img.py
+++ b/util/gem5img.py
@@ -65,6 +65,7 @@
 # Whether to print debug output.
 debug = False
 
+
 # Figure out cylinders, heads and sectors from a size in blocks.
 def chsFromSize(sizeInBlocks):
     if sizeInBlocks >= MaxLBABlocks:
@@ -139,7 +140,7 @@ def findProg(program, cleanupDev=None):
     return out.strip()
 
 
-class LoopbackDevice(object):
+class LoopbackDevice:
     def __init__(self, devFile=None):
         self.devFile = devFile
 
@@ -227,7 +228,7 @@ def mountPointToDev(mountPoint):
 commandOrder = []
 
 
-class Command(object):
+class Command:
     def addArgument(self, *args, **kargs):
         self.parser.add_argument(*args, **kargs)
 
diff --git a/util/gerrit-bot/bot.py b/util/gerrit-bot/bot.py
index f6b9469d9b..5ef4151a48 100755
--- a/util/gerrit-bot/bot.py
+++ b/util/gerrit-bot/bot.py
@@ -97,7 +97,7 @@ def __init__(self, config):
     def __read_auth_file(self, auth_file_path):
         username = ""
         password = ""
-        with open(auth_file_path, "r") as f:
+        with open(auth_file_path) as f:
             lines = f.readlines()
             username = lines[0].strip()
             password = lines[1].strip()
@@ -107,7 +107,7 @@ def __read_time_tracker_file(self, file_path):
         prev_query_time = 0
 
         try:
-            with open(file_path, "r") as f:
+            with open(file_path) as f:
                 lines = f.readlines()
                 prev_query_time = int(float(lines[0].strip()))
         except FileNotFoundError:
@@ -134,7 +134,7 @@ def __update_time_tracker_file(self, file_path, prev_query_time):
     def __read_maintainer_account_id_file(self, maintainers, file_path):
         account_ids = {}
         try:
-            with open(file_path, "r") as f:
+            with open(file_path) as f:
                 account_ids = json.load(f)
         except (FileNotFoundError, json.decoder.JSONDecodeError):
             # create a placeholder file
@@ -147,7 +147,7 @@ def __read_maintainer_account_id_file(self, maintainers, file_path):
 
     def __update_maintainer_account_id_file(self, file_path, maintainers):
         # get the current map
-        with open(file_path, "r") as f:
+        with open(file_path) as f:
             account_ids = json.load(f)
         # get maintainer email addresses
         email_addresses = set()
diff --git a/util/gerrit-bot/extract_gitcookies.py b/util/gerrit-bot/extract_gitcookies.py
index ef17be10de..4487513dee 100755
--- a/util/gerrit-bot/extract_gitcookies.py
+++ b/util/gerrit-bot/extract_gitcookies.py
@@ -44,7 +44,7 @@ def parse_gitcookies_line(raw):
 
 def parse_gitcookies(input_path):
     username_password_dict = {}
-    with open(input_path, "r") as input_stream:
+    with open(input_path) as input_stream:
         for line in input_stream:
             username, password = parse_gitcookies_line(line)
             if not username:
diff --git a/util/gerrit-bot/util.py b/util/gerrit-bot/util.py
index b410858e14..16a3b49216 100644
--- a/util/gerrit-bot/util.py
+++ b/util/gerrit-bot/util.py
@@ -78,10 +78,8 @@ def add_maintainers_to_change(
                 maintainer_emails.add(email)
         except KeyError:
             print(
-                (
-                    f"warning: `change-{change_id}` has an unknown tag: "
-                    f"`{tag}`"
-                )
+                f"warning: `change-{change_id}` has an unknown tag: "
+                f"`{tag}`"
             )
     for email in maintainer_emails:
         if email in avoid_emails:
diff --git a/util/git-commit-msg.py b/util/git-commit-msg.py
index 12baad8c19..f0a60110c7 100755
--- a/util/git-commit-msg.py
+++ b/util/git-commit-msg.py
@@ -57,7 +57,7 @@ def _printErrorQuit(error_message):
 --------------------------------------------------------------------------
     """
     )
-    print(open(sys.argv[1], "r").read())
+    print(open(sys.argv[1]).read())
     print(
         """
 --------------------------------------------------------------------------
@@ -100,13 +100,12 @@ def _validateTags(commit_header):
     maintainer_dict = maintainers.Maintainers.from_file()
     valid_tags = [tag for tag, _ in maintainer_dict]
 
-    # Remove non-tag 'pmc' and add special tags not in MAINTAINERS.yaml
-    valid_tags.remove("pmc")
+    # Add special tags not in MAINTAINERS.yaml
     valid_tags.extend(["RFC", "WIP"])
 
     tags = "".join(commit_header.split(":")[0].split()).split(",")
     if any(tag not in valid_tags for tag in tags):
-        invalid_tag = next((tag for tag in tags if tag not in valid_tags))
+        invalid_tag = next(tag for tag in tags if tag not in valid_tags)
         _printErrorQuit("Invalid Gem5 tag: " + invalid_tag)
 
 
@@ -121,7 +120,7 @@ def _validateTags(commit_header):
 commit_message_lines = commit_message.splitlines()
 commit_header = commit_message_lines[0]
 commit_header_match = re.search(
-    "^(fixup! )?(\S[\w\-][,\s*[\w\-]+]*:.+\S$)", commit_header
+    r"^(fixup! )?(\S[\w\-][,\s*[\w\-]+]*:.+\S$)", commit_header
 )
 if commit_header_match is None:
     _printErrorQuit("Invalid commit header")
diff --git a/util/github-runners-vagrant/README.md b/util/github-runners-vagrant/README.md
new file mode 100644
index 0000000000..ca504e3c63
--- /dev/null
+++ b/util/github-runners-vagrant/README.md
@@ -0,0 +1,100 @@
+# Setting up a Github Actions Runner with Vagrant
+
+This directory provides a way to set up GitHub Actions runners using Vagrant to host them in virtual machines.
+
+This tutorial has been written with the assumption of running on a machine with Ubuntu 22.04.
+Setting up a runner on a different OS may require some changes.
+
+Before anything else, copy this directory, "util/github-runners-vagrant", to the root of the location on your host system you wish to set up the VMs from.
+The CWD is assumed to be this directory.
+
+## Install Dependencies
+
+```sh
+sudo apt install vagrant
+sudo apt-get build-dep vagrant ruby-libvirt
+sudo apt-get install qemu libvirt-daemon-system libvirt-clients ebtables dnsmasq-base libxslt-dev libxml2-dev libvirt-dev zlib1g-dev ruby-dev
+
+# Note: The vagrant-libvirt APT package does not work as intended. We must
+# remove it from the system otherwise errors will occur (we will install it
+# later using the vagrant plugin command).
+sudo apt purge vagrant-libvirt
+```
+
+## Install Vagrant Plugins
+
+Once everything is set properly, set the `VAGRANT_HOME` environment variable to the directory in which the Vagrant files and other scripts are stored (i.e., the CWD).
+For example:
+
+```sh
+export VAGRANT_HOME=`pwd`
+```
+
+After this, install the relevant vagrant plugins:
+
+``` sh
+vagrant plugin install dotenv
+vagrant plugin install vagrant-libvirt
+vagrant plugin install vagrant-reload
+```
+
+## Creating the virtual machines
+
+The Vagrantfile in this directory defines the VMs that can be built and used to create a GitHub Actions runner.
+This standard VM has 4 cores, 16GB of RAM, and 128GB of disk space.
+This is sufficient to both compile gem5 and run most simulations.
+
+At the top of the Vagrantfile, there are a few variables that must be set prior to creating the VMs.
+
+* `NUM_RUNNERS`: The number of runners to create.
+* `PERSONAL_ACCESS_TOKEN`: The GitHub personal access token to use.
+You can generate a Personal Access Token [here](https://github.com/settings/tokens).
+Make sure to set admin permissions on this token.
+* `GITHUB_ORG`: The GitHub organization to add the runners to.
+E.g., if the URL to your organization is https://github.com/orgs/gem5, then the variable should be set to "gem5".
+* `HOSTNAME` : The hostname of the VM to be created (note, this will be appended with a number to create a unique hostname for each VM).
+E.g., if set to `my-machine` and the number of runners set to `2`, two VMs will be created.
+One called `my-machine-1` and the other `my-machine-2`.
+
+Once these are set, simply run:
+
+```sh
+vagrant up --provider=libvirt
+```
+
+This should automatically create your machines then configure and start up a Github Actions runner in each.
+You can check the status of the runner here: https://github.com/organizations/{GITHUB_ORG}/settings/actions/runners
+
+If the runner ever shows as offline, you can rerun the `vagrant up --provider=libvirt` command to make sure everything is working properly.
+
+## Troubleshooting
+
+### The default libvirt disk image storage pool is on the wrong drive
+
+By default libvirt will store disk images in "/var/lib/libvirt/images".
+This is not ideal as it is on a small root partition.
+A solution to this is to change the default storage location.
+To do so, do the following:
+
+```sh
+virsh pool-list --all # Confirm here that a "default" pool exists. We'll modify this.
+virsh pool-dumpxml default >default-pool.xml # We take a dump of the default, then remove it.
+virsh pool-destroy default
+virsh pool-undefine default
+vim default-pool.xml # Change the image path to the desired path
+virsh pool-define default-pool.xml # From here we re-add the default.
+virsh pool-start default
+virsh pool-autostart default
+```
+
+### Error: "Vagrant failed to initialize at a very early stage"
+
+We set the `VAGRANT_HOME` environment variable to the CWD.
+It's likely this has become unset. The solution is simple.
+Within the directory containing "Vagrantfile":
+
+```sh
+VAGRANT_HOME=`pwd` vagrant <command>
+```
+
+You may want to set `VAGRANT_HOME` in your .bashrc or .zshrc.
diff --git a/util/github-runners-vagrant/Vagrantfile b/util/github-runners-vagrant/Vagrantfile
new file mode 100644
index 0000000000..0e505ba38b
--- /dev/null
+++ b/util/github-runners-vagrant/Vagrantfile
@@ -0,0 +1,89 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+NUM_RUNNERS=0 # Set this to the desired number of runners.
+PERSONAL_ACCESS_TOKEN="<PERSONAL ACCESS TOKEN>"
+GITHUB_ORG="<GITHUB_ORG>"
+HOSTNAME="<VM NAME>"
+
+Vagrant.configure("2") do |config|
+  config.ssh.username = "vagrant"
+  config.ssh.password = "vagrant"
+
+  (1..NUM_RUNNERS).each do |i|
+
+    config.vm.define "#{HOSTNAME}-#{i}" do |runner|
+      runner.vm.hostname = "#{HOSTNAME}-#{i}"
+      runner.vm.box = "generic/ubuntu2204"
+      runner.vm.box_check_update = true
+
+      runner.vm.provider "libvirt" do |vb|
+        # Customize the amount of cpus, memory, and storage on the VM:
+        vb.cpus = "4".to_i
+        vb.memory = "16384".to_i
+        vb.machine_virtual_size = 128 # 128G is the minimum.
+      end
+
+      # sets up vm
+      runner.vm.provision :shell, path: "provision_root.sh"
+      runner.vm.provision :shell, privileged: false, path: "provision_nonroot.sh"
+      # The provision_root.sh adds the vagrant user to the docker group, so we need to reload the VM.
+      runner.vm.provision :reload
+      # Copy the "action-run.sh" script from the host to the VM.
+      runner.vm.provision "file", source: "./action-run.sh", destination: "/tmp/action-run.sh"
+      runner.vm.provision :shell, privileged: false,  inline: "cp /tmp/action-run.sh ."
+
+      # The following attempts to see if KVM can be used inside the docker
+      # container.
+      #
+      # Almost all github action jobs run within a docker container. Therefore
+      # to be compatible with KVM, KVM must be enabled inside the docker.
+      #
+      # The existence of "kvm-works" in the VM home directory is how we
+      # indicate that KVM is working. It is created if the 'kvm-ok' command is
+      # successful. This is then passed to the action-run.sh script to indicate
+      # that the runner can be used for KVM via the `kvm` label.
+      runner.vm.provision :shell, privileged: false, run: 'always',  inline: <<-SHELL
+           rm -f kvm-works
+           docker run --device /dev/kvm -v$(pwd):/work -w /work --rm ubuntu:22.04 bash -c "apt update -y && apt install -y cpu-checker && kvm-ok"
+           status=$?
+           if [[ ${status} == 0 ]]; then
+                echo >&1 "Success. KVM enabled."
+                echo "success" > kvm-works
+           else
+                echo >&2 "Failure. KVM not enabled."
+           fi
+           exit 0
+      SHELL
+      # Execute the action-run.sh script on every boot. This configures and starts the runner.
+      # Note the 'kvm' label is applied to this runner if the "kvm-works" file exists. See above.
+      runner.vm.provision :shell, privileged: false, run: 'always', inline: "./action-run.sh #{PERSONAL_ACCESS_TOKEN} #{GITHUB_ORG} $(if [ -f 'kvm-works' ]; then echo 'kvm'; fi) >> action-run.log 2>&1 &"
+    end
+  end
+end
diff --git a/util/github-runners-vagrant/action-run.sh b/util/github-runners-vagrant/action-run.sh
new file mode 100755
index 0000000000..67cc6f3cf8
--- /dev/null
+++ b/util/github-runners-vagrant/action-run.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+set -x
+
+# No argument checking here, this is run directly in the Vagrantfile.
+PERSONAL_ACCESS_TOKEN="$1"
+GITHUB_ORG="$2"
+LABELS="$3"
+WORK_DIR="_work"
+
+# This checks there isn't another instance of this script running.
+# If this script is run twice then more than one runner can be active in the
+# VM and this causes problems.
+if [[ `pgrep -f $0` != "$$" ]]; then
+    echo "Another instance of shell already exist! Exiting"
+    exit
+fi
+
+# If the tarball isn't here then download it and extract it.
+# Note: we don't delete the tarball, we use it to check if we've already
+# downloaded it and extracted it.
+if [ ! -f "actions-runner-linux-x64-2.304.0.tar.gz" ]; then
+    wget https://github.com/actions/runner/releases/download/v2.304.0/actions-runner-linux-x64-2.304.0.tar.gz
+    tar xzf ./actions-runner-linux-x64-2.304.0.tar.gz
+fi
+
+# An infinite loop to re-configure and re-run the runner after each job.
+while true; do
+    # 1. Obtain the registration token.
+    token_curl=$(curl -L \
+    		      -X POST \
+    		      -H "Accept: application/vnd.github+json" \
+    		      -H "Authorization: Bearer ${PERSONAL_ACCESS_TOKEN}" \
+    		      -H "X-GitHub-Api-Version: 2022-11-28" \
+    		      https://api.github.com/orgs/${GITHUB_ORG}/actions/runners/registration-token)
+
+    token=$(echo ${token_curl} | jq -r '.token')
+
+    if [[ "${token}" == "null" ]];
+    then
+        # If "null" is returned, this can be because the GitHub API rate limit
+        # has been exceeded. To be safe we wait for 15 minutes before
+        # continuing.
+        sleep 900 # 15 minutes.
+        continue
+    fi
+
+    # 2. Configure the runner.
+    ./config.sh --unattended \
+                --url https://github.com/${GITHUB_ORG} \
+                --ephemeral \
+                --replace \
+                --work "${WORK_DIR}" \
+                --name "$(hostname)" \
+                --labels "${LABELS}" \
+                --token ${token}
+
+    # 3. Run the runner.
+    ./run.sh # This will complete with the runner being destroyed
+
+    # 4. Cleanup the machine
+    sudo rm -rf "${WORK_DIR}"
+    docker system prune --force --volumes --all
+
+    # 5. Sleep for a few minutes
+    #    GitHub has an API rate limit. This sleep ensures we don't ping GitHub
+    #    too frequently.
+    sleep 180 # 3 minutes.
+done
diff --git a/util/github-runners-vagrant/halt-helper.sh b/util/github-runners-vagrant/halt-helper.sh
new file mode 100755
index 0000000000..9b4caf7e28
--- /dev/null
+++ b/util/github-runners-vagrant/halt-helper.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script will try to safely halt each VM specified in the Vagrantfile.
+# A VM is skipped if it is currently running a job and returned to after
+# attempted shutdowns on the other VMs. This cycle continues indefinitely until
+# all the runners are shutdown.
+#
+# This script is useful as the VMs occasionally need to be halted to apply
+# patches and do maintenance. This script allows us to do this without
+# interrupting any jobs that may be running.
+
+while true; do
+    # This will list all the VMs still running. If there are no VMs running,
+    # we infer all have been shut down and we exit the script. Otherwise, we
+    # iterate over the VMs in an attempt to shut them down.
+    active=$(vagrant status | grep running | tr -s ' ' | cut -d ' ' -f1)
+    if [ "$active" == "" ]; then
+        echo "All VMs have been shutdown. Exiting."
+        exit 0
+    fi
+    echo "The following VMs are still running:"
+    echo "${active}"
+
+    for virtm in $active
+    do
+        # This script will first list the contents of the "_diag" directory.
+        # This directory hosts the github action runner job logs. Each job
+        # is logged to a separate file in the directory. This script then
+        # sorts these files by name. The last file in this sorted list is the
+        # most recent file and therefore for the most recent job. We can sort
+        # them in this way because their filenames are appended with UTC
+        # timestamps.
+        #
+        # Only one job ever runs at a time on a GitHub runner so if there is any
+        # job running, it will be logged in the most recent file in the
+        # "_diag" directory.
+        #
+        # If the job has completed the last line in the file will contain the
+        # string "Job completed.". This script checks for this and, if found,
+        # we assume there are no jobs running and safely run `vagrant halt` to
+        # shutdown the VM. If the job is still running we print a message
+        # saying the job is still running and will return to it on the next
+        # iteration of the loop.
+        echo "Inspecting \"${virtm}\"..."
+        vagrant ssh $virtm -c 'ls _diag | sort | tail -1 | xargs -I % cat "_diag/%" | tail -1 | grep -q "Job completed"'
+        status=$?
+        if [[ ${status} == 0 ]]; then
+            echo "${virtm} is Idle. Attempting shutdown"
+            vagrant halt ${virtm} && echo "${virtm} successfully halted" || echo "${virtm} experience a failure halting"
+        else
+            echo "${virtm} is Busy. Skipping for now."
+        fi
+    done
+    # Sleep here for 20 seconds just to ensure all the VMs have time
+    # to shutdown.
+    sleep 20
+done
diff --git a/tests/configs/pc-simple-timing.py b/util/github-runners-vagrant/provision_nonroot.sh
similarity index 65%
rename from tests/configs/pc-simple-timing.py
rename to util/github-runners-vagrant/provision_nonroot.sh
index c095401381..d817959285 100644
--- a/tests/configs/pc-simple-timing.py
+++ b/util/github-runners-vagrant/provision_nonroot.sh
@@ -1,15 +1,8 @@
-# Copyright (c) 2012 ARM Limited
+#!/usr/bin/env bash
+
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met: redistributions of source code must retain the above copyright
@@ -33,9 +26,16 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.objects import *
-from x86_generic import *
+# fail on unset variables and command errors
+set -eu -o pipefail # -x: is for debugging
+
+# Install deno
+curl -fsSL https://deno.land/x/install/install.sh | sh
+echo "export PATH=\"\${HOME}/.deno/bin:\${PATH}\"" >> ~/.profile
+echo "export PATH=\"\${HOME}/.deno/bin:\${PATH}\"" >> ~/.bash_profile
 
-root = LinuxX86FSSystemUniprocessor(
-    mem_mode="timing", mem_class=DDR3_1600_8x8, cpu_class=TimingSimpleCPU
-).create_root()
+# Install docker compose
+DOCKER_COMPOSE_VERSION=$(curl -s https://api.github.com/repos/docker/compose/releases/latest | jq -r '.tag_name')
+mkdir -p "${HOME}/.docker/cli-plugins"
+curl -sL "https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-$(uname -s)-$(uname -m)" -o "${HOME}/.docker/cli-plugins/docker-compose"
+chmod +x "${HOME}/.docker/cli-plugins/docker-compose"
diff --git a/util/github-runners-vagrant/provision_root.sh b/util/github-runners-vagrant/provision_root.sh
new file mode 100644
index 0000000000..d3e6bb574c
--- /dev/null
+++ b/util/github-runners-vagrant/provision_root.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# fail on unset variables and command errors
+set -eu -o pipefail # -x: is for debugging
+
+apt-get update
+apt-get upgrade -y
+# software-properties-common provides `add-apt-repository`; install it first.
+apt-get install -y software-properties-common
+add-apt-repository --yes --update ppa:git-core/ppa
+apt-get install -y \
+  bash \
+  build-essential \
+  clang-format \
+  git \
+  git-lfs \
+  jq \
+  libffi-dev \
+  libssl-dev \
+  nkf \
+  python3 \
+  python3-dev \
+  python3-pip \
+  python3-venv \
+  shellcheck \
+  tree \
+  wget \
+  yamllint \
+  zstd \
+  apt-transport-https ca-certificates \
+  curl \
+  gnupg \
+  lsb-release \
+  cpu-checker
+
+# Install docker. The prerequisite packages were already installed above.
+# `--batch --yes` lets gpg overwrite the keyring if provisioning is re-run.
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
+echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
+apt-get update -y
+apt-get install -y docker-ce docker-ce-cli containerd.io
+
+# Add the Vagrant user to the docker group.
+# Note: The VM needs to be rebooted for this to take effect. `newgrp docker`
+# doesn't work.
+usermod -aG docker vagrant
+
+# `kvm-ok` exits zero if the machine supports KVM, and non-zero otherwise.
+# It is tested directly in the `if` condition: under `set -e` a bare failing
+# `kvm-ok` would abort the whole script before its status could be inspected,
+# and hosts without KVM should simply skip this step.
+if kvm-ok; then
+    # Separate commands (rather than one `&&` chain) so that `set -e` stops
+    # provisioning as soon as any of them fails.
+    apt-get install -y qemu-kvm \
+                       virt-manager \
+                       libvirt-daemon-system virtinst \
+                       libvirt-clients bridge-utils
+    systemctl enable --now libvirtd
+    usermod -aG kvm vagrant
+    usermod -aG libvirt vagrant
+fi
+
+# Cleanup
+apt-get autoremove -y
+
+# Grow the root logical volume, then its filesystem, to use all free space.
+lvextend -l +100%FREE "$(df / --output=source | sed 1d)"
+resize2fs "$(df / --output=source | sed 1d)"
diff --git a/util/hammersim/prob-005.json.zip b/util/hammersim/prob-005.json.zip
new file mode 100644
index 0000000000..58695005ff
Binary files /dev/null and b/util/hammersim/prob-005.json.zip differ
diff --git a/util/hammersim/process_dmap_json.py b/util/hammersim/process_dmap_json.py
new file mode 100644
index 0000000000..1dc799d4ab
--- /dev/null
+++ b/util/hammersim/process_dmap_json.py
@@ -0,0 +1,39 @@
+# Standard-library JSON parser used to read the device map
+import json
+
+# This script takes a device_map.json file
+# and converts it into a device_map.txt file
+# to make it easier to be parsed in C++ code
+
+# Output file follows the following format
+# Start of each new entry with "**"
+# then the next lines will correspond to the following:
+# 1) rank (actually right now this entry/line will be missing since the
+# current tests are only done with single rank DRAM)
+# 2) bank
+# 3) the row number
+# 4) a comma separated list of corrupt columns within a particular row
+#     (line ends with "e")
+
+# Read and deserialize the JSON file; `with` closes it once it is loaded.
+with open("device_map.json") as infile:
+    data = json.load(infile)
+
+# Only rank "0" is converted (1 rank device).
+rank = data["0"]
+
+# `with` makes sure the output is flushed and closed even on errors.
+with open("device_map.txt", "w") as outfile:
+    outfile.write("**\n")
+    for bank, rows in rank.items():
+        for row, cols in rows.items():
+            # bank and row number, one per line
+            outfile.write(bank)
+            outfile.write("\n")
+            outfile.write(row)
+            outfile.write("\n")
+            # every column is followed by a comma; "e" ends the list
+            outfile.write("".join(f"{col}," for col in cols))
+            outfile.write("e\n")
+            # entry delimiter
+            outfile.write("**\n")
diff --git a/util/logroll.py b/util/logroll.py
index 02ca309e87..04f7d5c69d 100755
--- a/util/logroll.py
+++ b/util/logroll.py
@@ -105,7 +105,7 @@ class CopyingMock(unittest.mock.MagicMock):
     def __call__(self, *args, **kwargs):
         args = copy.deepcopy(args)
         kwargs = copy.deepcopy(kwargs)
-        return super(CopyingMock, self).__call__(*args, **kwargs)
+        return super().__call__(*args, **kwargs)
 
 
 class TestLogroll(unittest.TestCase):
@@ -125,14 +125,12 @@ class TestLogroll(unittest.TestCase):
 
     # Generator which returns lines like a file object would.
     def line_gen(self, lines):
-        for line in lines:
-            yield line
+        yield from lines
 
     # Generator like above, but which simulates a signal midway through.
     def signal_line_gen(self, lines, pos, sig_dict, signal):
         # Return the first few lines.
-        for line in lines[:pos]:
-            yield line
+        yield from lines[:pos]
 
         # Simulate receiving the signal.
         self.assertIn(signal, sig_dict)
@@ -141,8 +139,7 @@ def signal_line_gen(self, lines, pos, sig_dict, signal):
             sig_dict[signal](None, None)
 
         # Return the remaining lines.
-        for line in lines[pos:]:
-            yield line
+        yield from lines[pos:]
 
     # Set up a mock of signal.signal to record handlers in a dict.
     def mock_signal_dict(self, mock):
@@ -214,7 +211,6 @@ def test_sigusr1_filling_main(self):
         ) as mock_signal, unittest.mock.patch(
             __name__ + ".dump_lines", new_callable=CopyingMock
         ) as mock_dump_lines:
-
             signal_dict = self.mock_signal_dict(mock_signal)
 
             main(
@@ -237,7 +233,6 @@ def test_sigint_filling_main(self):
         ) as mock_signal, unittest.mock.patch(
             __name__ + ".dump_lines", new_callable=CopyingMock
         ) as mock_dump_lines:
-
             signal_dict = self.mock_signal_dict(mock_signal)
 
             with self.assertRaises(SystemExit):
@@ -258,7 +253,6 @@ def test_sigusr1_full_main(self):
         ) as mock_signal, unittest.mock.patch(
             __name__ + ".dump_lines", new_callable=CopyingMock
         ) as mock_dump_lines:
-
             signal_dict = self.mock_signal_dict(mock_signal)
 
             main(
@@ -283,7 +277,6 @@ def test_sigint_full_main(self):
         ) as mock_signal, unittest.mock.patch(
             __name__ + ".dump_lines", new_callable=CopyingMock
         ) as mock_dump_lines:
-
             signal_dict = self.mock_signal_dict(mock_signal)
 
             with self.assertRaises(SystemExit):
diff --git a/util/maint/lib/maintainers.py b/util/maint/lib/maintainers.py
index 93ea1a17bf..4cc713aada 100644
--- a/util/maint/lib/maintainers.py
+++ b/util/maint/lib/maintainers.py
@@ -83,7 +83,7 @@ def __str__(self) -> str:
         ]
 
 
-class Subsystem(object):
+class Subsystem:
     tag: str
     status: Status
     maintainers: List[Tuple[str, str]]  # Name, email
@@ -102,7 +102,7 @@ def __init__(
         self.description = description if description is not None else ""
 
 
-class Maintainers(object):
+class Maintainers:
     DEFAULT_MAINTAINERS = os.path.join(
         os.path.dirname(__file__), "../../../MAINTAINERS.yaml"
     )
@@ -118,7 +118,6 @@ def __init__(self, ydict: Mapping[str, Any]):
     def from_file(
         cls, path_or_file: Optional[PathOrFile] = None
     ) -> "Maintainers":
-
         return cls(Maintainers._load_maintainers_file(path_or_file))
 
     @classmethod
@@ -133,7 +132,7 @@ def _load_maintainers_file(
             path_or_file = cls.DEFAULT_MAINTAINERS
 
         if isinstance(path_or_file, str):
-            with open(path_or_file, "r") as fin:
+            with open(path_or_file) as fin:
                 return yaml.load(fin, Loader=yaml.SafeLoader)
         else:
             return yaml.load(path_or_file, Loader=yaml.SafeLoader)
diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py
index 0d61e39fde..1dcb70def2 100755
--- a/util/maint/list_changes.py
+++ b/util/maint/list_changes.py
@@ -41,7 +41,7 @@
 from functools import wraps
 
 
-class Commit(object):
+class Commit:
     _re_tag = re.compile(r"^((?:\w|-)+): (.*)$")
 
     def __init__(self, rev):
@@ -137,12 +137,12 @@ def list_changes(upstream, feature, paths=[]):
     feature_revs = tuple(list_revs(upstream, feature, paths=paths))
     upstream_revs = tuple(list_revs(feature, upstream, paths=paths))
 
-    feature_cids = dict(
-        [(c.change_id, c) for c in feature_revs if c.change_id is not None]
-    )
-    upstream_cids = dict(
-        [(c.change_id, c) for c in upstream_revs if c.change_id is not None]
-    )
+    feature_cids = {
+        c.change_id: c for c in feature_revs if c.change_id is not None
+    }
+    upstream_cids = {
+        c.change_id: c for c in upstream_revs if c.change_id is not None
+    }
 
     incoming = [
         r
@@ -251,13 +251,11 @@ def _main():
     if args.deep_search:
         print("Incorrectly rebased changes:")
         all_upstream_revs = list_revs(args.upstream, paths=args.paths)
-        all_upstream_cids = dict(
-            [
-                (c.change_id, c)
-                for c in all_upstream_revs
-                if c.change_id is not None
-            ]
-        )
+        all_upstream_cids = {
+            c.change_id: c
+            for c in all_upstream_revs
+            if c.change_id is not None
+        }
         incorrect_outgoing = [
             r for r in outgoing if r.change_id in all_upstream_cids
         ]
diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py
index 75b7e7edd9..8da3ee64bd 100755
--- a/util/maint/show_changes_by_file.py
+++ b/util/maint/show_changes_by_file.py
@@ -38,7 +38,7 @@
 
 class OrderedDefaultDict(OrderedDict, defaultdict):
     def __init__(self, default_factory=None, *args, **kwargs):
-        super(OrderedDefaultDict, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.default_factory = default_factory
 
 
diff --git a/util/minorview/blobs.py b/util/minorview/blobs.py
index 51b28d0d1e..8d379f0cb8 100644
--- a/util/minorview/blobs.py
+++ b/util/minorview/blobs.py
@@ -181,7 +181,7 @@ def cross(cr, centre, size):
     cr.line_to(x, bottom)
 
 
-class Blob(object):
+class Blob:
     """Blob super class"""
 
     def __init__(self, picChar, unit, topLeft, colour, size=Point(1, 1)):
@@ -217,7 +217,7 @@ def __init__(
         colour=colours.black,
         size=Point(1, 1),
     ):
-        super(Block, self).__init__(picChar, unit, topLeft, colour, size=size)
+        super().__init__(picChar, unit, topLeft, colour, size=size)
         # {horiz, vert}
         self.stripDir = "horiz"
         # {LR, RL}: LR means the first strip will be on the left/top,
@@ -388,7 +388,7 @@ class Key(Blob):
     def __init__(
         self, picChar, unit, topLeft, colour=colours.black, size=Point(1, 1)
     ):
-        super(Key, self).__init__(picChar, unit, topLeft, colour, size=size)
+        super().__init__(picChar, unit, topLeft, colour, size=size)
         self.colours = "BBBB"
         self.displayName = unit
 
@@ -464,7 +464,7 @@ def __init__(
         size=Point(1.0, 1.0),
         direc="right",
     ):
-        super(Arrow, self).__init__(unit, unit, topLeft, colour, size=size)
+        super().__init__(unit, unit, topLeft, colour, size=size)
         self.direc = direc
 
     def render(self, cr, view, event, select, time):
diff --git a/util/minorview/model.py b/util/minorview/model.py
index 126b730bea..cf8c04bc04 100644
--- a/util/minorview/model.py
+++ b/util/minorview/model.py
@@ -45,10 +45,10 @@
 id_parts = "TSPLFE"
 
 all_ids = set(id_parts)
-no_ids = set([])
+no_ids = set()
 
 
-class BlobDataSelect(object):
+class BlobDataSelect:
     """Represents which data is displayed for Ided object"""
 
     def __init__(self):
@@ -62,7 +62,7 @@ def __and__(self, rhs):
         return ret
 
 
-class BlobVisualData(object):
+class BlobVisualData:
     """Super class for block data colouring"""
 
     def to_striped_block(self, select):
@@ -113,7 +113,7 @@ def __cmp__(self, right):
 
     def from_string(self, string):
         m = re.match(
-            "^(F;)?(\d+)/(\d+)\.(\d+)/(\d+)(/(\d+)(\.(\d+))?)?", string
+            r"^(F;)?(\d+)/(\d+)\.(\d+)/(\d+)(/(\d+)(\.(\d+))?)?", string
         )
 
         def seqnum_from_string(string):
@@ -205,7 +205,7 @@ def __init__(self):
         self.id = Id()
 
     def from_string(self, string):
-        m = re.match("^(\w+);(\d+)\.(\d+);([0-9a-fA-Fx]+);(.*)$", string)
+        m = re.match(r"^(\w+);(\d+)\.(\d+);([0-9a-fA-Fx]+);(.*)$", string)
 
         if m is not None:
             (
@@ -283,7 +283,7 @@ def to_striped_block(self, select):
         return [direc_colour] + self.id.to_striped_block(select)
 
 
-class ColourPattern(object):
+class ColourPattern:
     """Super class for decoders that make 2D grids rather than just single
     striped blocks"""
 
@@ -493,7 +493,7 @@ def find_colour_decoder(stripSpace, decoderName, dataName, picPairs):
         return None
 
 
-class IdedObj(object):
+class IdedObj:
     """An object identified by an Id carrying paired data.
     The super class for Inst and Line"""
 
@@ -518,7 +518,7 @@ class Inst(IdedObj):
     """A non-fault instruction"""
 
     def __init__(self, id, disassembly, addr, pairs={}):
-        super(Inst, self).__init__(id, pairs)
+        super().__init__(id, pairs)
         if "nextAddr" in pairs:
             self.nextAddr = int(pairs["nextAddr"], 0)
             del pairs["nextAddr"]
@@ -542,7 +542,7 @@ class InstFault(IdedObj):
     """A fault instruction"""
 
     def __init__(self, id, fault, addr, pairs={}):
-        super(InstFault, self).__init__(id, pairs)
+        super().__init__(id, pairs)
         self.fault = fault
         self.addr = addr
 
@@ -557,7 +557,7 @@ class Line(IdedObj):
     """A fetched line"""
 
     def __init__(self, id, vaddr, paddr, size, pairs={}):
-        super(Line, self).__init__(id, pairs)
+        super().__init__(id, pairs)
         self.vaddr = vaddr
         self.paddr = paddr
         self.size = size
@@ -573,7 +573,7 @@ class LineFault(IdedObj):
     """A faulting line"""
 
     def __init__(self, id, fault, vaddr, pairs={}):
-        super(LineFault, self).__init__(id, pairs)
+        super().__init__(id, pairs)
         self.vaddr = vaddr
         self.fault = fault
 
@@ -584,7 +584,7 @@ def table_line(self):
         return ret
 
 
-class BlobEvent(object):
+class BlobEvent:
     """Time event for a single blob"""
 
     def __init__(self, unit, time, pairs={}):
@@ -624,7 +624,7 @@ def find_inst(data):
         return sorted(ret)
 
 
-class BlobModel(object):
+class BlobModel:
     """Model bringing together blob definitions and parsed events"""
 
     def __init__(self, unitNamePrefix=""):
@@ -856,7 +856,7 @@ def update_comments(comments, time):
         still_skipping = True
         l = f.readline()
         while l and still_skipping:
-            match = re.match("^\s*(\d+):", l)
+            match = re.match(r"^\s*(\d+):", l)
             if match is not None:
                 event_time = match.groups()
                 if int(event_time[0]) >= startTime:
@@ -867,7 +867,7 @@ def update_comments(comments, time):
                 l = f.readline()
 
         match_line_re = re.compile(
-            "^\s*(\d+):\s*([\w\.]+):\s*(Minor\w+:)?\s*(.*)$"
+            r"^\s*(\d+):\s*([\w\.]+):\s*(Minor\w+:)?\s*(.*)$"
         )
 
         # Parse each line of the events file, accumulating comments to be
@@ -880,13 +880,13 @@ def update_comments(comments, time):
                 event_time = int(event_time)
 
                 unit = re.sub(
-                    "^" + self.unitNamePrefix + "\.?(.*)$", "\\1", unit
+                    "^" + self.unitNamePrefix + r"\.?(.*)$", "\\1", unit
                 )
 
                 # When the time changes, resolve comments
                 if event_time != time:
                     if self.numEvents > next_progress_print_event_count:
-                        print(("Parsed to time: %d" % event_time))
+                        print("Parsed to time: %d" % event_time)
                         next_progress_print_event_count = self.numEvents + 1000
                     update_comments(comments, time)
                     comments = []
@@ -1137,7 +1137,7 @@ def expand_macros(pairs, newPairs):
         def line_is_comment(line):
             """Returns true if a line starts with #, returns False
             for lines which are None"""
-            return line is not None and re.match("^\s*#", line) is not None
+            return line is not None and re.match(r"^\s*#", line) is not None
 
         def get_line(f):
             """Get a line from file f extending that line if it ends in
@@ -1186,19 +1186,19 @@ def get_line(f):
             l = parse.remove_trailing_ws(l)
             l = re.sub("#.*", "", l)
 
-            if re.match("^\s*$", l) is not None:
+            if re.match(r"^\s*$", l) is not None:
                 pass
             elif l == "<<<":
                 in_picture = True
             elif l == ">>>":
                 in_picture = False
             elif in_picture:
-                picture.append(re.sub("\s*$", "", l))
+                picture.append(re.sub(r"\s*$", "", l))
             else:
                 line_match = re.match(
-                    "^([a-zA-Z0-9][a-zA-Z0-9]):\s+([\w.]+)\s*(.*)", l
+                    r"^([a-zA-Z0-9][a-zA-Z0-9]):\s+([\w.]+)\s*(.*)", l
                 )
-                macro_match = re.match("macro\s+(\w+):(.*)", l)
+                macro_match = re.match(r"macro\s+(\w+):(.*)", l)
 
                 if macro_match is not None:
                     name, defn = macro_match.groups()
diff --git a/util/minorview/parse.py b/util/minorview/parse.py
index 5b6bea0c79..bc12536980 100644
--- a/util/minorview/parse.py
+++ b/util/minorview/parse.py
@@ -44,14 +44,14 @@ def list_parser(names):
     ret = []
     accum = []
     for elem in elems:
-        if re.search("^\((.*)\)$", elem):
-            accum.append(re.sub("^\((.*)\)", "\\1", elem))
+        if re.search(r"^\((.*)\)$", elem):
+            accum.append(re.sub(r"^\((.*)\)", "\\1", elem))
             ret.append(accum)
             accum = []
-        elif re.search("^\(", elem):
-            accum.append(re.sub("^\(", "", elem))
-        elif re.search("\)$", elem):
-            accum.append(re.sub("\)$", "", elem))
+        elif re.search(r"^\(", elem):
+            accum.append(re.sub(r"^\(", "", elem))
+        elif re.search(r"\)$", elem):
+            accum.append(re.sub(r"\)$", "", elem))
             ret.append(accum)
             accum = []
         elif len(accum) != 0:
@@ -72,18 +72,18 @@ def map2(f, ls):
 
 
 def remove_trailing_ws(line):
-    return re.sub("\s*$", "", line)
+    return re.sub(r"\s*$", "", line)
 
 
 def remove_leading_and_trailing_ws(line):
-    return re.sub("\s*$", "", re.sub("^\s*", "", line))
+    return re.sub(r"\s*$", "", re.sub(r"^\s*", "", line))
 
 
 def parse_pairs_list(pairString):
     """parse a string like 'name=value name2=value2' into a
     list of pairs of ('name', 'value') ..."""
     ret = []
-    pairs = re.finditer('(\w+)(=("[^"]*"|[^\s]*))?', pairString)
+    pairs = re.finditer(r'(\w+)(=("[^"]*"|[^\s]*))?', pairString)
     for pair in pairs:
         name, rest, value = pair.groups()
         if value is not None:
diff --git a/util/minorview/point.py b/util/minorview/point.py
index 17190e1ca7..636704002d 100644
--- a/util/minorview/point.py
+++ b/util/minorview/point.py
@@ -34,7 +34,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-class Point(object):
+class Point:
     """2D point coordinates/size type"""
 
     def __init__(self, x, y):
diff --git a/util/minorview/view.py b/util/minorview/view.py
index 7c1aef873f..37564996eb 100644
--- a/util/minorview/view.py
+++ b/util/minorview/view.py
@@ -49,7 +49,7 @@
 from . import blobs
 
 
-class BlobView(object):
+class BlobView:
     """The canvas view of the pipeline"""
 
     def __init__(self, model):
@@ -189,7 +189,7 @@ def set_da_size(self):
         self.da.set_size_request(10, int(self.initialHeight))
 
 
-class BlobController(object):
+class BlobController:
     """The controller bar for the viewer"""
 
     def __init__(
@@ -361,7 +361,7 @@ def load_events(self, button):
         self.view.redraw()
 
 
-class Overlay(object):
+class Overlay:
     """An Overlay is a speech bubble explaining the data in a blob"""
 
     def __init__(self, model, view, point, blob):
@@ -456,7 +456,7 @@ def text_width(str):
             text_point += text_step
 
 
-class BlobWindow(object):
+class BlobWindow:
     """The top-level window and its mouse control"""
 
     def __init__(self, model, view, controller):
diff --git a/util/o3-pipeview.py b/util/o3-pipeview.py
index fe49706dad..3228832446 100755
--- a/util/o3-pipeview.py
+++ b/util/o3-pipeview.py
@@ -511,7 +511,7 @@ def main():
         sys.exit(1)
     # Process trace
     print("Processing trace... ", end=" ")
-    with open(args.tracefile, "r") as trace:
+    with open(args.tracefile) as trace:
         with open(args.outfile, "w") as out:
             process_trace(
                 trace,
diff --git a/util/dockerfiles/ubuntu-18.04_gcc-version/Dockerfile b/util/obtain-resource.py
similarity index 50%
rename from util/dockerfiles/ubuntu-18.04_gcc-version/Dockerfile
rename to util/obtain-resource.py
index 3e94e8d0e6..de6a7b90e2 100644
--- a/util/dockerfiles/ubuntu-18.04_gcc-version/Dockerfile
+++ b/util/obtain-resource.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 The Regents of the University of California
+# Copyright (c) 2023 The Regents of the University of California
 # All Rights Reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -23,27 +23,65 @@
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-FROM ubuntu:18.04
-
-# Valid version values:
-# 4.8
-# 5
-# 6
-# 7
-# 8
-ARG version
-
-RUN apt -y update && apt -y upgrade && \
-    apt -y install git m4 scons zlib1g zlib1g-dev gcc-multilib \
-    libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
-    python3-dev python3 doxygen wget zip gcc-${version} \
-    g++-${version} make
-
-RUN update-alternatives --install \
-    /usr/bin/g++ g++ /usr/bin/g++-${version} 100
-RUN update-alternatives --install \
-    /usr/bin/gcc gcc /usr/bin/gcc-${version} 100
-RUN update-alternatives --install \
-    /usr/bin/c++ c++ /usr/bin/g++-${version} 100
-RUN update-alternatives --install \
-    /usr/bin/cc cc /usr/bin/gcc-${version} 100
+
+"""
+Obtain a resource from gem5 Resources.
+
+Usage
+-----
+
+```sh
+scons build/ALL/gem5.opt -j$(nproc)
+build/ALL/gem5.opt util/obtain-resource.py <resource_id> [-p <path>] [-q]
+# Example:
+# `build/ALL/gem5.opt util/obtain-resource.py arm-hello64-static -p arm-hello`
+# This will download the resource with id `arm-hello64-static` to the
+# "arm-hello" path in the CWD.
+```
+"""
+
+if __name__ == "__m5_main__":
+    # Imported locally: the gem5 package is presumably only available here.
+    from gem5.resources.resource import obtain_resource
+    import argparse
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "id",
+        type=str,
+        help="The resource id to download.",
+    )
+
+    parser.add_argument(
+        "-p",
+        "--path",
+        type=str,
+        help="The path the resource is to be downloaded to. If not specified, "
+        "the resource will be downloaded to the default location in the "
+        "gem5 local cache of resources",
+    )
+
+    parser.add_argument(
+        "-q",
+        "--quiet",
+        action="store_true",
+        help="Suppress output.",
+    )
+
+    args = parser.parse_args()
+
+    resource = obtain_resource(
+        resource_id=args.id,
+        quiet=args.quiet,
+        to_path=args.path,
+    )
+
+    if not args.quiet:
+        print(f"Resource at: '{resource.get_local_path()}'")
+
+    exit(0)
+
+# Only reached when `__name__` is not "__m5_main__": the branch above exits.
+print("Error: This script is meant to be run with the gem5 binary")
+exit(1)
diff --git a/util/on-chip-network-power-area.py b/util/on-chip-network-power-area.py
index 61a316a99c..563bf6334d 100644
--- a/util/on-chip-network-power-area.py
+++ b/util/on-chip-network-power-area.py
@@ -55,6 +55,7 @@
 sys.path.append("build/ext/dsent")
 import dsent
 
+
 # Parse gem5 config.ini file for the configuration parameters related to
 # the on-chip network.
 def parseConfig(config_file):
@@ -182,13 +183,12 @@ def parseStats(
     buffers_per_control_vc,
     ni_flit_size_bits,
 ):
-
     # Open the stats.txt file and parse it to for the required numbers
     # and the number of routers.
     try:
-        stats_handle = open(stats_file, "r")
+        stats_handle = open(stats_file)
         stats_handle.close()
-    except IOError:
+    except OSError:
         print("Failed to open ", stats_file, " for reading")
         exit(-1)
 
diff --git a/util/oprofile-top.py b/util/oprofile-top.py
index 4d5a693451..2808ea10fe 100755
--- a/util/oprofile-top.py
+++ b/util/oprofile-top.py
@@ -66,7 +66,7 @@ def category(app, sym):
 prof = {}
 linenum = 0
 for line in f.readlines():
-    line = re.sub("\(no symbols\)", "nosym", line)
+    line = re.sub(r"\(no symbols\)", "nosym", line)
     line = re.sub("anonymous.*", "nosym", line)
     linenum += 1
     if linenum < 4:
diff --git a/util/plot_dram/PlotPowerStates.py b/util/plot_dram/PlotPowerStates.py
index b476a24da1..31e831f8b0 100755
--- a/util/plot_dram/PlotPowerStates.py
+++ b/util/plot_dram/PlotPowerStates.py
@@ -127,7 +127,7 @@ def plotLowPStates(
     @param delay_list: list of itt max multipliers (e.g. [1, 20, 200])
 
     """
-    stats_file = open(stats_fname, "r")
+    stats_file = open(stats_fname)
 
     global bankUtilValues
     bankUtilValues = bank_util_list
@@ -150,7 +150,6 @@ def plotLowPStates(
     for delay in delayValues:
         for bank_util in bankUtilValues:
             for seq_bytes in seqBytesValues:
-
                 for line in stats_file:
                     if "Begin" in line:
                         break
@@ -261,7 +260,6 @@ def plotStackedStates(delay, states_list, bottom_state, plot_name, ylabel_str):
     ind = np.arange(N)
 
     for sub_idx, bank_util in enumerate(bankUtilValues):
-
         l_states = {}
         p_states = {}
 
diff --git a/util/plot_dram/dram_lat_mem_rd_plot.py b/util/plot_dram/dram_lat_mem_rd_plot.py
index 0d0e8d052b..798148f714 100755
--- a/util/plot_dram/dram_lat_mem_rd_plot.py
+++ b/util/plot_dram/dram_lat_mem_rd_plot.py
@@ -46,25 +46,25 @@
 import sys
 import re
 
+
 # This script is intended to post process and plot the output from
-# running configs/dram/lat_mem_rd.py, as such it parses the simout and
+# running configs/dram/lat_mem_rd.py, as such it parses the simout.txt and
 # stats.txt to get the relevant data points.
 def main():
-
     if len(sys.argv) != 2:
         print("Usage: ", sys.argv[0], "<simout directory>")
         exit(-1)
 
     try:
-        stats = open(sys.argv[1] + "/stats.txt", "r")
-    except IOError:
+        stats = open(sys.argv[1] + "/stats.txt")
+    except OSError:
         print("Failed to open ", sys.argv[1] + "/stats.txt", " for reading")
         exit(-1)
 
     try:
-        simout = open(sys.argv[1] + "/simout", "r")
-    except IOError:
-        print("Failed to open ", sys.argv[1] + "/simout", " for reading")
+        simout = open(sys.argv[1] + "/simout.txt")
+    except OSError:
+        print("Failed to open ", sys.argv[1] + "/simout.txt", " for reading")
         exit(-1)
 
     # Get the address ranges
@@ -77,7 +77,7 @@ def main():
         if got_ranges:
             ranges.append(int(line) / 1024)
 
-        match = re.match("lat_mem_rd with (\d+) iterations, ranges:.*", line)
+        match = re.match(r"lat_mem_rd with (\d+) iterations, ranges:.*", line)
         if match:
             got_ranges = True
             iterations = int(match.groups(0)[0])
@@ -85,14 +85,14 @@ def main():
     simout.close()
 
     if not got_ranges:
-        print("Failed to get address ranges, ensure simout is up-to-date")
+        print("Failed to get address ranges, ensure simout.txt is up-to-date")
         exit(-1)
 
     # Now parse the stats
     raw_rd_lat = []
 
     for line in stats:
-        match = re.match(".*readLatencyHist::mean\s+(.+)\s+#.*", line)
+        match = re.match(r".*readLatencyHist::mean\s+(.+)\s+#.*", line)
         if match:
             raw_rd_lat.append(float(match.groups(0)[0]) / 1000)
     stats.close()
@@ -122,7 +122,7 @@ def main():
         )
         exit(-1)
 
-    for (r, l) in zip(ranges, final_rd_lat):
+    for r, l in zip(ranges, final_rd_lat):
         print(r, round(l, 2))
 
     # lazy version to check if an integer is a power of two
diff --git a/util/plot_dram/dram_sweep_plot.py b/util/plot_dram/dram_sweep_plot.py
index 1350f7af77..8fbeaf511c 100755
--- a/util/plot_dram/dram_sweep_plot.py
+++ b/util/plot_dram/dram_sweep_plot.py
@@ -47,12 +47,12 @@
 import sys
 import re
 
+
 # Determine the parameters of the sweep from the simout output, and
 # then parse the stats and plot the 3D surface corresponding to the
 # different combinations of parallel banks, and stride size, as
 # generated by the config/dram/sweep.py script
 def main():
-
     if len(sys.argv) != 3:
         print("Usage: ", sys.argv[0], "-u|p|e <simout directory>")
         exit(-1)
@@ -73,15 +73,15 @@ def main():
     mode = sys.argv[1][1]
 
     try:
-        stats = open(sys.argv[2] + "/stats.txt", "r")
-    except IOError:
+        stats = open(sys.argv[2] + "/stats.txt")
+    except OSError:
         print("Failed to open ", sys.argv[2] + "/stats.txt", " for reading")
         exit(-1)
 
     try:
-        simout = open(sys.argv[2] + "/simout", "r")
-    except IOError:
-        print("Failed to open ", sys.argv[2] + "/simout", " for reading")
+        simout = open(sys.argv[2] + "/simout.txt")
+    except OSError:
+        print("Failed to open ", sys.argv[2] + "/simout.txt", " for reading")
         exit(-1)
 
     # Get the burst size, number of banks and the maximum stride from
@@ -90,7 +90,7 @@ def main():
 
     for line in simout:
         match = re.match(
-            "DRAM sweep with burst: (\d+), banks: (\d+), max stride: (\d+)",
+            r"DRAM sweep with burst: (\d+), banks: (\d+), max stride: (\d+)",
             line,
         )
         if match:
@@ -102,7 +102,9 @@ def main():
     simout.close()
 
     if not got_sweep:
-        print("Failed to establish sweep details, ensure simout is up-to-date")
+        print(
+            "Failed to establish sweep details, ensure simout.txt is up-to-date"
+        )
         exit(-1)
 
     # Now parse the stats
@@ -111,15 +113,15 @@ def main():
     avg_pwr = []
 
     for line in stats:
-        match = re.match(".*busUtil\s+(\d+\.\d+)\s+#.*", line)
+        match = re.match(r".*busUtil\s+(\d+\.\d+)\s+#.*", line)
         if match:
             bus_util.append(float(match.groups(0)[0]))
 
-        match = re.match(".*peakBW\s+(\d+\.\d+)\s+#.*", line)
+        match = re.match(r".*peakBW\s+(\d+\.\d+)\s+#.*", line)
         if match:
             peak_bw.append(float(match.groups(0)[0]))
 
-        match = re.match(".*averagePower\s+(\d+\.?\d*)\s+#.*", line)
+        match = re.match(r".*averagePower\s+(\d+\.?\d*)\s+#.*", line)
         if match:
             avg_pwr.append(float(match.groups(0)[0]))
     stats.close()
diff --git a/util/plot_dram/lowp_dram_sweep_plot.py b/util/plot_dram/lowp_dram_sweep_plot.py
index dedd1e0c0d..0f53a3319b 100755
--- a/util/plot_dram/lowp_dram_sweep_plot.py
+++ b/util/plot_dram/lowp_dram_sweep_plot.py
@@ -106,7 +106,7 @@ def main():
             filename = plotter.stateTimePlotName(str(delay) + "-")
             outfile.write(wrapForGraphic(filename, textwidth))
             outfile.write(getCaption(delay))
-        outfile.write("\end{figure}\n")
+        outfile.write("\\end{figure}\n")
 
         # Energy plots for all delay values
         outfile.write("\\begin{figure} \n\\centering\n")
@@ -140,7 +140,6 @@ def wrapForGraphic(filename, width="1.0"):
 
 
 def startDocText(outfile):
-
     start_stuff = """
 \\documentclass[a4paper,landscape,twocolumn]{article}
 
@@ -152,7 +151,6 @@ def startDocText(outfile):
 
 
 def endDocText(outfile):
-
     end_stuff = """
 
 \\end{document}
diff --git a/util/protolib.py b/util/protolib.py
index dcfb7aabb5..e795625c39 100644
--- a/util/protolib.py
+++ b/util/protolib.py
@@ -89,9 +89,9 @@ def openFileRd(in_file):
             # reading the first message.
             proto_in.seek(1)
             proto_in.seek(0)
-        except IOError:
+        except OSError:
             proto_in = open(in_file, "rb")
-    except IOError:
+    except OSError:
         print("Failed to open ", in_file, " for reading")
         exit(-1)
     return proto_in
@@ -125,7 +125,7 @@ def _DecodeVarint32(in_file):
             return (result, pos)
         shift += 7
         if shift >= 64:
-            raise IOError("Too many bytes when decoding varint.")
+            raise OSError("Too many bytes when decoding varint.")
 
 
 def decodeMessage(in_file, message):
@@ -140,7 +140,7 @@ def decodeMessage(in_file, message):
         buf = in_file.read(size)
         message.ParseFromString(buf)
         return True
-    except IOError:
+    except OSError:
         return False
 
 
diff --git a/util/streamline/m5stats2streamline.py b/util/streamline/m5stats2streamline.py
index 8dc72bf0f9..e350806e87 100755
--- a/util/streamline/m5stats2streamline.py
+++ b/util/streamline/m5stats2streamline.py
@@ -126,7 +126,7 @@
 
 args = parser.parse_args()
 
-if not re.match("(.*)\.apc", args.output_path):
+if not re.match(r"(.*)\.apc", args.output_path):
     print("ERROR: <dest .apc folder> should end with '.apc'!")
     sys.exit(1)
 
@@ -143,6 +143,7 @@
 start_tick = -1
 end_tick = -1
 
+
 # Parse gem5 config.ini file to determine some system configurations.
 # Number of CPUs, L2s, etc.
 def parseConfig(config_file):
@@ -187,7 +188,7 @@ def parseConfig(config_file):
 kernel_uid = -1
 
 
-class Task(object):
+class Task:
     def __init__(self, uid, pid, tgid, task_name, is_process, tick):
         if pid == 0:  # Idle
             self.uid = 0
@@ -203,7 +204,7 @@ def __init__(self, uid, pid, tgid, task_name, is_process, tick):
         self.tick = tick  # time this task first appeared
 
 
-class Event(object):
+class Event:
     def __init__(self, tick, task):
         self.tick = tick
         self.task = task
@@ -221,6 +222,7 @@ def packed32(x):
     ret = []
     more = True
     while more:
+        x = int(x)
         b = x & 0x7F
         x = x >> 7
         if ((x == 0) and ((b & 0x40) == 0)) or (
@@ -383,7 +385,13 @@ def timestampList(x):
 
 def writeBinary(outfile, binary_list):
     for i in binary_list:
-        outfile.write(f"{i:c}")
+        if isinstance(i, str):
+            byteVal = bytes(i, "utf-8")
+        elif isinstance(i, int):
+            byteVal = bytes([i])
+        else:
+            byteVal = i
+        outfile.write(byteVal)
 
 
 ############################################################
@@ -654,14 +662,14 @@ def parseProcessInfo(task_file):
         sys.exit(1)
 
     process_re = re.compile(
-        "tick=(\d+)\s+(\d+)\s+cpu_id=(\d+)\s+"
-        + "next_pid=([-\d]+)\s+next_tgid=([-\d]+)\s+next_task=(.*)"
+        r"tick=(\d+)\s+(\d+)\s+cpu_id=(\d+)\s+"
+        + r"next_pid=([-\d]+)\s+next_tgid=([-\d]+)\s+next_task=(.*)"
     )
 
     task_name_failure_warned = False
 
     for line in process_file:
-        match = re.match(process_re, line)
+        match = re.match(process_re, line.decode())
         if match:
             tick = int(match.group(1))
             if start_tick < 0:
@@ -805,9 +813,8 @@ def writeXmlFile(xml, filename):
 
 
 # StatsEntry that contains individual statistics
-class StatsEntry(object):
+class StatsEntry:
     def __init__(self, name, group, group_index, per_cpu, key):
-
         # Full name of statistics
         self.name = name
 
@@ -819,12 +826,14 @@ def __init__(self, name, group, group_index, per_cpu, key):
 
         # Shorter name with "system" stripped off
         # and symbols converted to alphanumerics
-        self.short_name = re.sub("system\.", "", name)
+        self.short_name = re.sub(r"system\.", "", name)
         self.short_name = re.sub(":", "_", name)
 
         # Regex for this stat (string version used to construct union regex)
-        self.regex_string = "^" + name + "\s+([\d\.]+)"
-        self.regex = re.compile("^" + name + "\s+([\d\.e\-]+)\s+# (.*)$", re.M)
+        self.regex_string = "^" + name + r"\s+([\d\.]+)"
+        self.regex = re.compile(
+            "^" + name + r"\s+([\d\.e\-]+)\s+# (.*)$", re.M
+        )
         self.description = ""
 
         # Whether this stat is use per CPU or not
@@ -862,11 +871,11 @@ def __init__(self, name, group, group_index, per_cpu, key):
                 print("\t", per_cpu_name)
 
                 self.per_cpu_regex_string.append(
-                    "^" + per_cpu_name + "\s+[\d\.]+"
+                    "^" + per_cpu_name + r"\s+[\d\.]+"
                 )
                 self.per_cpu_regex.append(
                     re.compile(
-                        "^" + per_cpu_name + "\s+([\d\.e\-]+)\s+# (.*)$", re.M
+                        "^" + per_cpu_name + r"\s+([\d\.e\-]+)\s+# (.*)$", re.M
                     )
                 )
                 self.values.append([])
@@ -881,7 +890,7 @@ def append_value(self, val, per_cpu_index=None):
 
 # Global stats object that contains the list of stats entries
 # and other utility functions
-class Stats(object):
+class Stats:
     def __init__(self):
         self.stats_list = []
         self.tick_list = []
@@ -975,17 +984,17 @@ def readGem5Stats(stats, gem5_stats_file):
     window_end_regex = re.compile(
         "^---------- End Simulation Statistics   ----------"
     )
-    final_tick_regex = re.compile("^final_tick\s+(\d+)")
+    final_tick_regex = re.compile(r"^final_tick\s+(\d+)")
 
     global ticks_in_ns
-    sim_freq_regex = re.compile("^sim_freq\s+(\d+)")
+    sim_freq_regex = re.compile(r"^sim_freq\s+(\d+)")
     sim_freq = -1
 
     try:
         if ext == ".gz":
             f = gzip.open(gem5_stats_file, "r")
         else:
-            f = open(gem5_stats_file, "r")
+            f = open(gem5_stats_file)
     except:
         print("ERROR opening stats file", gem5_stats_file, "!")
         sys.exit(1)
@@ -997,7 +1006,7 @@ def readGem5Stats(stats, gem5_stats_file):
         error = False
         try:
             line = f.readline()
-        except IOError:
+        except OSError:
             print("")
             print("WARNING: IO error in stats file")
             print("(gzip stream not closed properly?)...continuing for now")
@@ -1139,7 +1148,7 @@ def doCapturedXML(output_path, stats):
     counters = ET.SubElement(xml, "counters")
     for stat in stats.stats_list:
         s = ET.SubElement(counters, "counter")
-        stat_name = re.sub("\.", "_", stat.short_name)
+        stat_name = re.sub(r"\.", "_", stat.short_name)
         stat_name = re.sub("#", "", stat_name)
         s.set("title", stat.group)
         s.set("name", stat_name)
@@ -1266,7 +1275,7 @@ def writeVisualAnnotations(blob, input_path, output_path):
     frame_count = 0
     file_list = os.listdir(frame_path)
     file_list.sort()
-    re_fb = re.compile("fb\.(\d+)\.(\d+)\.bmp.gz")
+    re_fb = re.compile(r"fb\.(\d+)\.(\d+)\.bmp.gz")
 
     # Use first non-negative pid to tag visual annotations
     annotate_pid = -1
diff --git a/util/style.py b/util/style.py
index 27d6568ec3..1b3ebe1602 100755
--- a/util/style.py
+++ b/util/style.py
@@ -45,7 +45,7 @@
 from style.style import StdioUI
 from style import repo
 
-verifier_names = dict([(c.__name__, c) for c in style.verifiers.all_verifiers])
+verifier_names = {c.__name__: c for c in style.verifiers.all_verifiers}
 
 
 def verify(
diff --git a/util/style/file_types.py b/util/style/file_types.py
index 3a6b93098b..1ce8363a1e 100644
--- a/util/style/file_types.py
+++ b/util/style/file_types.py
@@ -101,7 +101,7 @@ def lang_type(filename, firstline=None, openok=True):
     # if a first line was not provided but the file is ok to open,
     # grab the first line of the file.
     if firstline is None and openok:
-        handle = open(filename, "r")
+        handle = open(filename)
         firstline = handle.readline()
         handle.close()
 
diff --git a/util/style/region.py b/util/style/region.py
index bd2fc89251..05b0073e50 100644
--- a/util/style/region.py
+++ b/util/style/region.py
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
-class _neg_inf(object):
+class _neg_inf:
     """This object always compares less than any other object"""
 
     def __repr__(self):
@@ -53,7 +53,7 @@ def __ne__(self, other):
 neg_inf = _neg_inf()
 
 
-class _pos_inf(object):
+class _pos_inf:
     """This object always compares greater than any other object"""
 
     def __repr__(self):
@@ -176,7 +176,7 @@ def __ge__(self, other):
         return self[1] > other
 
 
-class Regions(object):
+class Regions:
     """A set of regions (ranges).  Basically a region with holes.
     Includes utility functions to merge regions and figure out if
     something is in one of the regions."""
diff --git a/util/style/repo.py b/util/style/repo.py
index 18079cea6a..04db36d15d 100644
--- a/util/style/repo.py
+++ b/util/style/repo.py
@@ -43,7 +43,7 @@
 from .style import modified_regions
 
 
-class AbstractRepo(object, metaclass=ABCMeta):
+class AbstractRepo(metaclass=ABCMeta):
     def file_path(self, fname):
         """Get the absolute path to a file relative within the repository. The
         input file name must be a valid path within the repository.
@@ -76,7 +76,7 @@ def get_file(self, name):
         to the repository root.
 
         """
-        with open(self.file_path(name), "r") as f:
+        with open(self.file_path(name)) as f:
             return f.read()
 
     @abstractmethod
diff --git a/util/style/sort_includes.py b/util/style/sort_includes.py
index 9c532b5669..86d886f213 100644
--- a/util/style/sort_includes.py
+++ b/util/style/sort_includes.py
@@ -94,7 +94,7 @@ def _include_matcher(keyword="#include", delim="<>"):
     """Match an include statement and return a (keyword, file, extra)
     duple, or a touple of None values if there isn't a match."""
 
-    rex = re.compile(r"^(%s)\s*%s(.*)%s(.*)$" % (keyword, delim[0], delim[1]))
+    rex = re.compile(rf"^({keyword})\s*{delim[0]}(.*){delim[1]}(.*)$")
 
     def matcher(context, line):
         m = rex.match(line)
@@ -146,7 +146,7 @@ def matcher(context, line):
     return matcher
 
 
-class SortIncludes(object):
+class SortIncludes:
     # different types of includes for different sorting of headers
     # <Python.h>         - Python header needs to be first if it exists
     # <*.h>              - system headers (directories before files)
@@ -155,17 +155,21 @@ class SortIncludes(object):
     # "*"                - M5 headers (directories before files)
     includes_re = (
         ("main", '""', _include_matcher_main()),
-        ("python", "<>", _include_matcher_fname("^Python\.h$")),
+        ("python", "<>", _include_matcher_fname(r"^Python\.h$")),
         (
             "pybind",
             '""',
-            _include_matcher_fname("^pybind11/.*\.h$", delim='""'),
+            _include_matcher_fname(r"^pybind11/.*\.h$", delim='""'),
         ),
         ("m5shared", "<>", _include_matcher_fname("^gem5/")),
-        ("c", "<>", _include_matcher_fname("^.*\.h$")),
-        ("stl", "<>", _include_matcher_fname("^\w+$")),
-        ("cc", "<>", _include_matcher_fname("^.*\.(hh|hxx|hpp|H)$")),
-        ("m5header", '""', _include_matcher_fname("^.*\.h{1,2}$", delim='""')),
+        ("c", "<>", _include_matcher_fname(r"^.*\.h$")),
+        ("stl", "<>", _include_matcher_fname(r"^\w+$")),
+        ("cc", "<>", _include_matcher_fname(r"^.*\.(hh|hxx|hpp|H)$")),
+        (
+            "m5header",
+            '""',
+            _include_matcher_fname(r"^.*\.h{1,2}$", delim='""'),
+        ),
         ("swig0", "<>", _include_matcher(keyword="%import")),
         ("swig1", "<>", _include_matcher(keyword="%include")),
         ("swig2", '""', _include_matcher(keyword="%import", delim='""')),
@@ -266,8 +270,7 @@ def match_line(line):
 
                 # Output pending includes, a new line between, and the
                 # current l.
-                for include in self.dump_includes():
-                    yield include
+                yield from self.dump_includes()
                 yield ""
                 yield line
             else:
@@ -276,8 +279,7 @@ def match_line(line):
 
         # We've reached EOF, so dump any pending includes
         if processing_includes:
-            for include in self.dump_includes():
-                yield include
+            yield from self.dump_includes()
 
 
 # default language types to try to apply our sorting rules to
diff --git a/util/style/style.py b/util/style/style.py
index 1c6ed1cf96..a40671fb47 100644
--- a/util/style/style.py
+++ b/util/style/style.py
@@ -52,7 +52,7 @@
 any_control = re.compile(r"\b(if|while|for)([ \t]*)\(")
 
 
-class UserInterface(object, metaclass=ABCMeta):
+class UserInterface(metaclass=ABCMeta):
     def __init__(self, verbose=False):
         self.verbose = verbose
 
@@ -118,8 +118,8 @@ def match_re(fname):
     # Only include Scons files and those with extensions that suggest source
     # code
     _re_only(
-        "^((.*\/)?(SConscript|SConstruct)|"
-        ".*\.(c|h|cc|hh|cpp|hpp|isa|proto))$"
+        r"^((.*\/)?(SConscript|SConstruct)|"
+        r".*\.(c|h|cc|hh|cpp|hpp|isa|proto))$"
     ),
 ]
 
diff --git a/util/style/verifiers.py b/util/style/verifiers.py
index dbcce1c764..4860cc461e 100644
--- a/util/style/verifiers.py
+++ b/util/style/verifiers.py
@@ -103,7 +103,7 @@ def _modified_regions(old, new):
     return regions
 
 
-class Verifier(object, metaclass=ABCMeta):
+class Verifier(metaclass=ABCMeta):
     """Base class for style verifiers
 
     Verifiers check for style violations and optionally fix such
@@ -292,10 +292,18 @@ class Whitespace(LineVerifier):
     - No trailing whitespace
     """
 
-    languages = set(
-        ("C", "C++", "swig", "python", "asm", "isa", "scons", "make", "dts")
-    )
-    trail_only = set(("make", "dts"))
+    languages = {
+        "C",
+        "C++",
+        "swig",
+        "python",
+        "asm",
+        "isa",
+        "scons",
+        "make",
+        "dts",
+    }
+    trail_only = {"make", "dts"}
 
     test_name = "whitespace"
     opt_name = "white"
@@ -345,7 +353,7 @@ class SortedIncludes(Verifier):
     opt_name = "include"
 
     def __init__(self, *args, **kwargs):
-        super(SortedIncludes, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.sort_includes = sort_includes.SortIncludes()
 
     def check(self, filename, regions=all_regions, fobj=None, silent=False):
@@ -404,7 +412,7 @@ def fix(self, filename, regions=all_regions):
 class ControlSpace(LineVerifier):
     """Check for exactly one space after if/while/for"""
 
-    languages = set(("C", "C++"))
+    languages = {"C", "C++"}
     test_name = "spacing after if/while/for"
     opt_name = "control"
 
@@ -420,11 +428,15 @@ def fix_line(self, line, **kwargs):
 
 
 class LineLength(LineVerifier):
-    languages = set(("C", "C++", "swig", "python", "asm", "isa", "scons"))
+    languages = {"C", "C++", "swig", "python", "asm", "isa", "scons"}
     test_name = "line length"
     opt_name = "length"
 
-    def check_line(self, line, **kwargs):
+    def check_line(self, line, language, **kwargs):
+        # Ignore line length check for include pragmas of C/C++.
+        if language in {"C", "C++"}:
+            if line.startswith("#include"):
+                return True
         return style.normalized_len(line) <= 79
 
     def fix(self, filename, regions=all_regions, **kwargs):
@@ -435,7 +447,7 @@ def fix_line(self, line):
 
 
 class ControlCharacters(LineVerifier):
-    languages = set(("C", "C++", "swig", "python", "asm", "isa", "scons"))
+    languages = {"C", "C++", "swig", "python", "asm", "isa", "scons"}
     test_name = "control character"
     opt_name = "ascii"
 
@@ -451,7 +463,7 @@ def fix_line(self, line, **kwargs):
 
 
 class BoolCompare(LineVerifier):
-    languages = set(("C", "C++", "python"))
+    languages = {"C", "C++", "python"}
     test_name = "boolean comparison"
     opt_name = "boolcomp"
 
@@ -499,22 +511,22 @@ class MultiLineClass
           : public BaseClass {
     """
 
-    languages = set(("C", "C++"))
+    languages = {"C", "C++"}
     test_name = "structure opening brace position"
     opt_name = "structurebrace"
 
     # Matches the indentation of the line
-    regex_indentation = "(?P<indentation>\s*)"
+    regex_indentation = r"(?P<indentation>\s*)"
     # Matches an optional "typedef" before the keyword
-    regex_typedef = "(?P<typedef>(typedef\s+)?)"
+    regex_typedef = r"(?P<typedef>(typedef\s+)?)"
     # Matches the structure's keyword
     regex_keyword = "(?P<keyword>class|struct|enum|union)"
     # A negative lookahead to avoid incorrect matches with variable's names
     # e.g., "classifications = {" should not be fixed here.
-    regex_avoid = "(?![^\{\s])"
+    regex_avoid = r"(?![^\{\s])"
     # Matches anything after the keyword and before the opening brace.
     # e.g., structure name, base type, type of inheritance, etc
-    regex_name = "(?P<name>[^\{]*)"
+    regex_name = r"(?P<name>[^\{]*)"
     # Matches anything after the opening brace, which should be
     # parsed recursively
     regex_extra = "(?P<extra>.*)$"
@@ -525,7 +537,7 @@ class MultiLineClass
         + regex_keyword
         + regex_avoid
         + regex_name
-        + "\{"
+        + r"\{"
         + regex_extra
     )
 
diff --git a/util/systemc/systemc_within_gem5/systemc_gem5_tlm/SystemC_Example.py b/util/systemc/systemc_within_gem5/systemc_gem5_tlm/SystemC_Example.py
index f2bee1653a..d23b6764c7 100644
--- a/util/systemc/systemc_within_gem5/systemc_gem5_tlm/SystemC_Example.py
+++ b/util/systemc/systemc_within_gem5/systemc_gem5_tlm/SystemC_Example.py
@@ -31,6 +31,7 @@
 from m5.objects.SystemC import SystemC_ScModule
 from m5.objects.Tlm import TlmTargetSocket
 
+
 # This class is a subclass of sc_module, and all the special magic which makes
 # that work is handled in the base classes.
 class TLM_Target(SystemC_ScModule):
diff --git a/util/systemc/systemc_within_gem5/systemc_simple_object/SystemC_Example.py b/util/systemc/systemc_within_gem5/systemc_simple_object/SystemC_Example.py
index 66b05bf79f..e4535d2c12 100644
--- a/util/systemc/systemc_within_gem5/systemc_simple_object/SystemC_Example.py
+++ b/util/systemc/systemc_within_gem5/systemc_simple_object/SystemC_Example.py
@@ -28,6 +28,7 @@
 
 from m5.objects.SystemC import SystemC_ScModule
 
+
 # This class is a subclass of sc_module, and all the special magic which makes
 # that work is handled in the base classes.
 class SystemC_Printer(SystemC_ScModule):
diff --git a/util/tlm/conf/tlm_elastic_slave.py b/util/tlm/conf/tlm_elastic_slave.py
index 30a412b2f8..1007c5244b 100644
--- a/util/tlm/conf/tlm_elastic_slave.py
+++ b/util/tlm/conf/tlm_elastic_slave.py
@@ -65,7 +65,7 @@
 
 # Setup System:
 system = System(
-    cpu=TraceCPU(cpu_id=0),
+    cpu=TraceCPU(),
     mem_mode="timing",
     mem_ranges=[AddrRange("512MB")],
     cache_line_size=64,
@@ -89,8 +89,7 @@
     clock="1GHz", voltage_domain=system.cpu_voltage_domain
 )
 
-# Setup CPU and its L1 caches:
-system.cpu.createInterruptController()
+# Setup CPU's L1 caches:
 system.cpu.icache = L1_ICache(size="32kB")
 system.cpu.dcache = L1_DCache(size="32kB")
 system.cpu.icache.cpu_side = system.cpu.icache_port
@@ -114,10 +113,10 @@
 
 # Connect everything:
 system.membus = SystemXBar()
-system.system_port = system.membus.slave
-system.cpu.icache.mem_side = system.membus.slave
-system.cpu.dcache.mem_side = system.membus.slave
-system.membus.master = system.tlm.port
+system.system_port = system.membus.cpu_side_ports
+system.cpu.icache.mem_side = system.membus.cpu_side_ports
+system.cpu.dcache.mem_side = system.membus.cpu_side_ports
+system.membus.mem_side_ports = system.tlm.port
 
 # Start the simulation:
 root = Root(full_system=False, system=system)
diff --git a/util/tlm/examples/tlm_elastic_slave_with_l2.py b/util/tlm/examples/tlm_elastic_slave_with_l2.py
index c72bc8976c..6b3f7b43fb 100644
--- a/util/tlm/examples/tlm_elastic_slave_with_l2.py
+++ b/util/tlm/examples/tlm_elastic_slave_with_l2.py
@@ -72,7 +72,7 @@
 
 # Setup System:
 system = System(
-    cpu=TraceCPU(cpu_id=0),
+    cpu=TraceCPU(),
     mem_mode="timing",
     mem_ranges=[AddrRange("1024MB")],
     cache_line_size=64,
@@ -96,8 +96,7 @@
     clock="1GHz", voltage_domain=system.cpu_voltage_domain
 )
 
-# Setup CPU and its L1 caches:
-system.cpu.createInterruptController()
+# Setup CPU's L1 caches:
 system.cpu.icache = L1_ICache(size="32kB")
 system.cpu.dcache = L1_DCache(size="32kB")
 system.cpu.icache.cpu_side = system.cpu.icache_port
@@ -122,12 +121,12 @@
 
 # Connect everything:
 system.membus = SystemXBar()
-system.system_port = system.membus.slave
-system.cpu.icache.mem_side = system.tol2bus.slave
-system.cpu.dcache.mem_side = system.tol2bus.slave
-system.tol2bus.master = system.l2cache.cpu_side
-system.l2cache.mem_side = system.membus.slave
-system.membus.master = system.tlm.port
+system.system_port = system.membus.cpu_side_ports
+system.cpu.icache.mem_side = system.tol2bus.cpu_side_ports
+system.cpu.dcache.mem_side = system.tol2bus.cpu_side_ports
+system.tol2bus.mem_side_ports = system.l2cache.cpu_side
+system.l2cache.mem_side = system.membus.cpu_side_ports
+system.membus.mem_side_ports = system.tlm.port
 
 # Start the simulation:
 root = Root(full_system=False, system=system)
diff --git a/util/update_copyright/__init__.py b/util/update_copyright/__init__.py
index 3b5a534696..a8bf92494b 100644
--- a/util/update_copyright/__init__.py
+++ b/util/update_copyright/__init__.py
@@ -79,7 +79,7 @@ def _update_copyright_years(m, cur_year, org_bytes):
 
 def update_copyright(data, cur_year, org_bytes):
     update_copyright_regexp = re.compile(
-        b" Copyright \\(c\\) ([0-9,\- ]+) " + org_bytes + b"\n", re.IGNORECASE
+        b" Copyright \\(c\\) ([0-9,\\- ]+) " + org_bytes + b"\n", re.IGNORECASE
     )
     return update_copyright_regexp.sub(
         lambda m: _update_copyright_years(m, cur_year, org_bytes),