diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 971961f35469..9af07264e258 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @antonwolfy @AlexanderKalistratov @vlad-perevezentsev @vtavana @ndgrigorian +* @antonwolfy @vlad-perevezentsev @ndgrigorian diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 26acd1646a52..a9ca25f426fe 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -26,6 +26,7 @@ jobs: name: Build and Deploy Docs runs-on: ubuntu-22.04 + timeout-minutes: 60 permissions: # Needed to cancel any previous runs that are not completed for a given workflow @@ -101,7 +102,7 @@ jobs: sudo apt-get install -y nvidia-cuda-toolkit clinfo - name: Checkout repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 @@ -118,7 +119,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -128,7 +129,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -244,9 +245,10 @@ jobs: pull-requests: write runs-on: ubuntu-latest + timeout-minutes: 10 steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/check-mkl-interfaces.yaml b/.github/workflows/check-onemath.yaml similarity index 73% 
rename from .github/workflows/check-mkl-interfaces.yaml rename to .github/workflows/check-onemath.yaml index 56e95b35366e..1f820c475d7c 100644 --- a/.github/workflows/check-mkl-interfaces.yaml +++ b/.github/workflows/check-onemath.yaml @@ -1,4 +1,4 @@ -name: Test oneMKL interfaces +name: Test oneAPI Math Library (oneMath) on: push: @@ -15,7 +15,8 @@ env: build-with-oneapi-env: 'environments/build_with_oneapi.yml' dpctl-pkg-env: 'environments/dpctl_pkg.yml' oneapi-pkgs-env: 'environments/oneapi_pkgs.yml' - test-env-name: 'test_onemkl_interfaces' + test-pkg-env: 'environments/test.yml' + test-env-name: 'test_onemath' rerun-tests-on-failure: 'true' rerun-tests-max-attempts: 2 rerun-tests-timeout: 20 @@ -29,6 +30,7 @@ jobs: actions: write runs-on: 'ubuntu-latest' + timeout-minutes: 5 steps: - name: Cancel Previous Runs @@ -37,7 +39,7 @@ jobs: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 @@ -48,7 +50,10 @@ jobs: - name: Merge conda env files run: | - conda-merge ${{ env.dpctl-pkg-env }} ${{ env.oneapi-pkgs-env }} ${{ env.build-with-oneapi-env }} > ${{ env.environment-file }} + conda-merge ${{ env.dpctl-pkg-env }} \ + ${{ env.oneapi-pkgs-env }} \ + ${{ env.build-with-oneapi-env }} \ + ${{ env.test-pkg-env }} > ${{ env.environment-file }} cat ${{ env.environment-file }} - name: Upload artifact @@ -58,32 +63,31 @@ jobs: path: ${{ env.environment-file }} test_by_tag: - name: Run tests with oneMKL tag + name: Run tests with OneMath tag needs: build_env_file strategy: fail-fast: false matrix: - # python 3.13 is blocked since BLAS requires "mkl<2025.0" (see https://github.com/conda-forge/blas-feedstock/pull/128 - # which depends on resolving MKL issue https://github.com/conda-forge/intel_repack-feedstock/issues/83) - python: ['3.12'] - os: [ubuntu-22.04] # windows-2019 - no DFT support for 
Windows in oneMKL + python: ['3.13'] + os: [ubuntu-22.04] # windows-2022 - no DFT support for Windows in oneMKL runs-on: ${{ matrix.os }} + timeout-minutes: 60 defaults: run: - shell: ${{ matrix.os == 'windows-2019' && 'cmd /C CALL {0}' || 'bash -el {0}' }} + shell: ${{ matrix.os == 'windows-2022' && 'cmd /C CALL {0}' || 'bash -el {0}' }} steps: - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.environment-file-name }} path: ${{ env.environment-file-loc }} @@ -91,7 +95,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -101,7 +105,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -116,7 +120,7 @@ jobs: - name: Build and install DPNP package run: | - python scripts/build_locally.py --onemkl-interfaces --verbose + python scripts/build_locally.py --onemath --verbose - name: Smoke test run: | @@ -148,41 +152,40 @@ jobs: SYCL_CACHE_PERSISTENT: 1 test_by_branch: - name: Run tests with oneMKL develop branch + name: Run tests with oneMath develop branch needs: build_env_file strategy: fail-fast: false matrix: - # python 3.13 is blocked since BLAS requires "mkl<2025.0" (see 
https://github.com/conda-forge/blas-feedstock/pull/128 - # which depends on resolving MKL issue https://github.com/conda-forge/intel_repack-feedstock/issues/83) - python: ['3.12'] - os: [ubuntu-22.04] # windows-2019 - no DFT support for Windows in oneMKL + python: ['3.13'] + os: [ubuntu-22.04] # windows-2022 - no DFT support for Windows in oneMKL runs-on: ${{ matrix.os }} + timeout-minutes: 60 defaults: run: - shell: ${{ matrix.os == 'windows-2019' && 'cmd /C CALL {0}' || 'bash -el {0}' }} + shell: ${{ matrix.os == 'windows-2022' && 'cmd /C CALL {0}' || 'bash -el {0}' }} env: onemkl-source-dir: '${{ github.workspace }}/onemkl/' steps: - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.environment-file-name }} path: ${{ env.environment-file-loc }} - name: Checkout oneMKL repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: repository: 'oneapi-src/oneMKL' ref: 'develop' @@ -196,7 +199,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -206,7 +209,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 
'true' @@ -221,7 +224,7 @@ jobs: - name: Build and install DPNP package run: | - python scripts/build_locally.py --onemkl-interfaces --onemkl-interfaces-dir=${{ env.onemkl-source-dir }} --verbose + python scripts/build_locally.py --onemath --onemath-dir=${{ env.onemkl-source-dir }} --verbose - name: Smoke test run: | diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 958e7795973a..7c33bbe0a209 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -16,13 +16,11 @@ env: ver-script-part1: "import json; f = open('version.json', 'r'); j = json.load(f); f.close(); " ver-script-part2: "d = j['dpnp'][0]; print('='.join((d[s] for s in ('version', 'build'))))" fetch-depth: 1 - # python 3.13 is blocked since BLAS requires "mkl<2025.0" (see https://github.com/conda-forge/blas-feedstock/pull/128 - # which depends on resolving MKL issue https://github.com/conda-forge/intel_repack-feedstock/issues/83) - python-ver-test-all-dtypes: '3.12' + python-ver-test-all-dtypes: '3.13' test-env-name: 'test' rerun-tests-on-failure: 'true' rerun-tests-max-attempts: 2 - rerun-tests-timeout: 35 + rerun-tests-timeout: 40 jobs: build: @@ -31,20 +29,19 @@ jobs: strategy: fail-fast: false matrix: - # python 3.13 is blocked since BLAS requires "mkl<2025.0" (see https://github.com/conda-forge/blas-feedstock/pull/128 - # which depends on resolving MKL issue https://github.com/conda-forge/intel_repack-feedstock/issues/83) - python: ['3.9', '3.10', '3.11', '3.12'] - os: [ubuntu-22.04, windows-2019] + python: ['3.9', '3.10', '3.11', '3.12', '3.13'] + os: [ubuntu-22.04, windows-2022] permissions: # Needed to cancel any previous runs that are not completed for a given workflow actions: write runs-on: ${{ matrix.os }} + timeout-minutes: 60 defaults: run: - shell: ${{ matrix.os == 'windows-2019' && 'cmd /C CALL {0}' || 'bash -el {0}' }} + shell: ${{ matrix.os == 'windows-2022' && 'cmd /C CALL {0}' || 'bash -el {0}' }} env: 
build-conda-pkg-env: 'environments/build_conda_pkg.yml' @@ -57,14 +54,14 @@ jobs: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -74,7 +71,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -96,13 +93,13 @@ jobs: continue-on-error: true run: conda build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: - MAX_BUILD_CMPL_MKL_VERSION: '2025.2a0' + MAX_BUILD_CMPL_MKL_VERSION: '2025.3a0' - name: ReBuild conda package if: steps.build_conda_pkg.outcome == 'failure' run: conda build --no-test --python ${{ matrix.python }} --numpy 2.0 ${{ env.channels-list }} conda-recipe env: - MAX_BUILD_CMPL_MKL_VERSION: '2025.2a0' + MAX_BUILD_CMPL_MKL_VERSION: '2025.3a0' - name: Upload artifact uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 @@ -122,6 +119,7 @@ jobs: needs: build runs-on: ${{ matrix.os }} + timeout-minutes: 100 defaults: run: @@ -130,8 +128,7 @@ jobs: strategy: fail-fast: false matrix: - # python 3.13 is blocked due to MKL issue - python: ['3.9', '3.10', '3.11', '3.12'] + python: ['3.9', '3.10', '3.11', '3.12', '3.13'] os: [ubuntu-latest] env: @@ -143,13 +140,13 @@ jobs: steps: - name: Checkout DPNP repo - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: ${{ env.fetch-depth }} path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -157,7 +154,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -167,7 +164,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -191,10 +188,6 @@ jobs: echo PACKAGE_VERSION=${PACKAGE_VERSION} echo "PACKAGE_VERSION=$PACKAGE_VERSION" >> $GITHUB_ENV - # conda-index does not support python 3.13, but we need to test DPNP package with python 3.13 - - name: Remove conda-index - run: mamba remove conda-index - - name: Install dpnp id: install_dpnp continue-on-error: true @@ -254,6 +247,7 @@ jobs: needs: build runs-on: ${{ matrix.os }} + timeout-minutes: 120 defaults: run: @@ -262,9 +256,8 @@ jobs: strategy: fail-fast: false matrix: - # python 3.13 is blocked due to MKL issue - python: ['3.9', '3.10', '3.11', '3.12'] - os: [windows-2019] + python: ['3.9', '3.10', '3.11', '3.12', '3.13'] + os: [windows-2022] env: dpnp-repo-path: '${{ github.workspace }}\source' @@ -275,13 +268,13 @@ jobs: steps: - name: Checkout DPNP repo - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: ${{ env.fetch-depth }} path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -300,7 +293,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -310,7 +303,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -324,10 +317,6 @@ jobs: (echo CONDA_LIB_PATH=%CONDA_PREFIX%\Library\lib\) >> %GITHUB_ENV% (echo CONDA_LIB_BIN_PATH=%CONDA_PREFIX%\Library\bin\) >> %GITHUB_ENV% - - name: Install conda-index - run: | - mamba install conda-index=${{ env.CONDA_INDEX_VERSION }} - - name: Create conda channel run: | @echo on @@ -351,10 +340,6 @@ jobs: echo PACKAGE_VERSION: %PACKAGE_VERSION% (echo PACKAGE_VERSION=%PACKAGE_VERSION%) >> %GITHUB_ENV% - # conda-index does not support python 3.13, but we need to test DPNP package with python 3.13 - - name: Remove conda-index - run: mamba remove conda-index - - name: Install dpnp run: | @echo on @@ -420,15 +405,15 @@ jobs: strategy: fail-fast: false matrix: - # python 3.13 is blocked due to MKL issue - python: ['3.9', '3.10', '3.11', '3.12'] - os: [ubuntu-22.04, windows-2019] + 
python: ['3.9', '3.10', '3.11', '3.12', '3.13'] + os: [ubuntu-22.04, windows-2022] runs-on: ${{ matrix.os }} + timeout-minutes: 10 defaults: run: - shell: ${{ matrix.os == 'windows-2019' && 'cmd /C CALL {0}' || 'bash -el {0}' }} + shell: ${{ matrix.os == 'windows-2022' && 'cmd /C CALL {0}' || 'bash -el {0}' }} env: upload-conda-pkg-env: 'environments/upload_cleanup_conda_pkg.yml' @@ -440,24 +425,24 @@ jobs: steps: - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: ${{ env.fetch-depth }} - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} - name: Download wheels artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.package-name }} ${{ runner.os }} Wheels Python ${{ matrix.python }} - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -467,7 +452,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -506,6 +491,7 @@ jobs: os: [ubuntu-22.04] runs-on: ${{ matrix.os }} + timeout-minutes: 15 defaults: run: @@ -519,7 +505,7 @@ jobs: dpnp-repo-path: '${{ github.workspace 
}}/source/' array-api-skips-file: '${{ github.workspace }}/source/.github/workflows/array-api-skips.txt' create-conda-channel-env: 'source/environments/create_conda_channel.yml' - python-ver: '3.12' # it has to be aligned with python in create_conda_channel.yml + python-ver: '3.13' # it has to be aligned with python in create_conda_channel.yml conda-env-name: 'array-api-conformity' channel-path: '${{ github.workspace }}/channel/' pkg-path-in-channel: '${{ github.workspace }}/channel/linux-64/' @@ -527,13 +513,13 @@ jobs: steps: - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: ${{ env.fetch-depth }} path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ env.python-ver }} path: ${{ env.pkg-path-in-channel }} @@ -541,7 +527,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -551,7 +537,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -599,7 +585,7 @@ jobs: python -c "import dpnp; print(dpnp.__version__)" - name: Clone array API tests repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: repository: 'data-apis/array-api-tests' path: ${{ env.array-api-tests-path }} @@ -668,6 +654,7 @@ jobs: needs: [upload] runs-on: 'ubuntu-latest' + timeout-minutes: 10 defaults: run: @@ -679,14 +666,14 @@ jobs: steps: - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: ${{ env.fetch-depth }} - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -696,7 +683,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -705,7 +692,7 @@ jobs: activate-environment: ${{ env.cleanup-env-name }} - name: Checkout repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: repository: IntelPython/devops-tools fetch-depth: ${{ env.fetch-depth }} diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml index f563534f53c0..786b711d6d56 100644 --- a/.github/workflows/cron-run-tests.yaml +++ b/.github/workflows/cron-run-tests.yaml @@ -1,13 +1,11 @@ name: Run tests suite on: - # For Branch-Protection check. Only the default branch is supported. 
See - # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection - branch_protection_rule: + # To be able to be triggered manually + workflow_dispatch: # To guarantee Maintained check is occasionally updated. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained schedule: - cron: '28 2 * * *' - workflow_dispatch: permissions: read-all @@ -26,10 +24,11 @@ jobs: if: github.event.repository.fork == false runs-on: ${{ matrix.runner }} + timeout-minutes: 60 defaults: run: - shell: ${{ matrix.runner == 'windows-2019' && 'cmd /C CALL {0}' || 'bash -el {0}' }} + shell: ${{ matrix.runner == 'windows-2022' && 'cmd /C CALL {0}' || 'bash -el {0}' }} permissions: # Needed to cancel any previous runs that are not completed for a given workflow @@ -38,10 +37,20 @@ jobs: strategy: fail-fast: false matrix: - # python 3.13 is blocked since BLAS requires "mkl<2025.0" (see https://github.com/conda-forge/blas-feedstock/pull/128 - # which depends on resolving MKL issue https://github.com/conda-forge/intel_repack-feedstock/issues/83) - python: ['3.9', '3.10', '3.11', '3.12'] - runner: [ubuntu-22.04, ubuntu-24.04, windows-2019] + python: ['3.9', '3.10', '3.11', '3.12', '3.13'] + runner: [ubuntu-22.04, ubuntu-24.04, windows-2022] + include: + - python: 3.9 + # do not install scipy due to import issue + test-packages: "pytest" + - python: 3.10 + test-packages: "pytest scipy" + - python: 3.11 + test-packages: "pytest scipy" + - python: 3.12 + test-packages: "pytest scipy" + - python: 3.13 + test-packages: "pytest scipy" steps: - name: Cancel Previous Runs @@ -63,7 +72,7 @@ jobs: - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -74,7 +83,7 @@ jobs: - name: ReSetup miniconda if: 
steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -87,18 +96,18 @@ jobs: id: install_dpnp continue-on-error: true run: | - mamba install ${{ env.package-name }}=${{ steps.find_latest_tag.outputs.tag }} pytest ${{ env.channels-list }} + mamba install ${{ env.package-name }}=${{ steps.find_latest_tag.outputs.tag }} ${{ matrix.test-packages }} ${{ env.channels-list }} - name: ReInstall dpnp if: steps.install_dpnp.outcome == 'failure' run: | - mamba install ${{ env.package-name }}=${{ steps.find_latest_tag.outputs.tag }} pytest ${{ env.channels-list }} + mamba install ${{ env.package-name }}=${{ steps.find_latest_tag.outputs.tag }} ${{ matrix.test-packages }} ${{ env.channels-list }} - name: List installed packages run: mamba list - name: Activate OCL CPU RT - if: matrix.runner == 'windows-2019' + if: matrix.runner == 'windows-2022' shell: pwsh run: | $script_path="$env:CONDA_PREFIX\Scripts\set-intel-ocl-icd-registry.ps1" @@ -125,7 +134,7 @@ jobs: SYCL_CACHE_PERSISTENT: 1 - name: ReRun tests on Linux - if: steps.run_tests.outcome == 'failure' && matrix.runner != 'windows-2019' + if: steps.run_tests.outcome == 'failure' && matrix.runner != 'windows-2022' id: run_tests_linux uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 with: @@ -142,7 +151,7 @@ jobs: SYCL_CACHE_PERSISTENT: 1 - name: ReRun tests on Windows - if: steps.run_tests.outcome == 'failure' && matrix.runner == 'windows-2019' + if: steps.run_tests.outcome == 'failure' && matrix.runner == 'windows-2022' id: run_tests_win uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # v3.0.2 with: diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 2866b41e66ca..10ea34c21806 100644 --- 
a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -9,7 +9,9 @@ permissions: read-all jobs: generate-coverage: name: Generate coverage and push to Coveralls.io + runs-on: ubuntu-latest + timeout-minutes: 120 permissions: # Needed to cancel any previous runs that are not completed for a given workflow @@ -23,6 +25,7 @@ jobs: environment-file: 'environments/environment.yml' build-with-oneapi-env: 'environments/build_with_oneapi.yml' coverage-env: 'environments/coverage.yml' + test-pkg-env: 'environments/test.yml' oneapi-pkgs-env: '' # Enable env when it's required to use only conda packages without OneAPI installation # oneapi-pkgs-env: '${{ github.workspace }}/environments/oneapi_pkgs.yml' @@ -60,7 +63,7 @@ jobs: sudo gem install coveralls-lcov - name: Checkout repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 @@ -71,13 +74,16 @@ jobs: - name: Merge conda env files run: | - conda-merge ${{ env.build-with-oneapi-env }} ${{ env.coverage-env }} ${{ env.oneapi-pkgs-env }} > ${{ env.environment-file }} + conda-merge ${{ env.build-with-oneapi-env }} \ + ${{ env.coverage-env }} \ + ${{ env.oneapi-pkgs-env }} \ + ${{ env.test-pkg-env }} > ${{ env.environment-file }} cat ${{ env.environment-file }} - name: Setup miniconda id: setup_miniconda continue-on-error: true - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 'true' @@ -87,7 +93,7 @@ jobs: - name: ReSetup miniconda if: steps.setup_miniconda.outcome == 'failure' - uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 + uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 with: miniforge-version: latest use-mamba: 
'true' @@ -154,6 +160,7 @@ jobs: needs: generate-coverage runs-on: ubuntu-latest + timeout-minutes: 10 container: python:3-slim diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 9f2441b84e3a..906a7cd957c7 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -7,13 +7,16 @@ on: # For Branch-Protection check. Only the default branch is supported. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection branch_protection_rule: + # To be able to be triggered manually + workflow_dispatch: # To guarantee Maintained check is occasionally updated. See # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained schedule: - cron: '28 2 * * 1' - cron: '28 2 * * 4' push: - branches: [ "master" ] + branches: + - master # Declare default permissions as read only. permissions: read-all @@ -22,6 +25,7 @@ jobs: analysis: name: Scorecard analysis runs-on: ubuntu-latest + timeout-minutes: 10 permissions: # Needed to upload the results to code-scanning dashboard. security-events: write @@ -33,12 +37,12 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 + uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2 with: results_file: results.sarif results_format: sarif @@ -68,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18 + uses: github/codeql-action/upload-sarif@3599b3baa15b485a2e49ef411a7a4bb2452e7f93 # v3.30.5 with: sarif_file: results.sarif diff --git a/.github/workflows/pre-commit-autoupdate.yml b/.github/workflows/pre-commit-autoupdate.yml new file mode 100644 index 000000000000..aef5e16daad1 --- /dev/null +++ b/.github/workflows/pre-commit-autoupdate.yml @@ -0,0 +1,50 @@ +name: Autoupdate pre-commit + +on: + # To be able to be triggered manually + workflow_dispatch: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '28 2 * * 6' # Saturday at 02:28 UTC + +permissions: read-all + +jobs: + autoupdate: + name: Autoupdate + + runs-on: ubuntu-latest + timeout-minutes: 10 + + permissions: + # Needed to create a PR with autoupdate changes + contents: write + pull-requests: write + + steps: + - name: Checkout DPNP repo + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Set up python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: '3.13' + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit autoupdate + run: pre-commit autoupdate + + - name: Create a PR with autoupdate changes + uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e #v7.0.8 + with: + commit-message: 'chore: update pre-commit hooks' + add-paths: .pre-commit-config.yaml + branch: 'bot/pre-commit-autoupdate' + delete-branch: true + title: Weekly pre-commit autoupdate + body: | + This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. 
+ labels: autoupdate diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 7b1b9146a44a..d1d2dbbae21a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,4 +1,4 @@ -name: pre-commit +name: Run pre-commit on: pull_request: @@ -9,7 +9,11 @@ permissions: read-all jobs: pre-commit: + name: Check + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: - name: Set up clang-format run: | @@ -26,10 +30,13 @@ jobs: pylint - name: Checkout DPNP repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + # use commit hash to make "no-commit-to-branch" check passing + ref: ${{ github.sha }} - name: Set up python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: '3.13' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c1b902dc886f..a47715f4eb9b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,20 +2,29 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/PyCQA/bandit - rev: '1.8.3' + rev: '1.8.6' hooks: - id: bandit pass_filenames: false args: ["-r", "dpnp", "-lll"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: + # Git + - id: check-added-large-files + - id: no-commit-to-branch + name: "ensure no direct commit to master/maintenance branches" + args: [--branch, "master", --pattern, "maintenance/.*"] + - id: check-case-conflict + - id: check-illegal-windows-names + # Contents - id: check-ast - id: check-builtin-literals - id: check-case-conflict - id: check-executables-have-shebangs - id: check-merge-conflict - id: check-shebang-scripts-are-executable + - id: check-symlinks - id: check-toml - id: debug-statements - id: destroyed-symlinks @@ -51,7 +60,7 @@ 
repos: additional_dependencies: - tomli - repo: https://github.com/psf/black - rev: 25.1.0 + rev: 25.9.0 hooks: - id: black exclude: "dpnp/_version.py" @@ -68,7 +77,7 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/pycqa/flake8 - rev: 7.2.0 + rev: 7.3.0 hooks: - id: flake8 args: ["--config=.flake8"] @@ -81,7 +90,7 @@ repos: - id: clang-format args: ["-i"] - repo: https://github.com/gitleaks/gitleaks - rev: v8.26.0 + rev: v8.28.0 hooks: - id: gitleaks - repo: https://github.com/jumanjihouse/pre-commit-hooks @@ -104,9 +113,9 @@ repos: "--disable=redefined-builtin", "--disable=unused-wildcard-import" ] - files: '^dpnp/(dpnp_iface.*|fft|linalg)' + files: '^dpnp/(dpnp_iface.*|fft|linalg|scipy|dpnp_array)' - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.14.0 + rev: v2.15.0 hooks: - id: pretty-format-toml args: [--autofix] diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e990a601331..e69c47fd5ed1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,104 @@ # Changelog All notable changes to this project will be documented in this file. -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.18.0] - 05/DD/2025 +## [0.19.0] - 2025-10-06 + +This release introduces a set of new `dpnp.ndarray` methods and SciPy-compatible functions to improve CuPy compatibility. +It also enhances the performance of existing functions and improves documentation completeness. +Additionally, it extends support for building `dpnp` from the source for NVIDIA GPUs, with optional architecture selection. +This release is compatible with NumPy 2.3.3. 
+ +### Added + +* Added `--target-cuda[=ARCH]` option to replace the deprecated `--target=cuda`, allowing users to build for CUDA devices with optional architecture selection using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/nvidia/home/) [#2478](https://github.com/IntelPython/dpnp/pull/2478) +* Added several new `pre-commit` rules, including protection against direct commits to master/maintenance branches [#2500](https://github.com/IntelPython/dpnp/pull/2500) +* Added implementation of `dpnp.ndarray.view` method [#2520](https://github.com/IntelPython/dpnp/pull/2520) +* Added a new backend routine `syrk` from oneMKL to perform symmetric rank-k update which is used for a specialized matrix multiplication where the result is a symmetric matrix [#2509](https://github.com/IntelPython/dpnp/pull/2509) +* Added `timeout-minutes` property to GitHub jobs [#2526](https://github.com/IntelPython/dpnp/pull/2526) +* Added implementation of `dpnp.ndarray.data` and `dpnp.ndarray.data.ptr` attributes [#2521](https://github.com/IntelPython/dpnp/pull/2521) +* Added `dpnp.ndarray.__contains__` method [#2534](https://github.com/IntelPython/dpnp/pull/2534) +* Added implementation of `dpnp.linalg.lu_factor` (SciPy-compatible) [#2557](https://github.com/IntelPython/dpnp/pull/2557), [#2565](https://github.com/IntelPython/dpnp/pull/2565) +* Added implementation of `dpnp.piecewise` [#2550](https://github.com/IntelPython/dpnp/pull/2550) +* Added implementation of `dpnp.linalg.lu_solve` for 2D inputs (SciPy-compatible) [#2575](https://github.com/IntelPython/dpnp/pull/2575) +* Added implementation of `dpnp.special.erfc` [#2588](https://github.com/IntelPython/dpnp/pull/2588) +* Added `dpnp.scipy` submodule to aggregate new SciPy-compatible functions from `linalg` and `special` namespaces [#2603](https://github.com/IntelPython/dpnp/pull/2603) + +### Changed + +* Adjusted the `pre-commit` configuration to run autoupdate weekly
[#2479](https://github.com/IntelPython/dpnp/pull/2479) +* Improved validation of `--target-hip` build option to only accept a gfx-prefixed value [#2481](https://github.com/IntelPython/dpnp/pull/2481) +* Simplified backend implementation of `dpnp.kaiser` by getting rid of unnecessary template [#2472](https://github.com/IntelPython/dpnp/pull/2472) +* `--onemkl-interfaces` and `--onemkl-interfaces-dir` options for building script are deprecated, instead `--onemath` and `--onemath-dir` are introduced to be aligned with [oneMath specification](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/) [#2487](https://github.com/IntelPython/dpnp/pull/2487) +* Clarified description of `xp` keyword in docstring of `dpnp.interp` [#2506](https://github.com/IntelPython/dpnp/pull/2506) +* Updated existing GitHub workflows to add testing with Python 3.13 [#2510](https://github.com/IntelPython/dpnp/pull/2510) +* Aligned the license expression with `PEP-639` [#2511](https://github.com/IntelPython/dpnp/pull/2511) +* Bumped oneMKL version up to `v0.8` [#2514](https://github.com/IntelPython/dpnp/pull/2514) +* Removed the use of class template argument deduction for alias template to conform to the C++17 standard [#2517](https://github.com/IntelPython/dpnp/pull/2517) +* Changed the order of individual FFTs over `axes` for `dpnp.fft.irfftn` to be in forward order [#2524](https://github.com/IntelPython/dpnp/pull/2524) +* Replaced the use of `numpy.testing.suppress_warnings` with appropriate calls from the warnings module [#2529](https://github.com/IntelPython/dpnp/pull/2529) +* Improved documentation of `dpnp.ndarray` class and added a page with description of supported constants [#2422](https://github.com/IntelPython/dpnp/pull/2422) +* Updated `dpnp.size` to accept tuple of ints for `axes` argument [#2536](https://github.com/IntelPython/dpnp/pull/2536) +* Replaced `ci` section in `.pre-commit-config.yaml` with a new GitHub workflow with scheduled run
to autoupdate the `pre-commit` configuration [#2542](https://github.com/IntelPython/dpnp/pull/2542) +* FFT module is updated to perform in-place FFT in intermediate steps of ND FFT [#2543](https://github.com/IntelPython/dpnp/pull/2543) +* Reused dpctl tensor include to enable experimental SYCL namespace for complex types [#2546](https://github.com/IntelPython/dpnp/pull/2546) +* Changed Windows-specific logic in dpnp initialization [#2553](https://github.com/IntelPython/dpnp/pull/2553) +* Added missing includes to files in ufunc and VM pybind11 extensions [#2571](https://github.com/IntelPython/dpnp/pull/2571) +* Refactored backend implementation of `dpnp.linalg.solve` to use oneMKL LAPACK `gesv` directly [#2558](https://github.com/IntelPython/dpnp/pull/2558) +* Improved performance of `dpnp.isclose` function by implementing a dedicated kernel for scalar `rtol` and `atol` arguments [#2540](https://github.com/IntelPython/dpnp/pull/2540) +* Extended `dpnp.pad` to support `pad_width` keyword as a dictionary [#2535](https://github.com/IntelPython/dpnp/pull/2535) +* Redesigned `dpnp.erf` function through pybind11 extension of oneMKL call or dedicated kernel in `ufunc` namespace [#2551](https://github.com/IntelPython/dpnp/pull/2551) +* Improved performance of batched implementation of `dpnp.linalg.det` and `dpnp.linalg.slogdet` [#2572](https://github.com/IntelPython/dpnp/pull/2572) +* Improved documentation of `dpnp.tril_indices` and `dpnp.triu_indices` to clarify the returned order of indices [#2586](https://github.com/IntelPython/dpnp/pull/2586) +* `dpnp` uses pybind11 3.0.1 [#2594](https://github.com/IntelPython/dpnp/pull/2594) + +### Deprecated + +* `--onemkl-interfaces` and `--onemkl-interfaces-dir` options for building script are deprecated, instead `--onemath` and `--onemath-dir` are introduced to be aligned with [oneMath specification](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/)
[#2487](https://github.com/IntelPython/dpnp/pull/2487) + +### Removed + +* Cleaned up backend code to remove obsolete and unused parts of functionality [#2485](https://github.com/IntelPython/dpnp/pull/2485) + +### Fixed + +* Updated `pre-commit` GitHub workflow to pass `no-commit-to-branch` check [#2501](https://github.com/IntelPython/dpnp/pull/2501) +* Updated the math formulas in summary of `dpnp.matvec` and `dpnp.vecmat` to correct a typo [#2503](https://github.com/IntelPython/dpnp/pull/2503) +* Avoided negating unsigned integers in ceil division used in `dpnp.resize` implementation [#2508](https://github.com/IntelPython/dpnp/pull/2508) +* Fixed `dpnp.unique` with 1d input array and `axis=0`, `equal_nan=True` keywords passed where the produced result doesn't collapse the NaNs [#2530](https://github.com/IntelPython/dpnp/pull/2530), [#2587](https://github.com/IntelPython/dpnp/pull/2587) +* Resolved issue when `dpnp.ndarray` constructor is called with `dpnp.ndarray.data` as `buffer` keyword [#2533](https://github.com/IntelPython/dpnp/pull/2533) +* Fixed `dpnp.linalg.cond` to always return a real dtype [#2547](https://github.com/IntelPython/dpnp/pull/2547) +* Resolved the issue in `dpnp.random` functions to allow any value of `size` where each element is castable to `Py_ssize_t` type [#2578](https://github.com/IntelPython/dpnp/pull/2578) +* Resolved `conda build --test` issue in python 3.9 environment [#2583](https://github.com/IntelPython/dpnp/pull/2583) +* Fixed tests for the rounding functions to depend on minimum required numpy version [#2589](https://github.com/IntelPython/dpnp/pull/2589) +* Fixed tests for the ufuncs to depend on minimum required numpy version [#2590](https://github.com/IntelPython/dpnp/pull/2590) +* Added missing permission definition in `Autoupdate pre-commit` GitHub workflow [#2591](https://github.com/IntelPython/dpnp/pull/2591) +* Resolved issue with the cyclic import in `linalg` submodule 
[#2608](https://github.com/IntelPython/dpnp/pull/2608) + +### Security + +## [0.18.1] - 2025-06-24 + +This release achieves `dpnp` compatibility with Python 3.13 and enables distributing `dpnp` packages with the latest Python version. +Moreover, the release provides compatibility with NumPy 2.3.0 and includes several bug fixes. + +### Added + +* Enabled support of Python 3.13 [#2490](https://github.com/IntelPython/dpnp/pull/2490) + +### Changed + +* Updated the tests scope to exclude several `matmul` tests in case of numpy 2.3.0 due to known NumPy issue [#2495](https://github.com/IntelPython/dpnp/pull/2495) + +### Fixed + +* Fixed a bug for calculating the norm (`dpnp.linalg.norm`) of empty arrays when `keepdims=True` is passed [#2477](https://github.com/IntelPython/dpnp/pull/2477) +* Updated the tests for hyperbolic and trigonometric elementwise functions to set correct tolerance for `float16` dtype [#2483](https://github.com/IntelPython/dpnp/pull/2483) + + +## [0.18.0] - 2025-06-04 This release achieves 100% compliance with Python Array API specification (revision [2024.12](https://data-apis.org/array-api/2024.12/)). The release provides enhanced compatibility with NumPy 2.2.5. Window and mathematical routines are complemented by a set of new functions. @@ -18,6 +112,8 @@ Moreover, it adds support to build `dpnp` from the source for AMD GPUs. 
* Added implementation of `dpnp.bartlett` [#2366](https://github.com/IntelPython/dpnp/pull/2366) * Added implementation of `dpnp.convolve` [#2205](https://github.com/IntelPython/dpnp/pull/2205) * Added implementation of `dpnp.kaiser` [#2387](https://github.com/IntelPython/dpnp/pull/2387) +* Added implementation of `dpnp.bitwise_count` [#2308](https://github.com/IntelPython/dpnp/pull/2308) +* Added implementation of `dpnp.common_type` [#2391](https://github.com/IntelPython/dpnp/pull/2391) * Added implementation of `dpnp.interp` [#2417](https://github.com/IntelPython/dpnp/pull/2417) * Added support to build `dpnp` for specified AMD GPU architecture using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/amd/home/) [#2302](https://github.com/IntelPython/dpnp/pull/2302) @@ -35,7 +131,7 @@ Moreover, it adds support to build `dpnp` from the source for AMD GPUs. * Updated `dpnp.outer` to return the same dtype as NumPy when multiplying an array with a scalar [#2295](https://github.com/IntelPython/dpnp/pull/2295) * Changed `"max dimensions"` to `None` in array API capabilities [#2432](https://github.com/IntelPython/dpnp/pull/2432) * Updated kernel header `i0.hpp` to expose `cyl_bessel_i0` function depending on build target [#2440](https://github.com/IntelPython/dpnp/pull/2440) -* Added MKL functions `arg`, `copysign`, `i0`, and `inv` from VM namespace to be used by implementation of the appropriate element-wise functions [#2445](https://github.com/IntelPython/dpnp/pull/2445) +* Added MKL functions `arg`, `copysign`, `i0`, and `inv` from VM namespace to be used by implementation of the appropriate elementwise functions [#2445](https://github.com/IntelPython/dpnp/pull/2445) * Clarified details about conda install instructions in `Quick start quide` and `README` [#2446](https://github.com/IntelPython/dpnp/pull/2446) * Bumped oneMKL version up to `0.7` [#2448](https://github.com/IntelPython/dpnp/pull/2448) * The parameter `axis` in `dpnp.take_along_axis` 
function has now a default value of `-1` [#2442](https://github.com/IntelPython/dpnp/pull/2442) @@ -43,7 +139,7 @@ Moreover, it adds support to build `dpnp` from the source for AMD GPUs. * Updated FFT module to ensure an input array is Hermitian before calling complex-to-real FFT [#2444](https://github.com/IntelPython/dpnp/pull/2444) * Aligned `black` configuration with the list of supported python versions [#2457](https://github.com/IntelPython/dpnp/pull/2457) * Use `pyproject.toml` instead of `setup.py` aligning with current packaging best practices [#2462](https://github.com/IntelPython/dpnp/pull/2462) -* Added a clarification to `dpnp.linalg.cond` docstring about its behavior with singular matrices [#2500] (https://github.com/IntelPython/dpnp/pull/2500) +* Added a clarification to `dpnp.linalg.cond` docstring about its behavior with singular matrices [#2460](https://github.com/IntelPython/dpnp/pull/2460) ### Fixed @@ -52,9 +148,10 @@ Moreover, it adds support to build `dpnp` from the source for AMD GPUs. * Added handling of empty string passed to a test env variable defining data type scope as a `False` value [#2415](https://github.com/IntelPython/dpnp/pull/2415) * Resolved build issues on non-Intel targets in `dpnp.i0` and `dpnp.kaiser` [#2439](https://github.com/IntelPython/dpnp/pull/2439) * Ensure consistency in the `dpnp.linalg.LinAlgError` exception raised on singular input matrices for both non-batched and batched cases in `dpnp.linalg.inv` [#2458] (https://github.com/IntelPython/dpnp/pull/2458) +* Updated test f/w to correct a check of array interface while converting to `numpy.ndarray` for comparison [#2467](https://github.com/IntelPython/dpnp/pull/2467) -## [0.17.0] - 02/26/2025 +## [0.17.0] - 2025-02-26 This release achieves 100% compliance with Python Array API specification (revision [2023.12](https://data-apis.org/array-api/2023.12/)). The release provides enhanced compatibility with NumPy 2.2.3.
Array manipulation, mathematical, logic, and statistics routines are complemented by a set of new functions. @@ -155,21 +252,21 @@ Furthermore, a number of issues relating to running on NVIDIA GPUs have been res * Added a workaround to prevent a memory corruption in `dpnp.correlate` [#2333](https://github.com/IntelPython/dpnp/pull/2333) -## [0.16.3] - 12/20/2024 +## [0.16.3] - 2024-12-20 ### Fixed * Bumped min version of DPC++ compiler required to support experimental SYCL properties [#2231](https://github.com/IntelPython/dpnp/pull/2231) -## [0.16.2] - 12/20/2024 +## [0.16.2] - 2024-12-20 ### Fixed * Enabled `dpnp` in virtual environment on Windows platform [#2242](https://github.com/IntelPython/dpnp/pull/2242) -## [0.16.1] - 12/06/2024 +## [0.16.1] - 2024-12-06 This is a bug-fix release. @@ -189,7 +286,7 @@ This is a bug-fix release. * Resolved a compilation error when building with DPC++ 2025.1 compiler [#2211](https://github.com/IntelPython/dpnp/pull/2211) -## [0.16.0] - 10/14/2024 +## [0.16.0] - 2024-10-14 This release reaches an important milestone by making offloading fully asynchronous. Calls to `dpnp` submit tasks for execution to DPC++ runtime and return without waiting for execution of these tasks to finish. The sequential semantics a user comes to expect from execution of Python script is preserved though. In addition, this release completes implementation of `dpnp.fft` module and adds several new array manipulation, indexing and elementwise routines. Moreover, it adds support to build `dpnp` for Nvidia GPUs. 
@@ -306,7 +403,7 @@ In addition, this release completes implementation of `dpnp.fft` module and adds * Resolved compilation warning and error while building in debug mode [#2066](https://github.com/IntelPython/dpnp/pull/2066) * Fixed an issue with asynchronous execution in `dpnp.fft` module [#2067](https://github.com/IntelPython/dpnp/pull/2067) -## [0.15.0] - 05/25/2024 +## [0.15.0] - 2024-05-25 This release completes implementation of `dpnp.linalg` module and array creation routine, adds cumulative reductions and histogram functions. @@ -375,7 +472,7 @@ and added implementation of `dpnp.mask_indices` function [#1814](https://github. * Resolved an unexpected `ValueError` exception raised from `dpnp.linalg.pinv` due to a shape issue in `dpnp.matmul` [#1843](https://github.com/IntelPython/dpnp/pull/1843) -## [0.14.0] - 02/16/2024 +## [0.14.0] - 2024-02-16 This release will require DPC++ `2024.1.0`, which no longer supports Intel Gen9 integrated GPUs found in Intel CPUs of 10th generation and older. 
@@ -453,7 +550,7 @@ This release will require DPC++ `2024.1.0`, which no longer supports Intel Gen9 * Improved performance of `dpnp.atleast_2d` and `dpnp.atleast_3d` functions and fixed to return a correct shape of resulting array [#1560](https://github.com/IntelPython/dpnp/pull/1560) -## [0.13.0] - 09/29/2023 +## [0.13.0] - 2023-09-29 ### Added @@ -515,7 +612,7 @@ This release will require DPC++ `2024.1.0`, which no longer supports Intel Gen9 * Resolved issues with running statistics functions on a device without fp64 support [#1494](https://github.com/IntelPython/dpnp/pull/1494) -## [0.12.1] - 07/18/2023 +## [0.12.1] - 2023-07-18 ### Added @@ -548,7 +645,7 @@ This release will require DPC++ `2024.1.0`, which no longer supports Intel Gen9 * Resolved a compilation warning with `std::getenv()` call on Windows [#1452](https://github.com/IntelPython/dpnp/pull/1452) * Corrected a link to OneAPI Toolkit in Installation Guide [#1445](https://github.com/IntelPython/dpnp/pull/1445) -## [0.12.0] - 06/15/2023 +## [0.12.0] - 2023-06-15 ### Added diff --git a/CMakeLists.txt b/CMakeLists.txt index c9d526cf47f9..cec18151f7f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.21...3.27 FATAL_ERROR) project(dpnp - VERSION 0.18 + VERSION 0.19 LANGUAGES CXX DESCRIPTION "NumPy-like API accelerated by SYCL." 
) @@ -53,8 +53,8 @@ include(GNUInstallDirs) include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.13.6.tar.gz - URL_HASH SHA256=e08cb87f4773da97fa7b5f035de8763abc656d87d5773e62f6da0587d1f0ec20 + URL https://github.com/pybind/pybind11/archive/refs/tags/v3.0.1.tar.gz + URL_HASH SHA256=741633da746b7c738bb71f1854f957b9da660bcd2dce68d71949037f0969d0ca FIND_PACKAGE_ARGS NAMES pybind11 ) FetchContent_MakeAvailable(pybind11) @@ -68,48 +68,68 @@ find_package(Dpctl REQUIRED) message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR}) message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) -option(DPNP_TARGET_CUDA - "Build DPNP to target CUDA devices" +option(DPNP_USE_ONEMATH + "Build DPNP with oneMath" OFF ) -option(DPNP_USE_ONEMKL_INTERFACES - "Build DPNP with oneMKL Interfaces" - OFF +set(DPNP_TARGET_CUDA + "" + CACHE STRING + "Build DPNP to target CUDA device. \ +Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \ +or to a specific architecture like sm_80." ) set(HIP_TARGETS "" CACHE STRING "HIP architecture for target") set(_dpnp_sycl_targets) -set(_use_onemkl_interfaces OFF) -set(_use_onemkl_interfaces_cuda OFF) -set(_use_onemkl_interfaces_hip OFF) +set(_use_onemath OFF) +set(_use_onemath_cuda OFF) +set(_use_onemath_hip OFF) set(_dpnp_sycl_target_compile_options) set(_dpnp_sycl_target_link_options) if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x") - if(DPNP_TARGET_CUDA) - set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") - set(_use_onemkl_interfaces_cuda ON) + if (DPNP_TARGET_CUDA) + set(_dpnp_cuda_arch) + if(DPNP_TARGET_CUDA MATCHES "^sm_") + set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA}) + elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$") + set(_dpnp_cuda_arch "sm_50") + else() + message(FATAL_ERROR + "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". " + "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'." 
+ ) + endif() + set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown") + set(_use_onemath_cuda ON) endif() - if (NOT "x${HIP_TARGETS}" STREQUAL "x") - set(_use_onemkl_interfaces_hip ON) - - if ("x${_dpnp_sycl_targets}" STREQUAL "x") - set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown") + if (HIP_TARGETS) + if (HIP_TARGETS MATCHES "^gfx") + if ("x${_dpnp_sycl_targets}" STREQUAL "x") + set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown") + else() + set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}") + endif() + set(_use_onemath_hip ON) else() - set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}") + message(FATAL_ERROR + "Invalid value for HIP_TARGETS: \"${HIP_TARGETS}\". " + "Expected an architecture name starting with 'gfx', e.g. 'gfx1030'." + ) endif() endif() else() set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS}) - if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda") - set(_use_onemkl_interfaces_cuda ON) + if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)") + set(_use_onemath_cuda ON) endif() if ("${DPNP_SYCL_TARGETS}" MATCHES "amd_gpu_") - set(_use_onemkl_interfaces_hip ON) + set(_use_onemath_hip ON) if ("x${HIP_TARGETS}" STREQUAL "x") message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend") @@ -130,58 +150,58 @@ if (_dpnp_sycl_targets) list(APPEND _dpnp_sycl_target_link_options -fsycl-targets=${_dpnp_sycl_targets}) endif() -if(DPNP_USE_ONEMKL_INTERFACES) - set(_use_onemkl_interfaces ON) +if(DPNP_USE_ONEMATH) + set(_use_onemath ON) else() - if(DEFINED ENV{DPNP_USE_ONEMKL_INTERFACES}) - set(_use_onemkl_interfaces ON) + if(DEFINED ENV{DPNP_USE_ONEMATH}) + set(_use_onemath ON) endif() endif() -if(_use_onemkl_interfaces) +if(_use_onemath) set(BUILD_FUNCTIONAL_TESTS False) set(BUILD_EXAMPLES False) set(ENABLE_MKLGPU_BACKEND True) set(ENABLE_MKLCPU_BACKEND True) - if(_use_onemkl_interfaces_cuda) + if(_use_onemath_cuda) 
set(ENABLE_CUBLAS_BACKEND True) set(ENABLE_CUSOLVER_BACKEND True) set(ENABLE_CUFFT_BACKEND True) # set(ENABLE_CURAND_BACKEND True) endif() - if(_use_onemkl_interfaces_hip) + if(_use_onemath_hip) set(ENABLE_ROCBLAS_BACKEND True) set(ENABLE_ROCSOLVER_BACKEND True) set(ENABLE_ROCFFT_BACKEND True) # set(ENABLE_ROCRAND_BACKEND True) endif() - if(DPNP_ONEMKL_INTERFACES_DIR) - FetchContent_Declare(onemath_library SOURCE_DIR "${DPNP_ONEMKL_INTERFACES_DIR}") + if(DPNP_ONEMATH_DIR) + FetchContent_Declare(onemath_library SOURCE_DIR "${DPNP_ONEMATH_DIR}") else() FetchContent_Declare( onemath_library GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git - GIT_TAG 20ba6fd7ae4af6ed693246cfd22c343e6522edbe # v0.7 + GIT_TAG 5c7e1e7a710556e51f70ecc8dd26dfd04e3abf41 # v0.8 ) endif() FetchContent_MakeAvailable(onemath_library) if(TARGET onemath) - set(MKL_INTERFACES_LIB "onemath" CACHE INTERNAL "OneMath lib target") + set(ONEMATH_LIB "onemath" CACHE INTERNAL "OneMath lib target") elseif(TARGET onemkl) - set(MKL_INTERFACES_LIB "onemkl" CACHE INTERNAL "OneMKL lib target") + set(ONEMATH_LIB "onemkl" CACHE INTERNAL "OneMKL lib target") else() message(FATAL_ERROR "Neither 'oneMath' nor 'oneMKL' found!") endif() - message(STATUS "MKL interfaces lib target used: ${MKL_INTERFACES_LIB}") + message(STATUS "OneMath lib target used: ${ONEMATH_LIB}") set(CMAKE_INSTALL_RPATH "${CMAKE_BINARY_DIR}/lib") else() - if(_use_onemkl_interfaces_cuda OR _use_onemkl_interfaces_hip) + if(_use_onemath_cuda OR _use_onemath_hip) message(FATAL_ERROR - "CUDA or HIP targets are enabled, but oneMKL Interfaces are not. " - "Please set DPNP_USE_ONEMKL_INTERFACES=ON to enable them." + "CUDA or HIP targets are enabled, but oneMath is not. " + "Please set DPNP_USE_ONEMATH=ON to enable them." 
) endif() endif() diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml index 0f1e1f9078f8..c5df1a7f2e29 100644 --- a/conda-recipe/conda_build_config.yaml +++ b/conda-recipe/conda_build_config.yaml @@ -1,2 +1,18 @@ numpy: - - 1.23 + - '1.26' +c_compiler: # [linux] + - gcc # [linux] +cxx_compiler: # [linux] + - gxx # [linux] +cxx_compiler_version: # [linux] + - '14' # [linux] +c_stdlib: # [linux] + - sysroot # [linux] +c_stdlib_version: # [linux] + - '2.28' # [linux] +c_stdlib: # [win] + - vs # [win] +cxx_compiler: # [win] + - vs2022 # [win] +c_compiler: # [win] + - vs2022 # [win] diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 22af5f008979..af96e710de41 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ {% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2026.0a0") %} {% set required_compiler_and_mkl_version = "2025.0" %} -{% set required_dpctl_version = "0.20.0*" %} +{% set required_dpctl_version = "0.21.0" %} {% set pyproject = load_file_data('pyproject.toml') %} {% set py_build_deps = pyproject.get('build-system', {}).get('requires', []) %} @@ -37,8 +37,8 @@ requirements: - tbb-devel build: - {{ compiler('cxx') }} + - {{ stdlib('c') }} - {{ compiler('dpcpp') }} >={{ required_compiler_and_mkl_version }},<{{ max_compiler_and_mkl_version }} - - sysroot_linux-64 >=2.28 # [linux] run: - python - {{ pin_compatible('dpctl', min_pin='x.x.x', max_pin=None) }} @@ -63,6 +63,7 @@ test: requires: - pytest - setuptools + - scipy # [py>39] about: home: https://github.com/IntelPython/dpnp @@ -82,7 +83,5 @@ about: extra: recipe-maintainers: - antonwolfy - - AlexanderKalistratov - - vtavana - vlad-perevezentsev - ndgrigorian diff --git a/doc/quick_start_guide.rst b/doc/quick_start_guide.rst index 0e6f9dca74e2..8aa86fd8e88f 100644 --- a/doc/quick_start_guide.rst +++ b/doc/quick_start_guide.rst @@ -24,7 +24,7 @@ Follow device driver installation instructions to complete 
the step. Python Interpreter ================== -You will need Python 3.9, 3.10, 3.11 or 3.12 installed on your system. If you +You will need Python 3.9, 3.10, 3.11, 3.12 or 3.13 installed on your system. If you do not have one yet the easiest way to do that is to install `Intel Distribution for Python*`_. It installs all essential Python numerical and machine learning packages optimized for the Intel hardware, including @@ -144,13 +144,40 @@ installation layout of compatible version. The following plugins from CodePlay a Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets. `_ -``dpnp`` can be built for CUDA devices as follows: +Builds for CUDA and AMD devices internally use SYCL alias targets that are passed to the compiler. +A full list of available SYCL alias targets is available in the +`DPC++ Compiler User Manual `_. + +CUDA build +~~~~~~~~~~ + +To build for CUDA devices, use the ``--target-cuda`` argument. + +To target a specific architecture (e.g., ``sm_80``): + +.. code-block:: bash + + python scripts/build_locally.py --target-cuda=sm_80 + +To use the default architecture (``sm_50``), run: .. code-block:: bash - python scripts/build_locally.py --target=cuda + python scripts/build_locally.py --target-cuda + +Note that kernels are built for the default architecture (``sm_50``), allowing them to work on a +wider range of architectures, but limiting the usage of more recent CUDA features. + +For reference, compute architecture strings like ``sm_80`` correspond to specific +CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``). +A complete mapping between NVIDIA GPU models and their respective +Compute Capabilities can be found in the official +`CUDA GPU Compute Capability `_ documentation. + +AMD build +~~~~~~~~~ -And for AMD devices: +To build for AMD devices, use the ``--target-hip=`` argument: .. code-block:: bash @@ -173,13 +200,17 @@ For example: .. 
code-block:: bash python scripts/build_locally.py --target-hip=gfx90a +Multi-target build +~~~~~~~~~~~~~~~~~~ -It is, however, possible to build for Intel devices, CUDA devices, and an AMD device -architecture all at once: +The default ``dpnp`` build from the source enables support of Intel devices only. +Extending the build with a custom SYCL target additionally enables support of CUDA or AMD +device in ``dpnp``. Besides, the support can be also extended to enable both CUDA and AMD +devices at the same time: .. code-block:: bash - python scripts/build_locally.py --target=cuda --target-hip=gfx90a + python scripts/build_locally.py --target-cuda --target-hip=gfx90a Testing diff --git a/doc/reference/constants.rst b/doc/reference/constants.rst new file mode 100644 index 000000000000..256a2bdb12dd --- /dev/null +++ b/doc/reference/constants.rst @@ -0,0 +1,162 @@ +Constants +========= + +DPNP includes several constants: + +.. currentmodule:: dpnp + +.. autodata:: DLDeviceType + +.. data:: e + + Euler's constant, base of natural logarithms, Napier's constant. + + ``e = 2.71828182845904523536028747135266249775724709369995...`` + + .. rubric:: See Also + + :func:`exp` : Exponential function + + :func:`log` : Natural logarithm + + .. rubric:: References + + https://en.wikipedia.org/wiki/E_%28mathematical_constant%29 + + +.. data:: euler_gamma + + ``γ = 0.5772156649015328606065120900824024310421...`` + + .. rubric:: References + + https://en.wikipedia.org/wiki/Euler%27s_constant + + +.. data:: inf + + IEEE 754 floating point representation of (positive) infinity. + + .. rubric:: Returns + + y : float + A floating point representation of positive infinity. + + .. 
rubric:: See Also + + :func:`isinf` : Shows which elements are positive or negative infinity + + :func:`isposinf` : Shows which elements are positive infinity + + :func:`isneginf` : Shows which elements are negative infinity + + :func:`isnan` : Shows which elements are Not a Number + + :func:`isfinite` : Shows which elements are finite (not one of Not a Number, + positive infinity and negative infinity) + + .. rubric:: Notes + + DPNP uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + Also that positive infinity is not equivalent to negative infinity. But + infinity is equivalent to positive infinity. + + .. rubric:: Examples + + .. code-block:: python + + >>> import dpnp as np + >>> np.inf + inf + >>> np.array([1]) / 0.0 + array([inf]) + + +.. data:: nan + + IEEE 754 floating point representation of Not a Number (NaN). + + .. rubric:: Returns + + y : A floating point representation of Not a Number. + + .. rubric:: See Also + + :func:`isnan` : Shows which elements are Not a Number + + :func:`isfinite` : Shows which elements are finite (not one of Not a Number, + positive infinity and negative infinity) + + .. rubric:: Notes + + DPNP uses the IEEE Standard for Binary Floating-Point for Arithmetic + (IEEE 754). This means that Not a Number is not equivalent to infinity. + + .. rubric:: Examples + + .. code-block:: python + + >>> import dpnp as np + >>> np.nan + nan + >>> np.log(np.array(-1)) + array(nan) + >>> np.log(np.array([-1, 1, 2])) + array([ nan, 0. , 0.69314718]) + + +.. data:: newaxis + + A convenient alias for *None*, useful for indexing arrays. + + .. rubric:: Examples + + .. 
code-block:: python + + >>> import dpnp as np + >>> np.newaxis is None + True + >>> x = np.arange(3) + >>> x + array([0, 1, 2]) + >>> x[:, np.newaxis] + array([[0], + [1], + [2]]) + >>> x[:, np.newaxis, np.newaxis] + array([[[0]], + [[1]], + [[2]]]) + >>> x[:, np.newaxis] * x + array([[0, 0, 0], + [0, 1, 2], + [0, 2, 4]]) + + Outer product, same as ``outer(x, y)``: + + >>> y = np.arange(3, 6) + >>> x[:, np.newaxis] * y + array([[ 0, 0, 0], + [ 3, 4, 5], + [ 6, 8, 10]]) + + ``x[np.newaxis, :]`` is equivalent to ``x[np.newaxis]`` and ``x[None]``: + + >>> x[np.newaxis, :].shape + (1, 3) + >>> x[np.newaxis].shape + (1, 3) + >>> x[None].shape + (1, 3) + >>> x[:, np.newaxis].shape + (3, 1) + + +.. data:: pi + + ``pi = 3.1415926535897932384626433...`` + + .. rubric:: References + + https://en.wikipedia.org/wiki/Pi diff --git a/doc/reference/linalg.rst b/doc/reference/linalg.rst index 79b85ea81f1c..142c6052db87 100644 --- a/doc/reference/linalg.rst +++ b/doc/reference/linalg.rst @@ -43,6 +43,7 @@ Decompositions dpnp.linalg.cholesky dpnp.linalg.outer dpnp.linalg.qr + dpnp.linalg.lu_factor dpnp.linalg.svd dpnp.linalg.svdvals @@ -85,6 +86,7 @@ Solving linear equations dpnp.linalg.solve dpnp.linalg.tensorsolve dpnp.linalg.lstsq + dpnp.linalg.lu_solve dpnp.linalg.inv dpnp.linalg.pinv dpnp.linalg.tensorinv diff --git a/doc/reference/ndarray.rst b/doc/reference/ndarray.rst index 4f9aef8a9160..9a03c680a012 100644 --- a/doc/reference/ndarray.rst +++ b/doc/reference/ndarray.rst @@ -66,7 +66,11 @@ of the array: dpnp.ndarray.size dpnp.ndarray.itemsize dpnp.ndarray.nbytes - dpnp.ndarray.base + dpnp.ndarray.device + dpnp.ndarray.sycl_context + dpnp.ndarray.sycl_device + dpnp.ndarray.sycl_queue + dpnp.ndarray.usm_type Data type @@ -98,6 +102,17 @@ Other attributes dpnp.ndarray.flat +Special attributes +------------------ + +.. 
autosummary:: + :toctree: generated/ + :nosignatures: + + dpnp.ndarray.__sycl_usm_array_interface__ + dpnp.ndarray.__usm_ndarray__ + + Array methods ------------- @@ -145,6 +160,7 @@ Array conversion dpnp.ndarray.getfield dpnp.ndarray.setflags dpnp.ndarray.fill + dpnp.ndarray.get_array Shape manipulation @@ -195,6 +211,26 @@ the operation should proceed. Calculation ----------- +Many of these methods take an argument named *axis*. In such cases, + +- If *axis* is *None* (the default), the array is treated as a 1-D array and + the operation is performed over the entire array. This behavior is also the + default if *self* is a 0-dimensional array. + +- If *axis* is an integer, then the operation is done over the given axis (for + each 1-D subarray that can be created along the given axis). + +The parameter *dtype* specifies the data type over which a reduction operation +(like summing) should take place. The default reduce data type is the same as +the data type of *self*. To avoid overflow, it can be useful to perform the +reduction using a larger data type. + +For several methods, an optional *out* argument can also be provided and the +result will be placed into the output array given. The *out* argument must be +an :class:`dpnp.ndarray` and have the same number of elements as the result +array. It can have a different data type in which case casting will be +performed. + .. autosummary:: :toctree: generated/ :nosignatures: @@ -226,12 +262,11 @@ Arithmetic and comparison operations on :class:`dpnp.ndarrays ` are defined as element-wise operations, and generally yield :class:`dpnp.ndarray` objects as results. -Each of the arithmetic operations (``+``, ``-``, ``*``, ``/``, ``//``, -``%``, ``divmod()``, ``**`` or ``pow()``, ``<<``, ``>>``, ``&``, -``^``, ``|``, ``~``) and the comparisons (``==``, ``<``, ``>``, -``<=``, ``>=``, ``!=``) is equivalent to the corresponding -universal function (or :term:`ufunc` for short) in DPNP. 
For -more information, see the section on :ref:`Universal Functions +Each of the arithmetic operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``, +``divmod()``, ``**`` or ``pow()``, ``<<``, ``>>``, ``&``, ``^``, ``|``, ``~``) +and the comparisons (``==``, ``<``, ``>``, ``<=``, ``>=``, ``!=``) is +equivalent to the corresponding universal function (or :term:`ufunc` for short) +in DPNP. For more information, see the section on :ref:`Universal Functions `. @@ -252,6 +287,7 @@ Truth value of an array (:class:`bool() `): .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.ndarray.__bool__ @@ -343,6 +379,7 @@ Matrix Multiplication: .. autosummary:: :toctree: generated/ + :nosignatures: dpnp.ndarray.__matmul__ dpnp.ndarray.__rmatmul__ @@ -371,7 +408,10 @@ Basic customization: dpnp.ndarray.__new__ dpnp.ndarray.__array__ + dpnp.ndarray.__array_namespace__ dpnp.ndarray.__array_wrap__ + dpnp.ndarray.__dlpack__ + dpnp.ndarray.__dlpack_device__ Container customization: (see :ref:`Indexing `) @@ -380,12 +420,13 @@ Container customization: (see :ref:`Indexing `) :nosignatures: dpnp.ndarray.__len__ + dpnp.ndarray.__iter__ dpnp.ndarray.__getitem__ dpnp.ndarray.__setitem__ dpnp.ndarray.__contains__ -Conversion; the operations :class:`int() `, -:class:`float() ` and :class:`complex() `. +Conversion; the operations :class:`int() `, :class:`float() `, +:class:`complex() ` and :func:`operator.index() `. They work only on arrays that have one element in them and return the appropriate scalar. @@ -393,6 +434,7 @@ and return the appropriate scalar. 
:toctree: generated/ :nosignatures: + dpnp.ndarray.__index__ dpnp.ndarray.__int__ dpnp.ndarray.__float__ dpnp.ndarray.__complex__ diff --git a/doc/reference/routines.rst b/doc/reference/routines.rst index 1dd4a205b0cf..1cfca82a15de 100644 --- a/doc/reference/routines.rst +++ b/doc/reference/routines.rst @@ -1,6 +1,5 @@ --------- -Routines --------- +Routines (NumPy) +================ The following pages describe NumPy-compatible routines. These functions cover a subset of @@ -11,6 +10,7 @@ These functions cover a subset of .. toctree:: :maxdepth: 2 + constants array-creation array-manipulation bitwise diff --git a/doc/reference/scipy.rst b/doc/reference/scipy.rst new file mode 100644 index 000000000000..e95b199710a0 --- /dev/null +++ b/doc/reference/scipy.rst @@ -0,0 +1,12 @@ +Routines (SciPy) (:mod:`dpnp.scipy`) +==================================== + +The following pages describe SciPy-compatible routines. +These functions cover a subset of +`SciPy routines `_. + +.. toctree:: + :maxdepth: 2 + + scipy_linalg + scipy_special diff --git a/doc/reference/scipy_linalg.rst b/doc/reference/scipy_linalg.rst new file mode 100644 index 000000000000..0ab8d5a248ff --- /dev/null +++ b/doc/reference/scipy_linalg.rst @@ -0,0 +1,17 @@ +.. currentmodule:: dpnp.scipy.linalg + +Linear algebra (:mod:`dpnp.scipy.linalg`) +========================================= + +.. Hint:: `SciPy API Reference: Linear algebra (scipy.linalg) `_ + +Decompositions +-------------- + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + lu + lu_factor + lu_solve diff --git a/doc/reference/scipy_special.rst b/doc/reference/scipy_special.rst new file mode 100644 index 000000000000..0df23dbd6ce2 --- /dev/null +++ b/doc/reference/scipy_special.rst @@ -0,0 +1,20 @@ +.. currentmodule:: dpnp.scipy.special + +Special functions (:mod:`dpnp.scipy.special`) +============================================= + +.. 
Hint:: `SciPy API Reference: Special functions (scipy.special) `_ + +Error function and Fresnel integrals +------------------------------------ + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + erf + erfc + erfcx + erfi + erfinv + erfcinv diff --git a/doc/reference/special.rst b/doc/reference/special.rst deleted file mode 100644 index 1261625e5e26..000000000000 --- a/doc/reference/special.rst +++ /dev/null @@ -1,17 +0,0 @@ -Special Functions -================= - -.. https://docs.scipy.org/doc/scipy/reference/special.html - -Error Function --------------- - -.. autosummary:: - :toctree: generated/ - :nosignatures: - - dpnp.erf - dpnp.erfc - dpnp.erfcx - dpnp.erfinv - dpnp.erfcinv diff --git a/dpnp/__init__.py b/dpnp/__init__.py index 170b73b76cc0..3d8145b5362c 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -40,17 +40,13 @@ # where to search for DLLs towards both DPNP backend and DPCTL Sycl interface, # otherwise DPNP import will be failing. This is because the libraries # are not installed under any of default paths where Python is searching. -from platform import system - -if system() == "Windows": # pragma: no cover - if hasattr(os, "add_dll_directory"): - os.add_dll_directory(mypath) - os.add_dll_directory(dpctlpath) +if sys.platform == "win32": # pragma: no cover + os.add_dll_directory(mypath) + os.add_dll_directory(dpctlpath) os.environ["PATH"] = os.pathsep.join( [os.getenv("PATH", ""), mypath, dpctlpath] ) - # For virtual environments on Windows, add folder with DPC++ libraries # to the DLL search path if sys.base_exec_prefix != sys.exec_prefix and os.path.isfile( @@ -74,10 +70,15 @@ from .dpnp_iface_utils import * from .dpnp_iface_utils import __all__ as _ifaceutils__all__ from ._version import get_versions +from . import linalg as linalg +from . 
import scipy as scipy __all__ = _iface__all__ __all__ += _ifaceutils__all__ +# add submodules +__all__ += ["linalg", "scipy"] + __version__ = get_versions()["version"] del get_versions diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 4a6b44d50f0d..17b986a38b92 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -26,7 +26,6 @@ set(DPNP_SRC kernels/dpnp_krnl_arraycreation.cpp kernels/dpnp_krnl_common.cpp - kernels/dpnp_krnl_elemwise.cpp kernels/dpnp_krnl_mathematical.cpp kernels/dpnp_krnl_random.cpp kernels/dpnp_krnl_sorting.cpp diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 3cca41032a5a..f5e0ecc737de 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -30,6 +30,7 @@ set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/gemm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemm_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemv.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/syrk.cpp ) pybind11_add_module(${python_module_name} MODULE ${_module_src}) @@ -59,8 +60,8 @@ endif() set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) -target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) @@ -83,11 +84,11 @@ if (DPNP_GENERATE_COVERAGE) target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) endif() -if(_use_onemkl_interfaces) - target_link_libraries(${python_module_name} PRIVATE 
${MKL_INTERFACES_LIB}) - target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMKL_INTERFACES) - if(_use_onemkl_interfaces_cuda) - target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMKL_CUBLAS) +if(_ues_onemath) + target_link_libraries(${python_module_name} PRIVATE ${ONEMATH_LIB}) + target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH) + if(_ues_onemath_cuda) + target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH_CUBLAS) endif() else() target_link_libraries(${python_module_name} PUBLIC MKL::MKL_SYCL::BLAS) diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp index 0321ff6fc6bd..36fc29bae482 100644 --- a/dpnp/backend/extensions/blas/blas_py.cpp +++ b/dpnp/backend/extensions/blas/blas_py.cpp @@ -30,17 +30,23 @@ #include #include +// utils extension header +#include "ext/common.hpp" + #include "dot.hpp" #include "dot_common.hpp" #include "dotc.hpp" #include "dotu.hpp" #include "gemm.hpp" #include "gemv.hpp" +#include "syrk.hpp" namespace blas_ns = dpnp::extensions::blas; namespace py = pybind11; namespace dot_ns = blas_ns::dot; + using dot_ns::dot_impl_fn_ptr_t; +using ext::common::init_dispatch_vector; // populate dispatch vectors and tables void init_dispatch_vectors_tables(void) @@ -48,6 +54,7 @@ void init_dispatch_vectors_tables(void) blas_ns::init_gemm_batch_dispatch_table(); blas_ns::init_gemm_dispatch_table(); blas_ns::init_gemv_dispatch_vector(); + blas_ns::init_syrk_dispatch_vector(); } static dot_impl_fn_ptr_t dot_dispatch_vector[dpctl_td_ns::num_types]; @@ -62,7 +69,7 @@ PYBIND11_MODULE(_blas_impl, m) using event_vecT = std::vector; { - dot_ns::init_dot_dispatch_vector( + init_dispatch_vector( dot_dispatch_vector); auto dot_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, @@ -73,14 +80,14 @@ PYBIND11_MODULE(_blas_impl, m) }; m.def("_dot", dot_pyapi, - "Call `dot` from OneMKL BLAS library to compute " + "Call `dot` from oneMKL BLAS library to compute " "the dot 
product of two real-valued vectors.", py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), py::arg("result"), py::arg("depends") = py::list()); } { - dot_ns::init_dot_dispatch_vector( + init_dispatch_vector( dotc_dispatch_vector); auto dotc_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, @@ -91,7 +98,7 @@ PYBIND11_MODULE(_blas_impl, m) }; m.def("_dotc", dotc_pyapi, - "Call `dotc` from OneMKL BLAS library to compute " + "Call `dotc` from oneMKL BLAS library to compute " "the dot product of two complex vectors, " "conjugating the first vector.", py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), @@ -99,7 +106,7 @@ PYBIND11_MODULE(_blas_impl, m) } { - dot_ns::init_dot_dispatch_vector( + init_dispatch_vector( dotu_dispatch_vector); auto dotu_pyapi = [&](sycl::queue &exec_q, const arrayT &src1, @@ -110,7 +117,7 @@ PYBIND11_MODULE(_blas_impl, m) }; m.def("_dotu", dotu_pyapi, - "Call `dotu` from OneMKL BLAS library to compute " + "Call `dotu` from oneMKL BLAS library to compute " "the dot product of two complex vectors.", py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), py::arg("result"), py::arg("depends") = py::list()); @@ -118,7 +125,7 @@ PYBIND11_MODULE(_blas_impl, m) { m.def("_gemm", &blas_ns::gemm, - "Call `gemm` from OneMKL BLAS library to compute " + "Call `gemm` from oneMKL BLAS library to compute " "the matrix-matrix product with 2-D matrices.", py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"), py::arg("resultC"), py::arg("depends") = py::list()); @@ -126,7 +133,7 @@ PYBIND11_MODULE(_blas_impl, m) { m.def("_gemm_batch", &blas_ns::gemm_batch, - "Call `gemm_batch` from OneMKL BLAS library to compute " + "Call `gemm_batch` from oneMKL BLAS library to compute " "the matrix-matrix product for a batch of 2-D matrices.", py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"), py::arg("resultC"), py::arg("depends") = py::list()); @@ -134,23 +141,31 @@ PYBIND11_MODULE(_blas_impl, m) { m.def("_gemv", 
&blas_ns::gemv, - "Call `gemv` from OneMKL BLAS library to compute " + "Call `gemv` from oneMKL BLAS library to compute " "the matrix-vector product with a general matrix.", py::arg("sycl_queue"), py::arg("matrixA"), py::arg("vectorX"), py::arg("vectorY"), py::arg("transpose"), py::arg("depends") = py::list()); } + { + m.def("_syrk", &blas_ns::syrk, + "Call `syrk` from oneMKL BLAS library to compute " + "the symmetric rank-k update of a matrix.", + py::arg("sycl_queue"), py::arg("matrixA"), py::arg("resultC"), + py::arg("depends") = py::list()); + } + { m.def( - "_using_onemkl_interfaces", + "_using_onemath", []() { -#ifdef USE_ONEMKL_INTERFACES +#ifdef USE_ONEMATH return true; #else return false; #endif }, - "Check if the OneMKL interfaces are being used."); + "Check if oneMath is being used."); } } diff --git a/dpnp/backend/extensions/blas/dot_common.hpp b/dpnp/backend/extensions/blas/dot_common.hpp index fb9a1f078c53..faa31cb6b388 100644 --- a/dpnp/backend/extensions/blas/dot_common.hpp +++ b/dpnp/backend/extensions/blas/dot_common.hpp @@ -128,7 +128,8 @@ std::pair dot_impl_fn_ptr_t dot_fn = dot_dispatch_vector[type_id]; if (dot_fn == nullptr) { throw py::value_error( - "Types of input vectors and result array are mismatched."); + "No dot implementation is available for the specified data type " + "of the input and output arrays."); } char *x_typeless_ptr = vectorX.get_data(); @@ -164,13 +165,4 @@ std::pair return std::make_pair(args_ev, dot_ev); } - -template