From 1e8f6a5f749e37d77b6cab68825fce7658476234 Mon Sep 17 00:00:00 2001 From: IgGusev Date: Mon, 3 Nov 2025 11:56:00 +0400 Subject: [PATCH 1/3] IGNITE-26897 Add migration guide for 3.0 to 3.1 (#6873) --- docs/_data/toc.yaml | 2 + .../installation/migration-from-ai3-1.adoc | 705 ++++++++++++++++++ 2 files changed, 707 insertions(+) create mode 100644 docs/_docs/installation/migration-from-ai3-1.adoc diff --git a/docs/_data/toc.yaml b/docs/_data/toc.yaml index 9a2f1685719f..e88eea1bedb3 100644 --- a/docs/_data/toc.yaml +++ b/docs/_data/toc.yaml @@ -23,6 +23,8 @@ url: installation/deb-rpm - title: Installing Docker url: installation/installing-using-docker + - title: Migration From Ignite 3.1 + url: installation/migration-from-ai3-1 - title: Migration From Ignite 2 url: installation/migration-from-ai2/overview items: diff --git a/docs/_docs/installation/migration-from-ai3-1.adoc b/docs/_docs/installation/migration-from-ai3-1.adoc new file mode 100644 index 000000000000..e13b1834f35a --- /dev/null +++ b/docs/_docs/installation/migration-from-ai3-1.adoc @@ -0,0 +1,705 @@ += Migrating from Apache Ignite 3.0 to 3.1 + +== Overview + +This guide provides step-by-step instructions for migrating Apache Ignite clusters from version 3.0 to version 3.1. Due to architectural changes in 3.1, including the introduction of zone-based replication, migration requires creating a new 3.1 cluster and migrating data using the export/import process. + +[IMPORTANT] +==== +This migration requires cluster downtime. +==== + +== Zone-Based Replication + +Apache Ignite 3.1 introduces zone-based replication, replacing the table-based replication model from version 3.0. Table-based replication is still supported, however it will be dropped in a later release. + +=== Table-Based vs Zone-Based Replication + +[cols="1,2,2"] +|=== +|Aspect |3.0 Table-Based |3.1 Zone-Based + +|RAFT Groups +|Each table creates separate RAFT groups +|Tables in same zone share RAFT groups + +|Example (100 tables) +|100 separate RAFT group sets +|1 shared RAFT group set + +|Memory Footprint +|Higher with more tables +|Significantly reduced + +|Thread Overhead +|Higher (more RAFT groups) +|Lower (fewer RAFT groups) +|=== + +=== Benefits of Zone-Based Replication + +* *Reduced Memory Footprint:* Fewer RAFT groups means lower memory consumption for clusters with many tables +* *Lower Thread Overhead:* Decreased number of RAFT groups reduces thread management complexity +* *Improved Performance:* Better resource utilization for multi-table workloads +* *Transparent Migration:* No changes to user-facing APIs or query behavior + +[NOTE] +==== +Zone-based replication is an internal cluster optimization. Your applications will continue to work without code changes. +==== + +== Phase 1: Document Current Environment + +=== Step 1.1: Connect to 3.0 Cluster + +Connect to your Apache Ignite 3.0 cluster using the CLI tool: + +[source,bash] +---- +cd ignite3-cli-3.0.0/bin +./ignite3 +---- + +Once connected, enter sql execution mode: + +[source,bash] +---- +sql +---- + +=== Step 1.2: Document All Schemas + +List all schemas in your cluster: + +[source,sql] +---- +-- List all schemas +SELECT * FROM SYSTEM.SCHEMAS; +---- + +Save the output to a file for reference during schema recreation. 
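If you prefer to capture these outputs non-interactively, you can run the queries through the CLI and redirect the results to files, the same way the export script later in this guide invokes `ignite sql`. The sketch below is only an illustration; the script name, output directory, and file names are assumptions you should adapt to your environment:

[source,bash]
----
#!/bin/bash
# document-cluster.sh -- capture system view output for later reference (illustrative)

DOC_DIR="/backup/ignite-3.0-docs"
mkdir -p "${DOC_DIR}"

# Record schemas, tables, and distribution zones (the queries used in Steps 1.2 through 1.5)
ignite sql "SELECT * FROM SYSTEM.SCHEMAS" > "${DOC_DIR}/schemas.txt"
ignite sql "SELECT SCHEMA_NAME, TABLE_NAME FROM SYSTEM.TABLES WHERE TABLE_TYPE = 'TABLE'" > "${DOC_DIR}/tables.txt"
ignite sql "SELECT * FROM SYSTEM.ZONES" > "${DOC_DIR}/zones.txt"

echo "Cluster documentation saved to ${DOC_DIR}"
----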

=== Step 1.3: Document All Tables

List all tables across all schemas:

[source,sql]
----
-- List all tables
SELECT SCHEMA_NAME, TABLE_NAME
FROM SYSTEM.TABLES
WHERE TABLE_TYPE = 'TABLE'
ORDER BY SCHEMA_NAME, TABLE_NAME;
----

Save the output to a file for reference during table recreation.

=== Step 1.4: Document Table Schemas

For each table, capture its complete schema definition:

[source,sql]
----
-- Get detailed schema for each table
SELECT
    SCHEMA_NAME,
    TABLE_NAME,
    COLUMN_NAME,
    TYPE,
    NULLABLE,
    COLUMN_DEFAULT
FROM SYSTEM.TABLE_COLUMNS
WHERE SCHEMA_NAME = 'YOUR_SCHEMA'
ORDER BY TABLE_NAME, ORDINAL_POSITION;
----

Save the output to a file for reference during schema recreation.

[IMPORTANT]
====
Document the exact CREATE TABLE statements for all tables. You'll need these to recreate the schema in 3.1.
====

=== Step 1.5: Document Distribution Zones

Capture the current distribution zone configuration:

[source,sql]
----
-- Document distribution zones
SELECT * FROM SYSTEM.ZONES;
----

Save the output to a file for reference during zone recreation.

=== Step 1.6: Calculate Data Volume

Estimate the size of the data to be migrated by counting the rows in each table:

[source,sql]
----
-- Get the row count for each table (repeat for every table from Step 1.3)
SELECT COUNT(*) AS row_count FROM analytics.events;
----

Save the row counts for each table. You'll use these to verify data integrity after migration.

=== Step 1.7: Create Schema Recreation Script

Create a SQL script file named `schema-recreation.sql` containing all CREATE TABLE statements:

[source,sql]
----
-- Example for a table:
CREATE TABLE analytics.events (
    id INT PRIMARY KEY,
    event_time TIMESTAMP NOT NULL,
    user_id VARCHAR(100),
    event_type VARCHAR(50),
    payload VARCHAR(4000)
) WITH (
    -- Document any table options here
);

-- Repeat for all tables
----

Save this script; you will run it against the 3.1 cluster during schema recreation.

[WARNING]
====
Ensure your CREATE TABLE statements include all constraints, indexes, and table options. Missing configuration can lead to performance or data integrity issues.
====

== Phase 2: Export Data from 3.0 Cluster

=== Step 2.1: Create Export Directory

Create a directory for export files on accessible storage:

[source,bash]
----
mkdir -p /backup/ignite-3.0-export
chmod 755 /backup/ignite-3.0-export
----

[NOTE]
====
If using shared network storage, ensure all nodes have write access to this location.
====

=== Step 2.2: Choose Export Format

Apache Ignite supports two export formats:

[cols="1,2,2"]
|===
|Format |Advantages |Best For

|*CSV*
|Human-readable, easy to debug, compatible with many tools
|Small to medium datasets, troubleshooting

|*Parquet*
|Compressed, faster I/O, efficient for large datasets
|Large datasets, production migrations
|===

=== Step 2.3: Export Table Data

Export each table using the `COPY FROM ... INTO` command.
+ +==== CSV Export + +[source,sql] +---- +-- Export with headers for easier import +COPY FROM (SELECT * FROM analytics.events) +INTO '/backup/ignite-3.0-export/analytics_events.csv' +FORMAT CSV +WITH 'header'='true'; +---- + +For large tables, export in chunks: + +[source,sql] +---- +-- Export in chunks by partition +COPY FROM (SELECT * FROM analytics.events WHERE id BETWEEN 0 AND 1000000) +INTO '/backup/ignite-3.0-export/analytics_events_part1.csv' +FORMAT CSV +WITH 'header'='true'; +---- + +==== Parquet Export (Recommended) + +[source,sql] +---- +COPY FROM analytics.events (id, event_time, user_id, event_type, payload) +INTO '/backup/ignite-3.0-export/analytics_events.parquet' +FORMAT PARQUET; +---- + +=== Step 2.4: Automate Exports with Script + +Create a shell script to export all tables automatically: + +[source,bash] +---- +#!/bin/bash +# export-all-tables.sh + +BACKUP_DIR="/backup/ignite-3.0-export" + +# Array of tables to export (schema.table format) +TABLES=( + "analytics.events" + "analytics.users" + "sales.orders" + "sales.products" +) + +for table in "${TABLES[@]}"; do + schema=$(echo $table | cut -d'.' -f1) + tbl=$(echo $table | cut -d'.' -f2) + + echo "Exporting ${table}..." + + ignite sql "COPY FROM (SELECT * FROM ${table}) \ + INTO '${BACKUP_DIR}/${schema}_${tbl}.parquet' \ + FORMAT PARQUET" + + if [ $? -eq 0 ]; then + echo "✓ ${table} exported successfully" + + # Get row count for verification + ignite sql "SELECT COUNT(*) as row_count FROM ${table}" > "${BACKUP_DIR}/${schema}_${tbl}.count" + else + echo "✗ Failed to export ${table}" + exit 1 + fi +done + +echo "Export complete. Files in ${BACKUP_DIR}" +---- + +Make the script executable and run it: + +[source,bash] +---- +chmod +x export-all-tables.sh +./export-all-tables.sh +---- + +=== Step 2.5: Verify Exports + +Check that all export files were created successfully: + +[source,bash] +---- +# List all export files +ls -lh /backup/ignite-3.0-export/ + +# Verify file sizes are reasonable (not 0 bytes) +find /backup/ignite-3.0-export/ -size 0 +---- + +[CAUTION] +==== +Do not proceed to the next phase until all exports are verified. Missing or corrupted export files will result in data loss. +==== + +=== Step 2.6: Stop 3.0 Cluster + +Once all exports are verified, gracefully stop all cluster nodes: + +[source,bash] +---- +# Stop all nodes gracefully +ignite node stop --node node1 +ignite node stop --node node2 +... +---- + +[WARNING] +==== +After stopping the 3.0 cluster, do not delete any data until the migration is completely verified in the 3.1 cluster. +==== + +== Phase 3: Set Up 3.1 Cluster + +=== Step 3.1: Download Apache Ignite 3.1 + +Download the Apache Ignite 3.1 distribution from the link:https://ignite.apache.org/download.cgi[official website]. + +=== Step 3.2: Configure Cluster Nodes + +Update your configuration files from 3.0 to 3.1 format: + +==== Configuration Changes in 3.1 + +[cols="1,1,2"] +|=== +|Change Type |3.0 Format |3.1 Format + +|Timeout Properties +|`timeout=5000` +|`timeoutMillis=5000` + +|Zone Creation +|`CREATE ZONE myZone WITH STORAGE_PROFILES='default', REPLICAS=3;` +|`CREATE ZONE myZone (REPLICAS 3) STORAGE PROFILES['default'];` +|=== + +[TIP] +==== +Review the Apache Ignite 3.1 documentation for a complete list of configuration changes. 
+==== + +=== Step 3.3: Start Cluster Nodes + +Start each node in your cluster: + +[source,bash] +---- +# Start each node (repeat for all nodes) +./bin/ignite3 node start --config ignite-config.conf +---- + +[NOTE] +==== +By default, nodes load the configuration from `etc/ignite-config.conf`. You can specify a different configuration file with the `--config` parameter. +==== + +=== Step 3.4: Initialize the Cluster + +Once all nodes are started, initialize the cluster from any node: + +[source,bash] +---- +ignite cluster init --name=ignite-cluster +---- + +=== Step 3.5: Verify Cluster Topology + +Confirm all nodes are part of the cluster: + +[source,bash] +---- +ignite cluster topology +---- + +Expected output should show all nodes in ACTIVE state: + +---- +[name=node1, address=192.168.1.10:10800, state=ACTIVE] +[name=node2, address=192.168.1.11:10800, state=ACTIVE] +... +---- + +=== Step 3.6: Recreate Schemas + +Connect to the cluster and recreate all schemas: + +[source,sql] +---- +-- Create schemas +CREATE SCHEMA analytics; +CREATE SCHEMA sales; +---- + +=== Step 3.7: Recreate Distribution Zones + +If you have custom distribution zones, recreate them: + +[source,sql] +---- +-- Create distribution zones (if customized) +CREATE ZONE analytics_zone (REPLICAS 3) STORAGE PROFILES['default']; +---- + +=== Step 3.8: Recreate Tables + +Execute your saved schema recreation script: + +[source,sql] +---- +CREATE TABLE analytics.events ( + id INT PRIMARY KEY, + event_time TIMESTAMP NOT NULL, + user_id VARCHAR(100), + event_type VARCHAR(50), + payload VARCHAR(4000) +); + +-- Repeat for all tables +---- + +Verify each table was created correctly: + +[source,sql] +---- +-- Verify table creation +SELECT * FROM SYSTEM.TABLES WHERE TABLE_NAME = 'EVENTS'; +---- + +[IMPORTANT] +==== +Ensure table schemas in 3.1 exactly match the schemas from 3.0. Mismatches will cause import failures. +==== + +== Phase 4: Import Data into 3.1 Cluster + +=== Step 4.1: Import Individual Tables + +Import data for each table using the `COPY FROM` command. + +==== CSV Import + +[source,sql] +---- +COPY FROM '/backup/ignite-3.0-export/analytics_events.csv' +INTO analytics.events (id, event_time, user_id, event_type, payload) +FORMAT CSV +WITH 'header'='true', 'batchSize'='2048'; +---- + +==== Parquet Import (Recommended) + +[source,sql] +---- +COPY FROM '/backup/ignite-3.0-export/analytics_events.parquet' +INTO analytics.events (id, event_time, user_id, event_type, payload) +FORMAT PARQUET +WITH 'batchSize'='2048'; +---- + +=== Step 4.2: Automate Imports with Script + +Create a shell script to import all tables: + +[source,bash] +---- +#!/bin/bash +# import-all-tables.sh + +BACKUP_DIR="/backup/ignite-3.0-export" + +# Array of tables to import +TABLES=( + "analytics.events:id,event_time,user_id,event_type,payload" + "analytics.users:user_id,username,email,created_at" + "sales.orders:order_id,customer_id,order_date,total" + "sales.products:product_id,name,price,stock" +) + +for entry in "${TABLES[@]}"; do + table=$(echo $entry | cut -d':' -f1) + columns=$(echo $entry | cut -d':' -f2) + schema=$(echo $table | cut -d'.' -f1) + tbl=$(echo $table | cut -d'.' -f2) + + echo "Importing ${table}..." + + ignite sql "COPY FROM '${BACKUP_DIR}/${schema}_${tbl}.parquet' \ + INTO ${table} (${columns}) \ + FORMAT PARQUET \ + WITH 'batchSize'='2048'" + + if [ $? 
-eq 0 ]; then + echo "✓ ${table} imported successfully" + + # Verify row count + actual_count=$(ignite sql "SELECT COUNT(*) FROM ${table}" | grep -oE '[0-9]+') + expected_count=$(cat "${BACKUP_DIR}/${schema}_${tbl}.count" | grep -oE '[0-9]+') + + if [ "$actual_count" == "$expected_count" ]; then + echo "✓ Row count verified: ${actual_count}" + else + echo "✗ Row count mismatch: expected ${expected_count}, got ${actual_count}" + exit 1 + fi + else + echo "✗ Failed to import ${table}" + exit 1 + fi +done + +echo "Import complete." +---- + +Make the script executable and run it: + +[source,bash] +---- +chmod +x import-all-tables.sh +./import-all-tables.sh +---- + +=== Step 4.3: Verify Data Integrity + +After imports complete, perform thorough verification: + +==== Row Count Verification + +[source,sql] +---- +-- Compare row counts +SELECT COUNT(*) FROM analytics.events; +---- + +Compare with the saved row counts from your 3.0 cluster. + +==== Data Sampling + +[source,sql] +---- +-- Spot check data +SELECT * FROM analytics.events LIMIT 10; + +-- Verify no NULL values in NOT NULL columns +SELECT COUNT(*) FROM analytics.events +WHERE event_time IS NULL; + +-- Check date ranges are preserved +SELECT MIN(event_time), MAX(event_time) +FROM analytics.events; +---- + +==== Create Verification Script + +Automate verification across all tables: + +[source,bash] +---- +#!/bin/bash +# verify-migration.sh + +echo "=== Migration Verification Report ===" +echo + +TABLES=( + "analytics.events" + "analytics.users" + "sales.orders" + "sales.products" +) + +BACKUP_DIR="/backup/ignite-3.0-export" + +for table in "${TABLES[@]}"; do + schema=$(echo $table | cut -d'.' -f1) + tbl=$(echo $table | cut -d'.' -f2) + + echo "Table: ${table}" + + # Get current count + current=$(ignite sql "SELECT COUNT(*) FROM ${table}" | grep -oE '[0-9]+') + echo " Current row count: ${current}" + + # Get expected count + expected=$(cat "${BACKUP_DIR}/${schema}_${tbl}.count" | grep -oE '[0-9]+') + echo " Expected row count: ${expected}" + + if [ "$current" == "$expected" ]; then + echo " Status: ✓ PASS" + else + echo " Status: ✗ FAIL" + fi + echo +done +---- + +[CAUTION] +==== +Do not proceed with application cutover until all verification checks pass successfully. 
+==== + +== Phase 5: Update Client Applications + +=== Step 5.1: Update Connection Configuration + +Update application configuration to point to the 3.1 cluster: + +[source,properties] +---- +# Old 3.0 connection +ignite.endpoints=old-node1:10800,old-node2:10800,old-node3:10800 + +# New 3.1 connection +ignite.endpoints=new-node1:10800,new-node2:10800,new-node3:10800 +---- + +=== Step 5.2: Review API Changes + +Check for deprecated APIs in your client code: + +==== Java API Changes + +[source,java] +---- +// Deprecated in 3.1 +ignite.clusterNodes() + +// Replace with +ignite.cluster().nodes() +---- + +[TIP] +==== +Refer to the Apache Ignite 3.1 release notes for a complete list of API changes: https://ignite.apache.org/releases/3.1.0/release_notes.html +==== + +=== Step 5.3: Test Client Connectivity + +Before switching production traffic, test connectivity: + +[source,java] +---- +// Connection test +try (IgniteClient client = IgniteClient.builder() + .addresses("new-node1:10800", "new-node2:10800", "new-node3:10800") + .build()) { + + // Verify connectivity + Collection nodes = client.cluster().nodes(); + System.out.println("Connected to " + nodes.size() + " nodes"); + + // Test data access + Table table = client.tables().table("analytics.events"); + RecordView view = table.recordView(); + + Tuple record = view.get(null, Tuple.create().set("id", 1)); + System.out.println("Sample record retrieved: " + record); +} +---- + +Once the connection is confirmed, gradually migrate traffic. + +== Phase 6: Post-Migration Verification + +=== Step 6.1: Verify Zone-Based Replication + +Confirm zone-based replication is active by checking cluster startup logs: + +[source,bash] +---- +# Check node logs for confirmation +grep "Zone based replication" /path/to/node/logs/*.log +---- + +Expected output: +---- +Zone based replication: true +---- + +Verify zones are properly configured: + +[source,sql] +---- +SELECT * FROM SYSTEM.ZONES; +---- \ No newline at end of file From e1e9624a5816dd42cfd3c497323795189c2b600b Mon Sep 17 00:00:00 2001 From: IgGusev Date: Fri, 14 Nov 2025 20:03:52 +0400 Subject: [PATCH 2/3] IGNITE-27064 Fix missing distribution zone doc (#6982) (cherry picked from commit e516650d37f7e768ee1f386cc760414b41d8d4e0) --- docs/_data/toc.yaml | 2 + .../storage/distribution-zones.adoc | 245 ++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 docs/_docs/administrators-guide/storage/distribution-zones.adoc diff --git a/docs/_data/toc.yaml b/docs/_data/toc.yaml index e88eea1bedb3..7ef442e9eb9e 100644 --- a/docs/_data/toc.yaml +++ b/docs/_data/toc.yaml @@ -137,6 +137,8 @@ url: administrators-guide/storage/storage-profiles - title: Data Partitions url: administrators-guide/storage/data-partitions + - title: Data Partitions + url: administrators-guide/storage/distribution-zones - title: Storage Engines url: administrators-guide/storage/engines/storage-engines items: diff --git a/docs/_docs/administrators-guide/storage/distribution-zones.adoc b/docs/_docs/administrators-guide/storage/distribution-zones.adoc new file mode 100644 index 000000000000..47867dc2bb26 --- /dev/null +++ b/docs/_docs/administrators-guide/storage/distribution-zones.adoc @@ -0,0 +1,245 @@ += Distribution Zones + +== What is a Distribution Zone? + +Distribution zones in Ignite are entities that combine sets of tables and define: + +- How these tables are distributed across the cluster, how many copies of data are made, how the data is partitioned, how partitions are assigned to nodes. 

- On which cluster nodes these tables will be stored.

- How the cluster reacts to nodes entering or leaving the cluster, e.g. whether the tables will automatically start using a new node when the cluster is scaled up.

Distribution zones are not equivalent to the concept of an availability zone commonly used in cloud computing.

An availability zone is a set of infrastructure resources with independent hardware, networking, and power, and is often physically separated from other availability zones.

An Ignite cluster often spans multiple availability zones, and distribution zones also typically span multiple availability zones. That way, tables can continue to be available even if one of the availability zones goes down.

//When an Ignite cluster uses multiple availability zones, it is recommended to use the rack awareness feature of distribution zones to ensure that data copies are split between the availability zones.


== Default Zone

Ignite 3 creates a `default` distribution zone on startup. This distribution zone stores data from tables when they are not configured to use a different zone, or when a different distribution zone is not available. This distribution zone has 25 partitions and 1 partition replica, and does not adjust itself to new nodes entering or exiting the cluster. For production use, we recommend creating a new distribution zone adjusted for your workload.

== Creating and Using Zones

Distribution zones in Ignite 3 are created by using the SQL `CREATE ZONE` command. When creating a zone, you must specify the link:administrators-guide/storage/storage-overview[Storage Profile] to use. The storage profile determines which storage engine will be used and its storage properties.

The example below creates a primary distribution zone with the default storage profile:

[source,sql]
----
CREATE ZONE PrimaryZone (PARTITIONS 25) STORAGE PROFILES ['default'];
----

== Configuring Data Replication

You can control the number of partitions (how many pieces the data is split into) and replicas (how many copies of the data are stored) by using the `PARTITIONS` and `REPLICAS` options.

If not specified, the distribution zone creates `(dataNodesCount * coresOnNode * 2) / replicaFactor` partitions, and does not create copies of data. The `dataNodesCount` is the estimated number of nodes that will be in the distribution zone when it is created, according to its link:administrators-guide/storage/distribution-zones#node-filtering[filter] and link:administrators-guide/storage/storage-overview[storage profiles]. At least 1 partition is always created.

In the example below, the tables will be split into 50 partitions, and each partition will have 3 copies stored on the cluster:

[source,sql]
----
CREATE ZONE IF NOT EXISTS exampleZone (PARTITIONS 50, REPLICAS 3) STORAGE PROFILES ['default'];
----

Partitions with the same number for all tables in the zone are always stored on the same nodes within the distribution zone.

You can also specify `ALL` as the number of replicas to automatically scale the replica count to the number of nodes in your cluster; this is described in the Replicated Zones section below.
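As a worked example of the default partition formula above (the node count, core count, and replica factor here are hypothetical, not defaults):

----
dataNodesCount = 4    # estimated nodes matching the zone's filter and storage profiles
coresOnNode    = 8
replicaFactor  = 2    # REPLICAS 2

partitions = (4 * 8 * 2) / 2 = 32
----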
=== Replicated Zones

For scenarios requiring maximum data availability, you can create a replicated zone by specifying `ALL` as the number of replicas. This automatically scales the number of replicas to match the number of nodes in your cluster, placing a copy of every partition on every node.

[source,sql]
----
CREATE ZONE exampleZone (REPLICAS ALL) STORAGE PROFILES ['default'];
----

When you create a replicated zone, Ignite ensures that each partition has a replica on every node in the cluster. As new nodes join the cluster, they automatically receive replicas and become learners in the RAFT groups until the zone adjusts its configuration.

==== Combining Replicated and Standard Zones

A common pattern is to use replicated zones for reference data and standard zones with fewer replicas for transactional data:

[source,sql]
----
-- Replicated zone for reference data
CREATE ZONE RefDataZone (REPLICAS ALL) STORAGE PROFILES ['default'];
-- Standard zone for transactional data
CREATE ZONE TransactionalZone (REPLICAS 3) STORAGE PROFILES ['default'];
-- Reference table in replicated zone
CREATE TABLE Countries (
    id int PRIMARY KEY,
    code varchar(2),
    name varchar(100)
) ZONE RefDataZone;
-- Transactional table in standard zone
CREATE TABLE Orders (
    id int PRIMARY KEY,
    customer_id int,
    country_code varchar(2),
    amount decimal
) ZONE TransactionalZone;
----

This approach gives you local access to reference data while keeping storage requirements reasonable for high-volume transactional data.

=== Storage Profiles

When creating a distribution zone, you can define the set of link:administrators-guide/storage/storage-overview[storage profiles] that can be used by tables in this zone. You cannot alter storage profiles after the distribution zone has been created. To create a distribution zone that uses one or multiple storage profiles, use the following SQL command:

[source,sql]
----
CREATE ZONE exampleZone (PARTITIONS 2, REPLICAS 3) STORAGE PROFILES ['profile1', 'profile3'];
----

In this case, tables created in this distribution zone can only use `profile1` or `profile3`.

=== Quorum Size

You can set the `QUORUM SIZE` parameter to fine-tune the number of replicas that must be available for the zone to remain operational.

Ignite automatically configures the recommended quorum size for your distribution zone: `3` data replicas are required for quorum if the distribution zone has 5 or more replicas, 2 if there are between 2 and 4 replicas, or 1 if only one data replica exists.

The quorum size has the following limits, depending on the number of replicas:

* _Minimum_ value: `1` if there is only one replica and `2` if there is more than one.
* _Maximum_ value: half the total number of replicas rounded up.

The example below shows how you can configure the quorum size:

[source,sql]
----
CREATE ZONE exampleZone (REPLICAS 9, QUORUM SIZE 5) STORAGE PROFILES ['default'];
----

TIP: It is recommended to use an odd number of replicas as your quorum size.

=== Node Filtering

Distribution zones can use node attributes, which can be specified in the link:administrators-guide/config/node-config[node configuration], to dynamically distribute data only to nodes that have the specified attributes. This can be used, for example, to store application data only on nodes with SSD drives. If no node matches the filter, the data will be stored on all nodes instead. Distribution zone filters use JSONPath rules.
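Conceptually, each node's attributes are treated as a JSON object and the filter is a JSONPath expression evaluated against it. The following is only an illustration of the matching logic; the attribute names and values are hypothetical:

----
Node attributes, viewed as JSON:   { "storage": "SSD", "region": "EU" }
Filter expression:                 $[?(@.storage == "SSD")]
Result:                            the node matches and can hold data for the zone
----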

The example below creates a new `storage` attribute and sets it to `SSD`:

----
node config update -n defaultNode ignite.nodeAttributes.nodeAttributes.storage="SSD"
----

The example below creates a distribution zone that only stores data on nodes that have the SSD attribute:

[source,sql]
----
CREATE ZONE IF NOT EXISTS exampleZone (NODES FILTER '$[?(@.storage == "SSD")]') STORAGE PROFILES ['default'];
----

You can change the distribution zone filter by using the `ALTER ZONE` command, for example:

[source,sql]
----
ALTER ZONE exampleZone SET DATA_NODES_FILTER='$[?(@.storage == "HDD")]';
----

If you no longer need to filter the data nodes, set the filter to match all nodes:

[source,sql]
----
ALTER ZONE exampleZone SET DATA_NODES_FILTER='$..*';
----

=== High Availability

By default, Ignite ensures strong consistency of data in the cluster. To do this, it requires the majority of link:administrators-guide/storage/distribution-zones[replicas of data partitions] to be available. As partitions are spread across the nodes, it is possible to lose the majority of nodes that hold data for a partition, leading to all operations on that partition being stopped until the majority can be safely restored. This ensures that no data is lost.

In high-load environments, this behavior may be undesirable, as it interrupts writes in order to rule out a small chance of losing data together with the nodes that left the cluster. For this scenario, Ignite provides high availability zones. If a zone has high availability enabled and the majority of nodes with data from it leave the cluster, the data on them is considered lost and, after a short delay in case the nodes return, the cluster continues to handle read and write requests normally.

High availability mode can only be enabled when the distribution zone is created. To do this, use the following SQL command:

[source,sql]
----
CREATE ZONE IF NOT EXISTS exampleZone (REPLICAS 3, CONSISTENCY MODE 'HIGH AVAILABILITY') STORAGE PROFILES ['default'];
----

== Cluster Scaling

The number of active nodes in the cluster can change dynamically during its operation, as nodes are added, taken down for maintenance, or leave the cluster unexpectedly. You can configure whether and when Ignite adjusts the distribution zone to match the new cluster topology after a node enters or leaves the cluster.

Often it is a good idea to provide a buffer period before redistribution begins, allowing in-progress operations to complete. To control this behavior, you can specify the following parameters:

- `AUTO SCALE UP` - specifies the delay in seconds between nodes joining the cluster and the start of distribution zone adjustment to include the new nodes. This parameter is set to 0 seconds by default (immediate scale-up).
- `AUTO SCALE DOWN` - specifies the delay in seconds between nodes leaving the cluster and the start of distribution zone adjustment to exclude the departed nodes. This parameter is set to `OFF` by default (no automatic scale-down occurs).

The example below shows how you can configure the cluster scaling delay:

[source,sql]
----
CREATE ZONE IF NOT EXISTS exampleZone (AUTO SCALE UP 300, AUTO SCALE DOWN 300) STORAGE PROFILES['default'];
----

Once distribution zone scaling is configured, you can disable it by specifying `OFF` in the corresponding parameter, for example:

[source,sql]
----
ALTER ZONE exampleZone SET (AUTO SCALE DOWN OFF);
----

=== Considerations for Zone Size

All tables stored in a distribution zone share resources. As a result, it is recommended to consider how large a distribution zone needs to be.

As partitions are colocated on the same nodes, assigning tables that are commonly accessed together to the same distribution zone can reduce the overhead required for transmitting query results between nodes, and allows colocated link:developers-guide/compute/compute[compute jobs].

However, if a table is under heavy load, it may negatively affect performance when working with other tables in the same distribution zone. In most cases, this should not be a significant concern, and correct data distribution for your scenarios should be prioritized.


== Checking Distribution Zone Properties

Distribution zone properties can be viewed through the `system.zones` link:administrators-guide/metrics/system-views[system view]. You can use the following SQL command to view them:

[source,sql]
----
SELECT * FROM system.zones;
----

The command lists information about all distribution zones on the cluster.

== Adjusting Distribution Zones

To change distribution zone parameters, use the `ALTER ZONE` command. You can use the same parameters as when creating the zone. For example:

[source,sql]
----
ALTER ZONE IF EXISTS exampleZone SET (REPLICAS 5);
----

== Example Zone Usage

In this example, we create a distribution zone and then create 2 tables that will be colocated in the same zone.

[source,sql]
----
CREATE ZONE IF NOT EXISTS EXAMPLEZONE (PARTITIONS 20, REPLICAS 3) STORAGE PROFILES ['default'];

CREATE TABLE IF NOT EXISTS Person (
    id int primary key,
    city_id int,
    name varchar,
    age int,
    company varchar
) ZONE EXAMPLEZONE;

CREATE TABLE IF NOT EXISTS Account (
    id int primary key,
    name varchar,
    amount int
) ZONE EXAMPLEZONE;
----
\ No newline at end of file

From 44f57a2563a5e511f038b413e6bea80f110a60bc Mon Sep 17 00:00:00 2001
From: IgGusev
Date: Fri, 14 Nov 2025 21:26:27 +0400
Subject: [PATCH 3/3] IGNITE-27067 Fix TOC issue in documentation (#6984)

---
 docs/_data/toc.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/_data/toc.yaml b/docs/_data/toc.yaml
index 7ef442e9eb9e..0d23cf8e4701 100644
--- a/docs/_data/toc.yaml
+++ b/docs/_data/toc.yaml
@@ -137,7 +137,7 @@
         url: administrators-guide/storage/storage-profiles
       - title: Data Partitions
         url: administrators-guide/storage/data-partitions
-      - title: Data Partitions
+      - title: Distribution Zones
         url: administrators-guide/storage/distribution-zones
       - title: Storage Engines
         url: administrators-guide/storage/engines/storage-engines