diff --git a/src/container/cli.c b/src/container/cli.c index f8af82eaeb1..353ef09f67d 100644 --- a/src/container/cli.c +++ b/src/container/cli.c @@ -3590,5 +3590,8 @@ cont_mark_slave(struct d_hlink *link, void *arg) int dc_cont_mark_all_slave(void) { + if (DAOS_FAIL_CHECK(DAOS_CONT_DESTROY_AFTER_FORK)) + return 0; + return daos_hhash_traverse(DAOS_HTYPE_CO, cont_mark_slave, NULL); } diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 8068736d0dd..233dff820a9 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -918,6 +918,7 @@ enum { #define DAOS_POOL_RFCHECK_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa1) #define DAOS_POOL_REINT_SLOW (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa2) #define DAOS_CONT_DESTROY_FAIL_POST (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa3) +#define DAOS_CONT_DESTROY_AFTER_FORK (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa4) #define DAOS_CHK_CONT_ORPHAN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb0) #define DAOS_CHK_CONT_BAD_LABEL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb1) diff --git a/src/tests/ftest/rebuild/inc_reint.py b/src/tests/ftest/rebuild/inc_reint.py new file mode 100644 index 00000000000..00735a73232 --- /dev/null +++ b/src/tests/ftest/rebuild/inc_reint.py @@ -0,0 +1,30 @@ +""" + (C) Copyright 2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +from daos_core_base import DaosCoreBase + + +class IncReintContRecovTest(DaosCoreBase): + # pylint: disable=too-many-ancestors,too-many-public-methods + """Runs daos incremental reintegration core tests. 
+ + :avocado: recursive + """ + + def test_daos_inc_reint_cont_recov(self): + """Jira ID: DAOS-17857 + + Test Description: + Run daos_test -Y + + Use cases: + Core tests for daos container recovery during incremental reintegration + + :avocado: tags=all,daily_regression + :avocado: tags=hw,medium,provider + :avocado: tags=inc_reint,cont_recov + :avocado: tags=IncReintContRecovTest,test_daos_inc_reint_cont_recov + """ + self.run_subtest() diff --git a/src/tests/ftest/rebuild/inc_reint.yaml b/src/tests/ftest/rebuild/inc_reint.yaml new file mode 100644 index 00000000000..de7e2f1a6d6 --- /dev/null +++ b/src/tests/ftest/rebuild/inc_reint.yaml @@ -0,0 +1,63 @@ +hosts: + test_servers: 4 + +timeout: 1H + +server_config: + name: daos_server + engines_per_host: 2 + engines: + 0: + targets: 4 + pinned_numa_node: 0 + nr_xs_helpers: 0 + log_file: daos_server0.log + log_mask: DEBUG,MEM=ERR + env_vars: + - DD_MASK=mgmt,md,any + - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 + - D_LOG_FLUSH=DEBUG + - FI_LOG_LEVEL=warn + - D_LOG_STDERR_IN_LOG=1 + storage: auto + + 1: + targets: 4 + pinned_numa_node: 1 + nr_xs_helpers: 0 + log_file: daos_server1.log + log_mask: DEBUG,MEM=ERR + env_vars: + - DD_MASK=mgmt,md,any + - D_LOG_FILE_APPEND_PID=1 + - D_LOG_FILE_APPEND_RANK=1 + - D_LOG_FLUSH=DEBUG + - FI_LOG_LEVEL=warn + - D_LOG_STDERR_IN_LOG=1 + storage: auto + + transport_config: + allow_insecure: true + +pool: + scm_size: 6G + nvme_size: 80G + +agent_config: + transport_config: + allow_insecure: true + +dmg: + transport_config: + allow_insecure: true + +daos_tests: + num_clients: + test_daos_inc_reint_cont_recov: 1 + test_name: + test_daos_inc_reint_cont_recov: DAOS_Inc_Reint_Cont_Recov + daos_test: + test_daos_inc_reint_cont_recov: Y + args: + test_daos_inc_reint_cont_recov: -s3 diff --git a/src/tests/suite/daos_inc_reint.c b/src/tests/suite/daos_inc_reint.c index b723683c32e..c9b988de09d 100644 --- a/src/tests/suite/daos_inc_reint.c +++ b/src/tests/suite/daos_inc_reint.c @@ -1,5 
+1,5 @@ /** - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -253,6 +253,12 @@ ir_race(test_arg_t *arg, bool create) /* Wait for ir_rank_reint can be run firstly on parent. */ sleep(3); + if (!create) + daos_fail_loc_set(DAOS_CONT_DESTROY_AFTER_FORK | DAOS_FAIL_ALWAYS); + + rc = daos_reinit(); + assert_rc_equal(rc, 0); + if (create) ir_cont_create(arg, &conts[3]); else @@ -260,12 +266,19 @@ ir_race(test_arg_t *arg, bool create) /* Do NOT exit immediately, otherwise, the pipeline for parent may be broken. */ sleep(15); + print_message("Child process complete and exit\n"); + + if (!create) + daos_fail_loc_reset(); + + daos_fini(); exit(0); } else { /* Incremental reintegration will internally repeat to handle the race. */ rc = ir_rank_reint(arg, 1, false); assert_rc_equal(rc, 0); waitpid(pid, &rc, 0); + print_message("Parent process wait child %d, got %d\n", pid, rc); } rc = daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL);