Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/container/cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -3590,5 +3590,8 @@ cont_mark_slave(struct d_hlink *link, void *arg)
/**
 * Run cont_mark_slave() over every handle of type DAOS_HTYPE_CO through
 * daos_hhash_traverse().
 *
 * If the DAOS_CONT_DESTROY_AFTER_FORK fail location fires, the traversal is
 * skipped entirely and success is reported.
 *
 * \return	0 when the traversal is skipped by the fail location,
 *		otherwise whatever daos_hhash_traverse() returns.
 */
int
dc_cont_mark_all_slave(void)
{
	int	rc = 0;

	/* Fault injection hook: leave the container handles untouched. */
	if (!DAOS_FAIL_CHECK(DAOS_CONT_DESTROY_AFTER_FORK))
		rc = daos_hhash_traverse(DAOS_HTYPE_CO, cont_mark_slave, NULL);

	return rc;
}
1 change: 1 addition & 0 deletions src/include/daos/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,7 @@ enum {
#define DAOS_POOL_RFCHECK_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa1)
#define DAOS_POOL_REINT_SLOW (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa2)
#define DAOS_CONT_DESTROY_FAIL_POST (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa3)
/*
 * Checked by dc_cont_mark_all_slave(): when this fail location fires, that
 * function returns 0 without traversing the container handle hash.
 */
#define DAOS_CONT_DESTROY_AFTER_FORK (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa4)

#define DAOS_CHK_CONT_ORPHAN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb0)
#define DAOS_CHK_CONT_BAD_LABEL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb1)
Expand Down
30 changes: 30 additions & 0 deletions src/tests/ftest/rebuild/inc_reint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
(C) Copyright 2026 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
from daos_core_base import DaosCoreBase


class IncReintContRecovTest(DaosCoreBase):
# pylint: disable=too-many-ancestors,too-many-public-methods
"""Runs daos incremental reintegration core tests.

:avocado: recursive
"""

def test_daos_inc_reint_cont_recov(self):
"""Jira ID: DAOS-17857

Test Description:
Run daos_test -Y

Use cases:
Core tests for DAOS container recovery during incremental reintegration

:avocado: tags=all,daily_regression
:avocado: tags=hw,medium,provider
:avocado: tags=inc_reint,cont_recov
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, we should add the daos_test and probably the rebuild tags as well.
However, I have another PR (#17975) where I will handle this, so you don't need to repush just for this.

Suggested change
:avocado: tags=inc_reint,cont_recov
:avocado: tags=daos_test,rebuild,inc_reint,cont_recov

:avocado: tags=IncReintContRecovTest,test_daos_inc_reint_cont_recov
"""
self.run_subtest()
63 changes: 63 additions & 0 deletions src/tests/ftest/rebuild/inc_reint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Configuration for the test_daos_inc_reint_cont_recov test case
# (presumably consumed by the matching inc_reint.py in the same directory).
hosts:
test_servers: 4

timeout: 1H

server_config:
name: daos_server
engines_per_host: 2
engines:
# Engines 0 and 1 below use the same settings; only pinned_numa_node and
# log_file differ between them.
0:
targets: 4
pinned_numa_node: 0
nr_xs_helpers: 0
log_file: daos_server0.log
log_mask: DEBUG,MEM=ERR
env_vars:
- DD_MASK=mgmt,md,any
- D_LOG_FILE_APPEND_PID=1
- D_LOG_FILE_APPEND_RANK=1
- D_LOG_FLUSH=DEBUG
- FI_LOG_LEVEL=warn
- D_LOG_STDERR_IN_LOG=1
storage: auto

1:
targets: 4
pinned_numa_node: 1
nr_xs_helpers: 0
log_file: daos_server1.log
log_mask: DEBUG,MEM=ERR
env_vars:
- DD_MASK=mgmt,md,any
- D_LOG_FILE_APPEND_PID=1
- D_LOG_FILE_APPEND_RANK=1
- D_LOG_FLUSH=DEBUG
- FI_LOG_LEVEL=warn
- D_LOG_STDERR_IN_LOG=1
storage: auto

# Insecure transport is allowed for the server, the agent and dmg alike.
transport_config:
allow_insecure: true

pool:
scm_size: 6G
nvme_size: 80G

agent_config:
transport_config:
allow_insecure: true

dmg:
transport_config:
allow_insecure: true

# Per-test settings, each keyed by the test method name.
daos_tests:
num_clients:
test_daos_inc_reint_cont_recov: 1
test_name:
test_daos_inc_reint_cont_recov: DAOS_Inc_Reint_Cont_Recov
# Presumably the daos_test option letter (the test description says
# "Run daos_test -Y") - TODO confirm.
daos_test:
test_daos_inc_reint_cont_recov: Y
# NOTE(review): the meaning of -s3 is not visible here - confirm against daos_test.
args:
test_daos_inc_reint_cont_recov: -s3
15 changes: 14 additions & 1 deletion src/tests/suite/daos_inc_reint.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -253,19 +253,32 @@ ir_race(test_arg_t *arg, bool create)
/* Wait so that ir_rank_reint can run first on the parent. */
sleep(3);

if (!create)
daos_fail_loc_set(DAOS_CONT_DESTROY_AFTER_FORK | DAOS_FAIL_ALWAYS);

rc = daos_reinit();
assert_rc_equal(rc, 0);

if (create)
ir_cont_create(arg, &conts[3]);
else
ir_cont_destroy(arg, &conts[1]);

/* Do NOT exit immediately, otherwise, the pipeline for parent may be broken. */
sleep(15);
print_message("Child process complete and exit\n");

if (!create)
daos_fail_loc_reset();

daos_fini();
exit(0);
} else {
/* Incremental reintegration will internally repeat to handle the race. */
rc = ir_rank_reint(arg, 1, false);
assert_rc_equal(rc, 0);
waitpid(pid, &rc, 0);
print_message("Parent process wait child %d, got %d\n", pid, rc);
}

rc = daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL);
Expand Down
Loading