diff --git a/.env.example b/.env.example index 27e208f9f..aabd372c3 100644 --- a/.env.example +++ b/.env.example @@ -47,6 +47,9 @@ HINDSIGHT_API_LLM_BASE_URL=https://api.openai.com/v1 HINDSIGHT_API_HOST=0.0.0.0 HINDSIGHT_API_PORT=8888 HINDSIGHT_API_LOG_LEVEL=info +# Optional retain chunking override for structured logs/transcripts. +# Unset uses HINDSIGHT_API_RETAIN_CHUNK_SIZE as the structured-chunk limit. +# HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE= # Base Path / Reverse Proxy Support (Optional) # Set these when deploying behind a reverse proxy with path-based routing diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index 5e096660c..d499d97d1 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -1149,7 +1149,14 @@ class CreateBankRequest(BaseModel): ) retain_chunk_size: int | None = Field( default=None, - description="Maximum token size for each content chunk during retain.", + description="Target maximum characters for each content chunk during retain.", + ) + retain_structured_chunk_size: int | None = Field( + default=None, + description=( + "Maximum characters for a single JSONL line or conversation turn to keep whole during retain. " + "Defaults to retain_chunk_size when unset." 
+ ), ) enable_observations: bool | None = Field( default=None, @@ -1189,6 +1196,7 @@ def get_config_updates(self) -> dict[str, Any]: "retain_extraction_mode", "retain_custom_instructions", "retain_chunk_size", + "retain_structured_chunk_size", "enable_observations", "observations_mission", ): @@ -1994,7 +2002,14 @@ class BankTemplateConfig(BaseModel): retain_custom_instructions: str | None = Field( default=None, description="Custom extraction prompt (when mode='custom')" ) - retain_chunk_size: int | None = Field(default=None, description="Max token size for each content chunk") + retain_chunk_size: int | None = Field(default=None, description="Target max characters for each content chunk") + retain_structured_chunk_size: int | None = Field( + default=None, + description=( + "Max characters for a single JSONL line or conversation turn to keep whole; " + "defaults to retain_chunk_size when unset" + ), + ) enable_observations: bool | None = Field(default=None, description="Toggle observation consolidation") observations_mission: str | None = Field(default=None, description="Controls what gets synthesised") disposition_skepticism: int | None = Field(default=None, ge=1, le=5, description="Skepticism trait (1-5)") diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index 88fa08c4e..dc95e846d 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -396,6 +396,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: # Retain settings ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS" ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE" +ENV_RETAIN_STRUCTURED_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE" ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS" ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE" ENV_RETAIN_MISSION = "HINDSIGHT_API_RETAIN_MISSION" @@ -1077,6 
+1078,25 @@ def _parse_optional_positive_int(name: str, raw: str | None) -> int | None: return _parse_positive_int(name, raw, 1) +def _validate_retain_chunking_int(name: str, value: Any) -> int: + if isinstance(value, bool) or not isinstance(value, int): + raise ValueError(f"{name} must be an integer, got {value!r}") + if value < 1: + raise ValueError(f"{name} must be >= 1, got {value}") + return value + + +def validate_retain_chunking_config(retain_chunk_size: Any, retain_structured_chunk_size: Any) -> None: + """Validate retain chunking size fields.""" + _validate_retain_chunking_int("HINDSIGHT_API_RETAIN_CHUNK_SIZE", retain_chunk_size) + if retain_structured_chunk_size is None: + return + _validate_retain_chunking_int( + "HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE", + retain_structured_chunk_size, + ) + + def _parse_optional_choice(name: str, raw: str | None, allowed: frozenset[str]) -> str | None: """Parse an optional string env var constrained to a small allowlist.""" if raw is None or raw == "": @@ -1430,6 +1450,7 @@ class HindsightConfig: # Retain settings retain_max_completion_tokens: int retain_chunk_size: int + retain_structured_chunk_size: int | None retain_extract_causal_links: bool retain_extraction_mode: str retain_mission: str | None @@ -1657,6 +1678,7 @@ class HindsightConfig: "mcp_enabled_tools", # Retention settings (behavioral) "retain_chunk_size", + "retain_structured_chunk_size", "retain_extraction_mode", "retain_mission", "retain_custom_instructions", @@ -1816,6 +1838,8 @@ def validate(self) -> None: "disabling observations/consolidation. Reflect will return HTTP 400." 
) + validate_retain_chunking_config(self.retain_chunk_size, self.retain_structured_chunk_size) + # RETAIN_MAX_COMPLETION_TOKENS must be greater than RETAIN_CHUNK_SIZE # to ensure the LLM has enough output capacity to extract facts from chunks # (not applicable when provider is "none" since no LLM calls are made) @@ -2303,6 +2327,10 @@ def from_env(cls) -> "HindsightConfig": os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS)) ), retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))), + retain_structured_chunk_size=_parse_optional_positive_int( + ENV_RETAIN_STRUCTURED_CHUNK_SIZE, + os.getenv(ENV_RETAIN_STRUCTURED_CHUNK_SIZE), + ), retain_extract_causal_links=os.getenv( ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS) ).lower() diff --git a/hindsight-api-slim/hindsight_api/config_resolver.py b/hindsight-api-slim/hindsight_api/config_resolver.py index ef41e1c89..fa910905b 100644 --- a/hindsight-api-slim/hindsight_api/config_resolver.py +++ b/hindsight-api-slim/hindsight_api/config_resolver.py @@ -18,6 +18,7 @@ HindsightConfig, _get_raw_config, normalize_config_dict, + validate_retain_chunking_config, ) from hindsight_api.engine.memory_engine import fq_table from hindsight_api.extensions.tenant import TenantExtension @@ -29,6 +30,27 @@ logger = logging.getLogger(__name__) +def _validate_retain_strategy_chunking(base_config: HindsightConfig, strategies: Any) -> None: + """Validate retain strategy chunking with the same semantics as apply_strategy().""" + if not isinstance(strategies, dict): + return + configurable = HindsightConfig.get_configurable_fields() + for strategy_name, overrides in strategies.items(): + if not isinstance(overrides, dict): + continue + filtered = {k: v for k, v in overrides.items() if k in configurable} + if not filtered: + continue + try: + resolved = replace(base_config, **filtered) + validate_retain_chunking_config( + resolved.retain_chunk_size, + 
resolved.retain_structured_chunk_size, + ) + except ValueError as e: + raise ValueError(f"Invalid retain strategy {strategy_name!r}: {e}") from e + + class ConfigResolver: """Resolves hierarchical configuration with tenant/bank overrides.""" @@ -46,6 +68,26 @@ def __init__(self, backend: "DatabaseBackend", tenant_extension: TenantExtension self._configurable_fields = HindsightConfig.get_configurable_fields() self._credential_fields = HindsightConfig.get_credential_fields() + async def _resolve_parent_config_dict(self, bank_id: str, context: RequestContext | None = None) -> dict[str, Any]: + """Resolve global + tenant config before bank-level overrides.""" + config_dict = asdict(self._global_config) + + if self.tenant_extension and context: + try: + tenant_overrides = await self.tenant_extension.get_tenant_config(context) + if tenant_overrides: + # Normalize keys and filter to configurable fields only + normalized_tenant = normalize_config_dict(tenant_overrides) + configurable_tenant = {k: v for k, v in normalized_tenant.items() if k in self._configurable_fields} + config_dict.update(configurable_tenant) + logger.debug( + f"Applied tenant config overrides for bank {bank_id}: {list(configurable_tenant.keys())}" + ) + except Exception as e: + logger.warning(f"Failed to load tenant config for bank {bank_id}: {e}") + + return config_dict + async def resolve_full_config(self, bank_id: str, context: RequestContext | None = None) -> HindsightConfig: """ Resolve full HindsightConfig for a bank with hierarchical overrides applied. 
@@ -65,23 +107,7 @@ async def resolve_full_config(self, bank_id: str, context: RequestContext | None Returns: Complete HindsightConfig with hierarchical overrides applied """ - # Start with global config (all fields) - config_dict = asdict(self._global_config) - - # Load tenant config overrides (if tenant extension available) - if self.tenant_extension and context: - try: - tenant_overrides = await self.tenant_extension.get_tenant_config(context) - if tenant_overrides: - # Normalize keys and filter to configurable fields only - normalized_tenant = normalize_config_dict(tenant_overrides) - configurable_tenant = {k: v for k, v in normalized_tenant.items() if k in self._configurable_fields} - config_dict.update(configurable_tenant) - logger.debug( - f"Applied tenant config overrides for bank {bank_id}: {list(configurable_tenant.keys())}" - ) - except Exception as e: - logger.warning(f"Failed to load tenant config for bank {bank_id}: {e}") + config_dict = await self._resolve_parent_config_dict(bank_id, context) # Load bank config overrides bank_overrides = await self._load_bank_config(bank_id) @@ -92,6 +118,10 @@ async def resolve_full_config(self, bank_id: str, context: RequestContext | None # Return full config object (dataclass doesn't have __init__ that accepts kwargs, so we update the object) # Create a new config instance by copying the global config and updating fields resolved_config = HindsightConfig(**config_dict) + validate_retain_chunking_config( + resolved_config.retain_chunk_size, + resolved_config.retain_structured_chunk_size, + ) return resolved_config async def get_bank_config(self, bank_id: str, context: RequestContext | None = None) -> dict[str, Any]: @@ -266,6 +296,32 @@ async def update_bank_config( # Validate recall budget fields _validate_recall_budget_updates(normalized_updates) + chunking_fields_updated = ( + "retain_chunk_size" in normalized_updates + or "retain_structured_chunk_size" in normalized_updates + or "retain_strategies" in 
normalized_updates + ) + if chunking_fields_updated: + config_dict = await self._resolve_parent_config_dict(bank_id, context) + active_bank_overrides = await self._load_bank_config(bank_id) + active_bank_overrides.update( + { + key: value + for key, value in normalized_updates.items() + if key in self._configurable_fields and value is not None + } + ) + for key, value in normalized_updates.items(): + if key in self._configurable_fields and value is None: + active_bank_overrides.pop(key, None) + config_dict.update(active_bank_overrides) + base_config = HindsightConfig(**config_dict) + validate_retain_chunking_config( + base_config.retain_chunk_size, + base_config.retain_structured_chunk_size, + ) + _validate_retain_strategy_chunking(base_config, base_config.retain_strategies) + # Persist the override. Banks are created lazily (on first retain), so a # PATCH that precedes any ingestion would otherwise UPDATE zero rows and # silently no-op while returning 200. Ensure the bank row exists first @@ -364,7 +420,8 @@ def apply_strategy(config: HindsightConfig, strategy_name: str) -> HindsightConf A strategy is a named set of hierarchical field overrides stored in config.retain_strategies. Any field in _HIERARCHICAL_FIELDS can be overridden, including retain_extraction_mode, retain_chunk_size, - entity_labels, entities_allow_free_form, etc. + retain_structured_chunk_size, entity_labels, + entities_allow_free_form, etc. Unknown strategy names log a warning and return config unchanged. Unknown or non-hierarchical fields in the strategy are silently ignored. 
@@ -386,4 +443,6 @@ def apply_strategy(config: HindsightConfig, strategy_name: str) -> HindsightConf return config logger.debug(f"Applying retain strategy '{strategy_name}': {list(filtered.keys())}") - return replace(config, **filtered) + resolved = replace(config, **filtered) + validate_retain_chunking_config(resolved.retain_chunk_size, resolved.retain_structured_chunk_size) + return resolved diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 1a5df8adf..a4cdb1232 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -423,6 +423,12 @@ class _SubBatchSplit: document_body_overrides: list[str | None] = field(default_factory=list) +@dataclass(frozen=True) +class _RetainChunkingConfig: + chunk_size: int + structured_chunk_size: int | None + + def _split_contents_into_sub_batches( contents: list[RetainContentDict], tokens_per_batch: int, @@ -3211,7 +3217,7 @@ async def retain_batch_async( # with, so the offsets match the chunk_index values it assigns. 
from .retain import fact_extraction, fact_storage - sub_chunk_size = await self._resolve_retain_chunk_size(bank_id, request_context, strategy) + chunking_config = await self._resolve_retain_chunking_config(bank_id, request_context, strategy) chunk_offsets: dict[str, int] = {} # In update_mode="append", retain_batch prepends the existing document @@ -3234,7 +3240,11 @@ async def retain_batch_async( existing_text = await fact_storage.get_document_content(conn, bank_id, append_doc_id) if existing_text: append_prepend_chunks[append_doc_id] = len( - fact_extraction.chunk_text(existing_text, sub_chunk_size) + fact_extraction.chunk_text( + existing_text, + chunking_config.chunk_size, + structured_chunk_size=chunking_config.structured_chunk_size, + ) ) for i, (sub_batch, sub_origins) in enumerate(zip(sub_batches, origin_indices), 1): @@ -3288,7 +3298,13 @@ async def retain_batch_async( # document continues the sequence. if sub_doc_id: sub_chunk_count = sum( - len(fact_extraction.chunk_text(item.get("content", "") or "", sub_chunk_size)) + len( + fact_extraction.chunk_text( + item.get("content", "") or "", + chunking_config.chunk_size, + structured_chunk_size=chunking_config.structured_chunk_size, + ) + ) for item in sub_batch ) # retain_batch only prepends the existing body on the global @@ -3399,13 +3415,13 @@ async def _submit_post_insert_maintenance( except Exception as e: logger.warning(f"Failed to submit graph maintenance task for bank {bank_id}: {e}") - async def _resolve_retain_chunk_size( + async def _resolve_retain_chunking_config( self, bank_id: str, request_context: "RequestContext", strategy: str | None, - ) -> int: - """Resolve the effective ``retain_chunk_size`` for a bank. + ) -> _RetainChunkingConfig: + """Resolve the effective retain chunking settings for a bank. 
Mirrors the bank-config + strategy resolution that ``_retain_batch_async_internal`` applies before handing config to the @@ -3419,7 +3435,10 @@ async def _resolve_retain_chunk_size( effective_strategy = strategy or resolved_config.retain_default_strategy if effective_strategy: resolved_config = apply_strategy(resolved_config, effective_strategy) - return getattr(resolved_config, "retain_chunk_size", 3000) + return _RetainChunkingConfig( + chunk_size=getattr(resolved_config, "retain_chunk_size", 3000), + structured_chunk_size=getattr(resolved_config, "retain_structured_chunk_size", None), + ) async def _retain_batch_async_internal( self, diff --git a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py index 36f21aaaa..0381f5115 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py @@ -420,19 +420,14 @@ class VerbatimFactExtractionResponse(BaseModel): "", # Characters (last resort) ] -# A single structured unit (a JSONL line or a conversation turn) is kept whole -# even when it overflows the budget — but only up to this multiple. Beyond it, -# the unit is split as text rather than handed to the LLM wildly over budget -# (the extractor has no second re-chunk pass; an oversized chunk just errors). -_CHUNK_OVERFLOW_FACTOR = 1.5 - def _split_oversized_unit(text: str, max_chars: int) -> list[str]: """Sentence-aware split of a single unit that overflowed the budget. Used when one JSONL line / conversation turn is so large it can't be kept - whole within ``_CHUNK_OVERFLOW_FACTOR``. The resulting fragments are no - longer valid JSON, but the fact extractor treats every chunk as plain text. + whole within the configured structured-chunk limit. The resulting fragments + are no longer valid JSON, but the fact extractor treats every chunk as plain + text. 
""" from langchain_text_splitters import RecursiveCharacterTextSplitter @@ -446,18 +441,25 @@ def _split_oversized_unit(text: str, max_chars: int) -> list[str]: return splitter.split_text(text) -def chunk_text(text: str, max_chars: int) -> list[str]: +def _structured_chunk_size(max_chars: int, structured_chunk_size: int | None) -> int: + return structured_chunk_size if structured_chunk_size is not None else max_chars + + +def chunk_text(text: str, max_chars: int, structured_chunk_size: int | None = None) -> list[str]: """ Split text into chunks, preserving conversation structure when possible. For JSON conversation arrays (user/assistant turns) and JSONL (newline-delimited JSON objects), splits at turn/line boundaries so no object is split across chunks. - A single turn/line that overflows is kept whole up to ``_CHUNK_OVERFLOW_FACTOR``× - the budget, then split as text. For plain text, uses sentence-aware splitting. + A single turn/line that overflows ``max_chars`` is kept whole only up to + ``structured_chunk_size``. When unset, that limit defaults to ``max_chars``. + For plain text, uses sentence-aware splitting. Args: text: Input text to chunk (plain text, JSON conversation, or JSONL) - max_chars: Maximum characters per chunk (default 120k ≈ 30k tokens) + max_chars: Target maximum characters per chunk + structured_chunk_size: Maximum characters for a single JSONL line or + conversation turn to keep whole. Defaults to ``max_chars``. Returns: List of text chunks, roughly under max_chars @@ -471,12 +473,12 @@ def chunk_text(text: str, max_chars: int) -> list[str]: parsed = json.loads(text) if isinstance(parsed, list) and all(isinstance(turn, dict) for turn in parsed): # This looks like a conversation - chunk at turn boundaries - return _chunk_conversation(parsed, max_chars) + return _chunk_conversation(parsed, max_chars, structured_chunk_size) except (json.JSONDecodeError, ValueError): pass # Try to parse as JSONL (newline-delimited JSON objects, e.g. 
session logs) - jsonl_chunks = _chunk_jsonl(text, max_chars) + jsonl_chunks = _chunk_jsonl(text, max_chars, structured_chunk_size) if jsonl_chunks is not None: return jsonl_chunks @@ -484,19 +486,20 @@ def chunk_text(text: str, max_chars: int) -> list[str]: return _split_oversized_unit(text, max_chars) -def _chunk_conversation(turns: list[dict], max_chars: int) -> list[str]: +def _chunk_conversation(turns: list[dict], max_chars: int, structured_chunk_size: int | None = None) -> list[str]: """ Chunk a conversation array at turn boundaries, preserving complete turns. Args: turns: List of conversation turn dicts (with 'role' and 'content' keys) max_chars: Maximum characters per chunk + structured_chunk_size: Maximum characters for a single turn to keep whole Returns: List of JSON-serialized chunks, each containing complete turns """ - overflow_limit = int(max_chars * _CHUNK_OVERFLOW_FACTOR) + structured_limit = _structured_chunk_size(max_chars, structured_chunk_size) chunks = [] current_chunk = [] @@ -512,11 +515,12 @@ def _flush() -> None: for turn in turns: # Estimate size of this turn when serialized (with comma separator) turn_json = json.dumps(turn, ensure_ascii=False) - turn_size = len(turn_json) + 1 # +1 for comma + turn_unit_size = len(turn_json) + turn_size = turn_unit_size + 1 # +1 for comma # A turn too large to keep whole even alone: flush, then split it as # text so no chunk runs far over budget (the extractor won't re-chunk). - if turn_size > overflow_limit: + if turn_unit_size > structured_limit: _flush() chunks.extend(_split_oversized_unit(turn_json, max_chars)) continue @@ -535,18 +539,21 @@ def _flush() -> None: return chunks if chunks else [json.dumps(turns, ensure_ascii=False)] -def _chunk_jsonl(text: str, max_chars: int) -> list[str] | None: +def _chunk_jsonl(text: str, max_chars: int, structured_chunk_size: int | None = None) -> list[str] | None: """Chunk newline-delimited JSON (JSONL) at line boundaries. 
Detects JSONL — two or more non-empty lines, each a complete JSON object — and packs whole lines into chunks so no line is split across chunks (multiple - short lines may share a chunk). A line that overflows is kept whole up to - ``_CHUNK_OVERFLOW_FACTOR``× the budget, then split as text. Returns ``None`` - if the input is not JSONL, so the caller falls back to plain-text splitting. + short lines may share a chunk). A line that overflows ``max_chars`` is kept + whole only up to ``structured_chunk_size``. When unset, that limit + defaults to ``max_chars``. Returns ``None`` if the input is not JSONL, so the + caller falls back to plain-text splitting. Args: text: Input text to inspect/chunk. max_chars: Maximum characters per chunk. + structured_chunk_size: Maximum characters for a single JSONL line to + keep whole. Returns: List of JSONL chunks (lines joined by newline), or ``None`` if not JSONL. @@ -563,7 +570,7 @@ def _chunk_jsonl(text: str, max_chars: int) -> list[str] | None: if not isinstance(obj, dict): return None - overflow_limit = int(max_chars * _CHUNK_OVERFLOW_FACTOR) + structured_limit = _structured_chunk_size(max_chars, structured_chunk_size) chunks: list[str] = [] current_chunk: list[str] = [] @@ -577,17 +584,18 @@ def _flush() -> None: current_size = 0 for line in lines: + line_unit_size = len(line) line_size = len(line) + 1 # +1 for the joining newline # A line too large to keep whole even alone: flush, then split it as # text so no chunk runs far over budget (the extractor won't re-chunk). - if line_size > overflow_limit: + if line_unit_size > structured_limit: _flush() chunks.extend(_split_oversized_unit(line, max_chars)) continue # If adding this line would exceed the limit and we have lines, flush. - # A line up to overflow_limit is kept whole (a small, bounded overflow). + # A line up to structured_limit is kept whole (a bounded overflow). 
if current_size + line_size > max_chars and current_chunk: _flush() @@ -1738,7 +1746,11 @@ async def extract_facts_from_text( - chunks: List of tuples (chunk_text, fact_count) for each chunk - usage: Aggregated token usage across all LLM calls """ - chunks = chunk_text(text, max_chars=config.retain_chunk_size) + chunks = chunk_text( + text, + max_chars=config.retain_chunk_size, + structured_chunk_size=config.retain_structured_chunk_size, + ) # Log chunk count before starting LLM requests total_chars = sum(len(c) for c in chunks) @@ -1921,7 +1933,11 @@ async def extract_facts_from_contents_batch_api( prompt, response_schema = _build_extraction_prompt_and_schema(config) for content_index, item in enumerate(contents): - chunks = chunk_text(item.content, max_chars=config.retain_chunk_size) + chunks = chunk_text( + item.content, + max_chars=config.retain_chunk_size, + structured_chunk_size=config.retain_structured_chunk_size, + ) for chunk_index_in_content, chunk in enumerate(chunks): all_chunks_info.append((chunk, content_index, chunk_index_in_content, item.event_date, item.context)) @@ -2342,7 +2358,11 @@ def _extract_facts_chunks( global_chunk_idx = 0 for content_index, content in enumerate(contents): - chunks = chunk_text(content.content, config.retain_chunk_size) + chunks = chunk_text( + content.content, + config.retain_chunk_size, + structured_chunk_size=config.retain_structured_chunk_size, + ) for chunk in chunks: chunks_metadata.append( ChunkMetadata( diff --git a/hindsight-api-slim/hindsight_api/engine/retain/orchestrator.py b/hindsight-api-slim/hindsight_api/engine/retain/orchestrator.py index 4b9f873e4..cb5be013e 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/orchestrator.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/orchestrator.py @@ -863,10 +863,15 @@ async def retain_batch( # retain code paths. 
chunk_batch_size = getattr(config, "retain_chunk_batch_size", 100) chunk_size = getattr(config, "retain_chunk_size", 3000) + structured_chunk_size = getattr(config, "retain_structured_chunk_size", None) all_pre_chunks: list[str] = [] chunk_to_content: list[int] = [] # maps chunk index -> index into contents for content_idx, content in enumerate(contents): - content_chunks = fact_extraction.chunk_text(content.content, chunk_size) + content_chunks = fact_extraction.chunk_text( + content.content, + chunk_size, + structured_chunk_size=structured_chunk_size, + ) all_pre_chunks.extend(content_chunks) chunk_to_content.extend([content_idx] * len(content_chunks)) @@ -2246,9 +2251,14 @@ def _chunk_contents_for_delta(contents: list[RetainContent], config) -> dict[int """ result = {} global_chunk_idx = 0 + chunk_size = getattr(config, "retain_chunk_size", 3000) + structured_chunk_size = getattr(config, "retain_structured_chunk_size", None) for content in contents: - chunk_size = getattr(config, "retain_chunk_size", 3000) - chunks = fact_extraction.chunk_text(content.content, chunk_size) + chunks = fact_extraction.chunk_text( + content.content, + chunk_size, + structured_chunk_size=structured_chunk_size, + ) for chunk_text in chunks: result[global_chunk_idx] = chunk_text global_chunk_idx += 1 diff --git a/hindsight-api-slim/hindsight_api/mcp_tools.py b/hindsight-api-slim/hindsight_api/mcp_tools.py index bba68e442..aff6e111d 100644 --- a/hindsight-api-slim/hindsight_api/mcp_tools.py +++ b/hindsight-api-slim/hindsight_api/mcp_tools.py @@ -3191,7 +3191,8 @@ async def update_bank( - retain_mission: Steers what gets extracted during retain(). - retain_extraction_mode: 'concise' (default), 'verbose', or 'custom'. - retain_custom_instructions: Custom extraction prompt (active when mode is 'custom'). - - retain_chunk_size: Maximum token size for each content chunk. + - retain_chunk_size: Target maximum characters for each content chunk. 
+ - retain_structured_chunk_size: Maximum characters for a single JSONL line or conversation turn to keep whole. - retain_chunk_batch_size: Number of chunks to process in parallel. - enable_observations: Toggle observation consolidation after retain(). - observations_mission: Controls observation synthesis rules. @@ -3250,7 +3251,8 @@ async def update_bank( - retain_mission: Steers what gets extracted during retain(). - retain_extraction_mode: 'concise' (default), 'verbose', or 'custom'. - retain_custom_instructions: Custom extraction prompt (active when mode is 'custom'). - - retain_chunk_size: Maximum token size for each content chunk. + - retain_chunk_size: Target maximum characters for each content chunk. + - retain_structured_chunk_size: Maximum characters for a single JSONL line or conversation turn to keep whole. - retain_chunk_batch_size: Number of chunks to process in parallel. - enable_observations: Toggle observation consolidation after retain(). - observations_mission: Controls observation synthesis rules. diff --git a/hindsight-api-slim/tests/test_bank_template_configurable_fields.py b/hindsight-api-slim/tests/test_bank_template_configurable_fields.py index aee525938..675e04f23 100644 --- a/hindsight-api-slim/tests/test_bank_template_configurable_fields.py +++ b/hindsight-api-slim/tests/test_bank_template_configurable_fields.py @@ -30,6 +30,7 @@ # Each tuple is (field_name, applied_value). Values chosen to differ # visibly from defaults so round-trip bugs surface. 
NEW_FIELDS: list[tuple[str, object]] = [ + ("retain_structured_chunk_size", 6000), ("retain_default_strategy", "strategy-a"), ("retain_strategies", {"strategy-a": {"mode": "concise", "max_tokens": 512}}), ("retain_chunk_batch_size", 7), diff --git a/hindsight-api-slim/tests/test_chunking.py b/hindsight-api-slim/tests/test_chunking.py index 865535df6..d857f0ff9 100644 --- a/hindsight-api-slim/tests/test_chunking.py +++ b/hindsight-api-slim/tests/test_chunking.py @@ -12,10 +12,6 @@ from hindsight_api.engine.retain.fact_extraction import chunk_text -# Mirror of fact_extraction._CHUNK_OVERFLOW_FACTOR — a unit is kept whole only -# up to this multiple of the budget before being split as text. -OVERFLOW_FACTOR = 1.5 - # --------------------------------------------------------------------------- # Plain text @@ -137,22 +133,35 @@ def test_chunk_jsonl_splits_at_line_boundaries(): assert seen == [json.loads(line) for line in lines], "Lines must be preserved in order" -def test_chunk_jsonl_small_overflow_kept_whole(): - """A JSONL line that overflows by less than 1.5x is kept whole, not split.""" - big = json.dumps({"c": "y" * 20}) # 29 chars; budget 25, cap 37 -> kept whole +def test_chunk_jsonl_default_structured_unit_limit_matches_budget(): + """A JSONL line over the budget is split when no larger structured-chunk cap is set.""" + big = json.dumps({"c": "y" * 20}) # 29 chars; budget 25 -> split small = json.dumps({"c": "ok"}) text = "\n".join([big, small]) - assert 25 < len(big) <= int(25 * OVERFLOW_FACTOR) chunks = chunk_text(text, max_chars=25) - # The line overflows the budget but stays a single intact chunk of its own. 
+ assert chunks == [ + '{"c":', + '"yyyyyyyyyyyyyyyyyyyy"}', + small, + ] + + +def test_chunk_jsonl_custom_structured_unit_limit_keeps_overflow_whole(): + """A JSONL line over the budget is kept whole when the explicit cap allows it.""" + big = json.dumps({"c": "y" * 20}) # 29 chars + small = json.dumps({"c": "ok"}) + text = "\n".join([big, small]) + + chunks = chunk_text(text, max_chars=25, structured_chunk_size=len(big)) + assert chunks == [big, small] def test_chunk_jsonl_huge_line_is_split(): - """A JSONL line past the 1.5x overflow cap is split as text — exact fragments.""" - huge = json.dumps({"c": "y" * 40}) # 50 chars; budget 20, cap 30 -> must split + """A JSONL line past the structured-chunk cap is split as text — exact fragments.""" + huge = json.dumps({"c": "y" * 40}) # 49 chars; budget/cap 20 -> must split small = json.dumps({"c": "ok"}) text = "\n".join([huge, small]) @@ -166,9 +175,9 @@ def test_chunk_jsonl_huge_line_is_split(): 'yy"}', '{"c": "ok"}', ] - # No fragment exceeds the overflow cap. + # No fragment exceeds the configured split budget. 
for chunk in chunks: - assert len(chunk) <= int(20 * OVERFLOW_FACTOR) + assert len(chunk) <= 20 # --------------------------------------------------------------------------- @@ -212,8 +221,22 @@ def test_chunk_conversation_splits_at_turn_boundaries(): assert seen == turns +def test_chunk_conversation_custom_structured_unit_limit_keeps_overflow_whole(): + """A conversation turn over the budget is kept whole when the explicit cap allows it.""" + turns = [{"c": "y" * 20}, {"c": "ok"}] + text = json.dumps(turns) + turn_size = len(json.dumps(turns[0])) + + chunks = chunk_text(text, max_chars=25, structured_chunk_size=turn_size) + + assert chunks == [ + '[{"c": "yyyyyyyyyyyyyyyyyyyy"}]', + '[{"c": "ok"}]', + ] + + def test_chunk_conversation_huge_turn_is_split(): - """A single turn past the 1.5x overflow cap is split as text — exact fragments.""" + """A single turn past the structured-chunk cap is split as text — exact fragments.""" turns = [{"c": "y" * 40}, {"c": "ok"}] text = json.dumps(turns) @@ -228,7 +251,7 @@ def test_chunk_conversation_huge_turn_is_split(): '[{"c": "ok"}]', ] for chunk in chunks: - assert len(chunk) <= int(20 * OVERFLOW_FACTOR) + assert len(chunk) <= 20 # --------------------------------------------------------------------------- diff --git a/hindsight-api-slim/tests/test_config_validation.py b/hindsight-api-slim/tests/test_config_validation.py index 5872ae209..1a47e6fcf 100644 --- a/hindsight-api-slim/tests/test_config_validation.py +++ b/hindsight-api-slim/tests/test_config_validation.py @@ -20,6 +20,7 @@ def setup_test_env(): "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS", "HINDSIGHT_API_CONSOLIDATION_MAX_COMPLETION_TOKENS", "HINDSIGHT_API_RETAIN_CHUNK_SIZE", + "HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE", "HINDSIGHT_API_LLM_PROVIDER", "HINDSIGHT_API_LLM_MODEL", "HINDSIGHT_API_LLM_REASONING_EFFORT", @@ -104,6 +105,54 @@ def test_valid_retain_config_succeeds(): config = HindsightConfig.from_env() assert config.retain_max_completion_tokens == 64000 
assert config.retain_chunk_size == 3000 + assert config.retain_structured_chunk_size is None + + +def test_retain_structured_chunk_size_reads_from_env(): + """Structured JSONL/conversation units can have an explicit character cap.""" + from hindsight_api.config import HindsightConfig + + os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "64000" + os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "3000" + os.environ["HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE"] = "9000" + os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock" + + config = HindsightConfig.from_env() + assert config.retain_structured_chunk_size == 9000 + + +def test_retain_structured_chunk_size_can_be_less_than_chunk_size(): + """Structured-chunk cap can be smaller than the retain chunk target.""" + from hindsight_api.config import HindsightConfig + + os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "64000" + os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "3000" + os.environ["HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE"] = "2000" + os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock" + + config = HindsightConfig.from_env() + assert config.retain_chunk_size == 3000 + assert config.retain_structured_chunk_size == 2000 + + +def test_retain_strategy_structured_chunk_size_validation(): + """Retain strategies allow structured-chunk caps below chunk size.""" + from hindsight_api.config import HindsightConfig + from hindsight_api.config_resolver import apply_strategy + + os.environ["HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"] = "64000" + os.environ["HINDSIGHT_API_RETAIN_CHUNK_SIZE"] = "3000" + os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock" + + config = HindsightConfig.from_env() + config.retain_strategies = { + "jsonl": { + "retain_structured_chunk_size": 2000, + } + } + + resolved = apply_strategy(config, "jsonl") + assert resolved.retain_structured_chunk_size == 2000 def test_semantic_min_similarity_reads_from_env(): diff --git a/hindsight-api-slim/tests/test_hierarchical_config.py 
b/hindsight-api-slim/tests/test_hierarchical_config.py index 3b2ecb2eb..1628bed40 100644 --- a/hindsight-api-slim/tests/test_hierarchical_config.py +++ b/hindsight-api-slim/tests/test_hierarchical_config.py @@ -113,6 +113,7 @@ async def test_hierarchical_fields_categorization(): assert "retain_mission" in configurable assert "retain_custom_instructions" in configurable assert "retain_chunk_size" in configurable + assert "retain_structured_chunk_size" in configurable assert "enable_observations" in configurable assert "consolidation_llm_batch_size" in configurable assert "consolidation_source_facts_max_tokens" in configurable @@ -139,7 +140,7 @@ async def test_hierarchical_fields_categorization(): assert "consolidation_llm_parallelism" in configurable # Verify count is correct - assert len(configurable) == 38 + assert len(configurable) == 39 # Verify credential fields (NEVER exposed) assert "llm_api_key" in credentials @@ -250,6 +251,187 @@ async def test_bank_config_null_consolidation_overrides_use_server_defaults(): assert field_name not in bank_overrides +@pytest.mark.asyncio +async def test_retain_chunking_null_overrides_use_server_defaults(): + """JSON null retain chunking overrides should behave like Server Default.""" + bank_id = "test-null-retain-chunking-config-bank" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + await resolver.update_bank_config( + bank_id, + { + "retain_chunk_size": 5000, + "retain_structured_chunk_size": 7000, + }, + ) + config = await resolver.resolve_full_config(bank_id) + assert config.retain_chunk_size == 5000 + assert config.retain_structured_chunk_size == 7000 + + await resolver.update_bank_config( + bank_id, + { + "retain_chunk_size": None, + "retain_structured_chunk_size": None, + }, + ) + + resolved_config = await resolver.resolve_full_config(bank_id) + global_config = resolver._global_config + assert resolved_config.retain_chunk_size == global_config.retain_chunk_size + assert 
resolved_config.retain_structured_chunk_size == global_config.retain_structured_chunk_size + + +@pytest.mark.asyncio +async def test_retain_chunking_validation_uses_null_cleared_chunk_size(): + """Chunking validation should apply JSON null tombstones before checking final values.""" + bank_id = "test-null-retain-chunking-validation-bank" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + await resolver.update_bank_config( + bank_id, + { + "retain_chunk_size": 5000, + "retain_structured_chunk_size": 7000, + }, + ) + + await resolver.update_bank_config( + bank_id, + { + "retain_chunk_size": None, + "retain_structured_chunk_size": 4000, + }, + ) + + resolved_config = await resolver.resolve_full_config(bank_id) + assert resolved_config.retain_chunk_size == resolver._global_config.retain_chunk_size + assert resolved_config.retain_structured_chunk_size == 4000 + + +@pytest.mark.asyncio +async def test_existing_retain_strategy_structured_chunking_survives_chunk_size_changes(): + """Top-level chunk size updates can exceed existing structured chunk caps.""" + from hindsight_api.config_resolver import apply_strategy + + bank_id = "test-existing-retain-strategy-chunking-bank" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + await resolver.update_bank_config( + bank_id, + { + "retain_strategies": { + "jsonl": { + "retain_structured_chunk_size": 4000, + }, + }, + }, + ) + + await resolver.update_bank_config(bank_id, {"retain_chunk_size": 5000}) + + config = await resolver.resolve_full_config(bank_id) + strategy_config = apply_strategy(config, "jsonl") + assert strategy_config.retain_chunk_size == 5000 + assert strategy_config.retain_structured_chunk_size == 4000 + + +@pytest.mark.asyncio +async def test_retain_strategy_chunking_null_matches_apply_strategy_semantics(): + """Strategy null values are direct overrides, not bank-config tombstones.""" + from hindsight_api.config_resolver import apply_strategy + + bank_id = 
"test-retain-strategy-null-chunking-bank" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + await resolver.update_bank_config( + bank_id, + { + "retain_structured_chunk_size": 5000, + "retain_strategies": { + "large-turns": { + "retain_chunk_size": 8000, + "retain_structured_chunk_size": None, + }, + }, + }, + ) + + resolved_config = await resolver.resolve_full_config(bank_id) + strategy_config = apply_strategy(resolved_config, "large-turns") + assert strategy_config.retain_chunk_size == 8000 + assert strategy_config.retain_structured_chunk_size is None + + +@pytest.mark.parametrize( + "updates", + [ + {"retain_chunk_size": "5000"}, + {"retain_chunk_size": 5000.5}, + {"retain_chunk_size": True}, + {"retain_structured_chunk_size": "5000"}, + {"retain_structured_chunk_size": 5000.5}, + {"retain_structured_chunk_size": False}, + ], +) +@pytest.mark.asyncio +async def test_retain_chunking_raw_patch_values_must_be_integers(updates): + """Raw config PATCH values should fail as 400-style ValueError, not TypeError.""" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + with pytest.raises(ValueError) as exc_info: + await resolver.update_bank_config("test-retain-chunking-malformed-patch-bank", updates) + + assert "must be an integer" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_retain_strategy_chunk_size_null_rejected_with_value_error(): + """Strategy retain_chunk_size cannot be null because apply_strategy would use it directly.""" + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + with pytest.raises(ValueError) as exc_info: + await resolver.update_bank_config( + "test-retain-strategy-null-chunk-size-bank", + { + "retain_strategies": { + "bad": { + "retain_chunk_size": None, + }, + }, + }, + ) + + error_message = str(exc_info.value) + assert "Invalid retain strategy 'bad'" in error_message + assert "HINDSIGHT_API_RETAIN_CHUNK_SIZE must be an integer" in error_message + + +@pytest.mark.asyncio +async def 
test_retain_strategy_structured_chunk_size_can_be_below_same_update_chunk_size(): + """Strategy structured chunk size can be lower than the top-level chunk size.""" + from hindsight_api.config_resolver import apply_strategy + + resolver = ConfigResolver(backend=FakeBankConfigBackend()) + + await resolver.update_bank_config( + "test-retain-strategy-chunking-bank", + { + "retain_chunk_size": 5000, + "retain_strategies": { + "jsonl": { + "retain_structured_chunk_size": 4000, + }, + }, + }, + ) + + config = await resolver.resolve_full_config("test-retain-strategy-chunking-bank") + strategy_config = apply_strategy(config, "jsonl") + assert strategy_config.retain_chunk_size == 5000 + assert strategy_config.retain_structured_chunk_size == 4000 + + @pytest.mark.asyncio async def test_config_validation_rejects_static_fields(memory, request_context): """Test that attempting to override static fields raises ValueError.""" @@ -534,7 +716,12 @@ async def test_config_get_bank_config_no_static_or_credential_fields_leak(memory ) # Verify we have the expected configurable fields (small set) - expected_configurable = ["retain_chunk_size", "retain_extraction_mode", "enable_observations"] + expected_configurable = [ + "retain_chunk_size", + "retain_structured_chunk_size", + "retain_extraction_mode", + "enable_observations", + ] for field in expected_configurable: assert field in config, f"Expected configurable field '{field}' missing from config" diff --git a/hindsight-api-slim/tests/test_mcp_tools.py b/hindsight-api-slim/tests/test_mcp_tools.py index f6d8b2dc6..dfb149e31 100644 --- a/hindsight-api-slim/tests/test_mcp_tools.py +++ b/hindsight-api-slim/tests/test_mcp_tools.py @@ -1572,6 +1572,7 @@ async def test_update_bank_multiple_config_fields(self, mock_memory): "retain_extraction_mode": "custom", "retain_custom_instructions": "Extract only action items", "retain_chunk_size": 2000, + "retain_structured_chunk_size": 5000, } ) config_call = 
mock_memory._config_resolver.update_bank_config.call_args @@ -1585,6 +1586,7 @@ async def test_update_bank_multiple_config_fields(self, mock_memory): assert updates["retain_extraction_mode"] == "custom" assert updates["retain_custom_instructions"] == "Extract only action items" assert updates["retain_chunk_size"] == 2000 + assert updates["retain_structured_chunk_size"] == 5000 async def test_update_bank_name_and_config_together(self, mock_memory): """name goes to engine, config_updates goes to config resolver.""" diff --git a/hindsight-cli/src/commands/bank.rs b/hindsight-cli/src/commands/bank.rs index f4ecfa187..600c19f2f 100644 --- a/hindsight-cli/src/commands/bank.rs +++ b/hindsight-cli/src/commands/bank.rs @@ -797,6 +797,8 @@ pub fn set_config( llm_base_url: Option, retain_mission: Option, retain_extraction_mode: Option, + retain_chunk_size: Option, + retain_structured_chunk_size: Option, observations_mission: Option, reflect_mission: Option, disposition_skepticism: Option, @@ -842,6 +844,18 @@ pub fn set_config( serde_json::Value::String(mode), ); } + if let Some(size) = retain_chunk_size { + updates.insert( + "retain_chunk_size".to_string(), + serde_json::Value::Number(size.into()), + ); + } + if let Some(size) = retain_structured_chunk_size { + updates.insert( + "retain_structured_chunk_size".to_string(), + serde_json::Value::Number(size.into()), + ); + } if let Some(mission) = observations_mission { updates.insert( "observations_mission".to_string(), @@ -874,7 +888,7 @@ pub fn set_config( } if updates.is_empty() { - return Err(anyhow!("No config updates provided. Use --llm-provider, --llm-model, --retain-mission, --observations-mission, or other flags".to_string())); + return Err(anyhow!("No config updates provided. 
Use --llm-provider, --llm-model, --retain-mission, --retain-chunk-size, --observations-mission, or other flags".to_string())); } let spinner = if output_format == OutputFormat::Pretty { diff --git a/hindsight-cli/src/main.rs b/hindsight-cli/src/main.rs index ac7b0dffd..01809874b 100644 --- a/hindsight-cli/src/main.rs +++ b/hindsight-cli/src/main.rs @@ -369,6 +369,14 @@ enum BankCommands { #[arg(long)] retain_extraction_mode: Option, + /// Target maximum characters for each content chunk during retain + #[arg(long, value_parser = clap::value_parser!(i64).range(1..))] + retain_chunk_size: Option, + + /// Maximum characters for a JSONL line or conversation turn to keep whole during retain + #[arg(long, value_parser = clap::value_parser!(i64).range(1..))] + retain_structured_chunk_size: Option, + /// Observations mission: what to synthesize into durable observations #[arg(long)] observations_mission: Option, @@ -1294,6 +1302,8 @@ fn run() -> Result<()> { llm_base_url, retain_mission, retain_extraction_mode, + retain_chunk_size, + retain_structured_chunk_size, observations_mission, reflect_mission, disposition_skepticism, @@ -1308,6 +1318,8 @@ fn run() -> Result<()> { llm_base_url, retain_mission, retain_extraction_mode, + retain_chunk_size, + retain_structured_chunk_size, observations_mission, reflect_mission, disposition_skepticism, @@ -2083,7 +2095,11 @@ fn handle_profile(cmd: ProfileCommands, output_format: OutputFormat) -> Result<( } let deleted = Config::delete_profile(&name)?; if output_format == OutputFormat::Pretty { - ui::print_success(&format!("Deleted profile '{}' ({})", name, deleted.display())); + ui::print_success(&format!( + "Deleted profile '{}' ({})", + name, + deleted.display() + )); } else { output::print_output( &serde_json::json!({ diff --git a/hindsight-clients/go/api/openapi.yaml b/hindsight-clients/go/api/openapi.yaml index 2260a39b8..1910b70f4 100644 --- a/hindsight-clients/go/api/openapi.yaml +++ b/hindsight-clients/go/api/openapi.yaml @@ 
-4412,6 +4412,9 @@ components: retain_chunk_size: nullable: true type: integer + retain_structured_chunk_size: + nullable: true + type: integer enable_observations: nullable: true type: boolean @@ -4930,6 +4933,9 @@ components: retain_chunk_size: nullable: true type: integer + retain_structured_chunk_size: + nullable: true + type: integer enable_observations: nullable: true type: boolean diff --git a/hindsight-clients/go/model_bank_template_config.go b/hindsight-clients/go/model_bank_template_config.go index 51984b660..26c2ef5f8 100644 --- a/hindsight-clients/go/model_bank_template_config.go +++ b/hindsight-clients/go/model_bank_template_config.go @@ -24,6 +24,7 @@ type BankTemplateConfig struct { RetainExtractionMode NullableString `json:"retain_extraction_mode,omitempty"` RetainCustomInstructions NullableString `json:"retain_custom_instructions,omitempty"` RetainChunkSize NullableInt32 `json:"retain_chunk_size,omitempty"` + RetainStructuredChunkSize NullableInt32 `json:"retain_structured_chunk_size,omitempty"` EnableObservations NullableBool `json:"enable_observations,omitempty"` ObservationsMission NullableString `json:"observations_mission,omitempty"` DispositionSkepticism NullableInt32 `json:"disposition_skepticism,omitempty"` @@ -280,6 +281,48 @@ func (o *BankTemplateConfig) UnsetRetainChunkSize() { o.RetainChunkSize.Unset() } +// GetRetainStructuredChunkSize returns the RetainStructuredChunkSize field value if set, zero value otherwise (both if not set or set to explicit null). +func (o *BankTemplateConfig) GetRetainStructuredChunkSize() int32 { + if o == nil || IsNil(o.RetainStructuredChunkSize.Get()) { + var ret int32 + return ret + } + return *o.RetainStructuredChunkSize.Get() +} + +// GetRetainStructuredChunkSizeOk returns a tuple with the RetainStructuredChunkSize field value if set, nil otherwise +// and a boolean to check if the value has been set. 
+// NOTE: If the value is an explicit nil, `nil, true` will be returned +func (o *BankTemplateConfig) GetRetainStructuredChunkSizeOk() (*int32, bool) { + if o == nil { + return nil, false + } + return o.RetainStructuredChunkSize.Get(), o.RetainStructuredChunkSize.IsSet() +} + +// HasRetainStructuredChunkSize returns a boolean if a field has been set. +func (o *BankTemplateConfig) HasRetainStructuredChunkSize() bool { + if o != nil && o.RetainStructuredChunkSize.IsSet() { + return true + } + + return false +} + +// SetRetainStructuredChunkSize gets a reference to the given NullableInt32 and assigns it to the RetainStructuredChunkSize field. +func (o *BankTemplateConfig) SetRetainStructuredChunkSize(v int32) { + o.RetainStructuredChunkSize.Set(&v) +} +// SetRetainStructuredChunkSizeNil sets the value for RetainStructuredChunkSize to be an explicit nil +func (o *BankTemplateConfig) SetRetainStructuredChunkSizeNil() { + o.RetainStructuredChunkSize.Set(nil) +} + +// UnsetRetainStructuredChunkSize ensures that no value is present for RetainStructuredChunkSize, not even an explicit nil +func (o *BankTemplateConfig) UnsetRetainStructuredChunkSize() { + o.RetainStructuredChunkSize.Unset() +} + // GetEnableObservations returns the EnableObservations field value if set, zero value otherwise (both if not set or set to explicit null). 
func (o *BankTemplateConfig) GetEnableObservations() bool { if o == nil || IsNil(o.EnableObservations.Get()) { @@ -1394,6 +1437,9 @@ func (o BankTemplateConfig) ToMap() (map[string]interface{}, error) { if o.RetainChunkSize.IsSet() { toSerialize["retain_chunk_size"] = o.RetainChunkSize.Get() } + if o.RetainStructuredChunkSize.IsSet() { + toSerialize["retain_structured_chunk_size"] = o.RetainStructuredChunkSize.Get() + } if o.EnableObservations.IsSet() { toSerialize["enable_observations"] = o.EnableObservations.Get() } diff --git a/hindsight-clients/go/model_create_bank_request.go b/hindsight-clients/go/model_create_bank_request.go index 330685ced..2faf083f4 100644 --- a/hindsight-clients/go/model_create_bank_request.go +++ b/hindsight-clients/go/model_create_bank_request.go @@ -31,6 +31,7 @@ type CreateBankRequest struct { RetainExtractionMode NullableString `json:"retain_extraction_mode,omitempty"` RetainCustomInstructions NullableString `json:"retain_custom_instructions,omitempty"` RetainChunkSize NullableInt32 `json:"retain_chunk_size,omitempty"` + RetainStructuredChunkSize NullableInt32 `json:"retain_structured_chunk_size,omitempty"` EnableObservations NullableBool `json:"enable_observations,omitempty"` ObservationsMission NullableString `json:"observations_mission,omitempty"` } @@ -556,6 +557,48 @@ func (o *CreateBankRequest) UnsetRetainChunkSize() { o.RetainChunkSize.Unset() } +// GetRetainStructuredChunkSize returns the RetainStructuredChunkSize field value if set, zero value otherwise (both if not set or set to explicit null). +func (o *CreateBankRequest) GetRetainStructuredChunkSize() int32 { + if o == nil || IsNil(o.RetainStructuredChunkSize.Get()) { + var ret int32 + return ret + } + return *o.RetainStructuredChunkSize.Get() +} + +// GetRetainStructuredChunkSizeOk returns a tuple with the RetainStructuredChunkSize field value if set, nil otherwise +// and a boolean to check if the value has been set. 
+// NOTE: If the value is an explicit nil, `nil, true` will be returned +func (o *CreateBankRequest) GetRetainStructuredChunkSizeOk() (*int32, bool) { + if o == nil { + return nil, false + } + return o.RetainStructuredChunkSize.Get(), o.RetainStructuredChunkSize.IsSet() +} + +// HasRetainStructuredChunkSize returns a boolean if a field has been set. +func (o *CreateBankRequest) HasRetainStructuredChunkSize() bool { + if o != nil && o.RetainStructuredChunkSize.IsSet() { + return true + } + + return false +} + +// SetRetainStructuredChunkSize gets a reference to the given NullableInt32 and assigns it to the RetainStructuredChunkSize field. +func (o *CreateBankRequest) SetRetainStructuredChunkSize(v int32) { + o.RetainStructuredChunkSize.Set(&v) +} +// SetRetainStructuredChunkSizeNil sets the value for RetainStructuredChunkSize to be an explicit nil +func (o *CreateBankRequest) SetRetainStructuredChunkSizeNil() { + o.RetainStructuredChunkSize.Set(nil) +} + +// UnsetRetainStructuredChunkSize ensures that no value is present for RetainStructuredChunkSize, not even an explicit nil +func (o *CreateBankRequest) UnsetRetainStructuredChunkSize() { + o.RetainStructuredChunkSize.Unset() +} + // GetEnableObservations returns the EnableObservations field value if set, zero value otherwise (both if not set or set to explicit null). 
func (o *CreateBankRequest) GetEnableObservations() bool { if o == nil || IsNil(o.EnableObservations.Get()) { @@ -686,6 +729,9 @@ func (o CreateBankRequest) ToMap() (map[string]interface{}, error) { if o.RetainChunkSize.IsSet() { toSerialize["retain_chunk_size"] = o.RetainChunkSize.Get() } + if o.RetainStructuredChunkSize.IsSet() { + toSerialize["retain_structured_chunk_size"] = o.RetainStructuredChunkSize.Get() + } if o.EnableObservations.IsSet() { toSerialize["enable_observations"] = o.EnableObservations.Get() } diff --git a/hindsight-clients/python/hindsight_client/hindsight_client.py b/hindsight-clients/python/hindsight_client/hindsight_client.py index d238d47ce..5740bedf5 100644 --- a/hindsight-clients/python/hindsight_client/hindsight_client.py +++ b/hindsight-clients/python/hindsight_client/hindsight_client.py @@ -537,6 +537,7 @@ def create_bank( retain_extraction_mode: str | None = None, retain_custom_instructions: str | None = None, retain_chunk_size: int | None = None, + retain_structured_chunk_size: int | None = None, enable_observations: bool | None = None, observations_mission: str | None = None, reflect_mission: str | None = None, @@ -555,7 +556,9 @@ def create_bank( retain_mission: Steers what gets extracted during retain(). Injected alongside built-in rules. retain_extraction_mode: Fact extraction mode: 'concise' (default), 'verbose', or 'custom'. retain_custom_instructions: Custom extraction prompt (only active when mode is 'custom'). - retain_chunk_size: Maximum token size for each content chunk during retain. + retain_chunk_size: Target maximum characters for each content chunk during retain. + retain_structured_chunk_size: Maximum characters for a single JSONL line or conversation + turn to keep whole during retain. Defaults to retain_chunk_size when unset. enable_observations: Toggle automatic observation consolidation after retain(). observations_mission: Controls what gets synthesised into observations. Replaces built-in rules. 
reflect_mission: Mission/context for Reflect operations. @@ -575,6 +578,7 @@ def create_bank( retain_extraction_mode=retain_extraction_mode, retain_custom_instructions=retain_custom_instructions, retain_chunk_size=retain_chunk_size, + retain_structured_chunk_size=retain_structured_chunk_size, enable_observations=enable_observations, observations_mission=observations_mission, background=background, @@ -595,6 +599,7 @@ async def _acreate_bank( retain_extraction_mode: str | None = None, retain_custom_instructions: str | None = None, retain_chunk_size: int | None = None, + retain_structured_chunk_size: int | None = None, enable_observations: bool | None = None, observations_mission: str | None = None, background: str | None = None, @@ -631,6 +636,8 @@ async def _acreate_bank( body["retain_custom_instructions"] = retain_custom_instructions if retain_chunk_size is not None: body["retain_chunk_size"] = retain_chunk_size + if retain_structured_chunk_size is not None: + body["retain_structured_chunk_size"] = retain_structured_chunk_size if enable_observations is not None: body["enable_observations"] = enable_observations if observations_mission is not None: @@ -669,6 +676,7 @@ async def acreate_bank( retain_extraction_mode: str | None = None, retain_custom_instructions: str | None = None, retain_chunk_size: int | None = None, + retain_structured_chunk_size: int | None = None, enable_observations: bool | None = None, observations_mission: str | None = None, reflect_mission: str | None = None, @@ -687,7 +695,9 @@ async def acreate_bank( retain_mission: Steers what gets extracted during retain(). Injected alongside built-in rules. retain_extraction_mode: Fact extraction mode: 'concise' (default), 'verbose', or 'custom'. retain_custom_instructions: Custom extraction prompt (only active when mode is 'custom'). - retain_chunk_size: Maximum token size for each content chunk during retain. + retain_chunk_size: Target maximum characters for each content chunk during retain. 
+ retain_structured_chunk_size: Maximum characters for a single JSONL line or conversation + turn to keep whole during retain. Defaults to retain_chunk_size when unset. enable_observations: Toggle automatic observation consolidation after retain(). observations_mission: Controls what gets synthesised into observations. Replaces built-in rules. reflect_mission: Mission/context for Reflect operations. @@ -706,6 +716,7 @@ async def acreate_bank( retain_extraction_mode=retain_extraction_mode, retain_custom_instructions=retain_custom_instructions, retain_chunk_size=retain_chunk_size, + retain_structured_chunk_size=retain_structured_chunk_size, enable_observations=enable_observations, observations_mission=observations_mission, background=background, @@ -1322,6 +1333,7 @@ def update_bank_config( retain_extraction_mode: str | None = None, retain_custom_instructions: str | None = None, retain_chunk_size: int | None = None, + retain_structured_chunk_size: int | None = None, retain_default_strategy: str | None = None, retain_strategies: dict[str, Any] | None = None, # Entity settings @@ -1355,7 +1367,9 @@ def update_bank_config( retain_mission: Steers what gets extracted during retain(). retain_extraction_mode: Fact extraction mode: 'concise', 'verbose', or 'custom'. retain_custom_instructions: Custom extraction prompt (only active when mode is 'custom'). - retain_chunk_size: Maximum token size for each content chunk during retain. + retain_chunk_size: Target maximum characters for each content chunk during retain. + retain_structured_chunk_size: Maximum characters for a single JSONL line or conversation + turn to keep whole during retain. Defaults to retain_chunk_size when unset. retain_default_strategy: Default retain strategy name. retain_strategies: Named strategy definitions (dict of strategy name to config). entity_labels: Controlled vocabulary for entity type classification. 
@@ -1386,6 +1400,7 @@ def update_bank_config( "retain_extraction_mode": retain_extraction_mode, "retain_custom_instructions": retain_custom_instructions, "retain_chunk_size": retain_chunk_size, + "retain_structured_chunk_size": retain_structured_chunk_size, "retain_default_strategy": retain_default_strategy, "retain_strategies": retain_strategies, "entity_labels": entity_labels, diff --git a/hindsight-clients/python/hindsight_client_api/models/bank_template_config.py b/hindsight-clients/python/hindsight_client_api/models/bank_template_config.py index c87954406..947b13f1b 100644 --- a/hindsight-clients/python/hindsight_client_api/models/bank_template_config.py +++ b/hindsight-clients/python/hindsight_client_api/models/bank_template_config.py @@ -32,6 +32,7 @@ class BankTemplateConfig(BaseModel): retain_extraction_mode: Optional[StrictStr] = None retain_custom_instructions: Optional[StrictStr] = None retain_chunk_size: Optional[StrictInt] = None + retain_structured_chunk_size: Optional[StrictInt] = None enable_observations: Optional[StrictBool] = None observations_mission: Optional[StrictStr] = None disposition_skepticism: Optional[Annotated[int, Field(le=5, strict=True, ge=1)]] = None @@ -59,7 +60,7 @@ class BankTemplateConfig(BaseModel): recall_budget_adaptive_high: Optional[Union[StrictFloat, StrictInt]] = None recall_budget_min: Optional[StrictInt] = None recall_budget_max: Optional[StrictInt] = None - __properties: ClassVar[List[str]] = ["reflect_mission", "retain_mission", "retain_extraction_mode", "retain_custom_instructions", "retain_chunk_size", "enable_observations", "observations_mission", "disposition_skepticism", "disposition_literalism", "disposition_empathy", "entity_labels", "entities_allow_free_form", "retain_default_strategy", "retain_strategies", "retain_chunk_batch_size", "mcp_enabled_tools", "consolidation_llm_batch_size", "consolidation_source_facts_max_tokens", "consolidation_source_facts_max_tokens_per_observation", 
"max_observations_per_scope", "observation_scope_limits", "reflect_source_facts_max_tokens", "llm_gemini_safety_settings", "recall_budget_function", "recall_budget_fixed_low", "recall_budget_fixed_mid", "recall_budget_fixed_high", "recall_budget_adaptive_low", "recall_budget_adaptive_mid", "recall_budget_adaptive_high", "recall_budget_min", "recall_budget_max"] + __properties: ClassVar[List[str]] = ["reflect_mission", "retain_mission", "retain_extraction_mode", "retain_custom_instructions", "retain_chunk_size", "retain_structured_chunk_size", "enable_observations", "observations_mission", "disposition_skepticism", "disposition_literalism", "disposition_empathy", "entity_labels", "entities_allow_free_form", "retain_default_strategy", "retain_strategies", "retain_chunk_batch_size", "mcp_enabled_tools", "consolidation_llm_batch_size", "consolidation_source_facts_max_tokens", "consolidation_source_facts_max_tokens_per_observation", "max_observations_per_scope", "observation_scope_limits", "reflect_source_facts_max_tokens", "llm_gemini_safety_settings", "recall_budget_function", "recall_budget_fixed_low", "recall_budget_fixed_mid", "recall_budget_fixed_high", "recall_budget_adaptive_low", "recall_budget_adaptive_mid", "recall_budget_adaptive_high", "recall_budget_min", "recall_budget_max"] model_config = ConfigDict( populate_by_name=True, @@ -125,6 +126,11 @@ def to_dict(self) -> Dict[str, Any]: if self.retain_chunk_size is None and "retain_chunk_size" in self.model_fields_set: _dict['retain_chunk_size'] = None + # set to None if retain_structured_chunk_size (nullable) is None + # and model_fields_set contains the field + if self.retain_structured_chunk_size is None and "retain_structured_chunk_size" in self.model_fields_set: + _dict['retain_structured_chunk_size'] = None + # set to None if enable_observations (nullable) is None # and model_fields_set contains the field if self.enable_observations is None and "enable_observations" in self.model_fields_set: @@ -277,6 
+283,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "retain_extraction_mode": obj.get("retain_extraction_mode"), "retain_custom_instructions": obj.get("retain_custom_instructions"), "retain_chunk_size": obj.get("retain_chunk_size"), + "retain_structured_chunk_size": obj.get("retain_structured_chunk_size"), "enable_observations": obj.get("enable_observations"), "observations_mission": obj.get("observations_mission"), "disposition_skepticism": obj.get("disposition_skepticism"), diff --git a/hindsight-clients/python/hindsight_client_api/models/create_bank_request.py b/hindsight-clients/python/hindsight_client_api/models/create_bank_request.py index 838bb1aab..c2580f6cc 100644 --- a/hindsight-clients/python/hindsight_client_api/models/create_bank_request.py +++ b/hindsight-clients/python/hindsight_client_api/models/create_bank_request.py @@ -40,9 +40,10 @@ class CreateBankRequest(BaseModel): retain_extraction_mode: Optional[StrictStr] = None retain_custom_instructions: Optional[StrictStr] = None retain_chunk_size: Optional[StrictInt] = None + retain_structured_chunk_size: Optional[StrictInt] = None enable_observations: Optional[StrictBool] = None observations_mission: Optional[StrictStr] = None - __properties: ClassVar[List[str]] = ["name", "disposition", "disposition_skepticism", "disposition_literalism", "disposition_empathy", "mission", "background", "reflect_mission", "retain_mission", "retain_extraction_mode", "retain_custom_instructions", "retain_chunk_size", "enable_observations", "observations_mission"] + __properties: ClassVar[List[str]] = ["name", "disposition", "disposition_skepticism", "disposition_literalism", "disposition_empathy", "mission", "background", "reflect_mission", "retain_mission", "retain_extraction_mode", "retain_custom_instructions", "retain_chunk_size", "retain_structured_chunk_size", "enable_observations", "observations_mission"] model_config = ConfigDict( populate_by_name=True, @@ -146,6 +147,11 @@ def 
to_dict(self) -> Dict[str, Any]: if self.retain_chunk_size is None and "retain_chunk_size" in self.model_fields_set: _dict['retain_chunk_size'] = None + # set to None if retain_structured_chunk_size (nullable) is None + # and model_fields_set contains the field + if self.retain_structured_chunk_size is None and "retain_structured_chunk_size" in self.model_fields_set: + _dict['retain_structured_chunk_size'] = None + # set to None if enable_observations (nullable) is None # and model_fields_set contains the field if self.enable_observations is None and "enable_observations" in self.model_fields_set: @@ -180,6 +186,7 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "retain_extraction_mode": obj.get("retain_extraction_mode"), "retain_custom_instructions": obj.get("retain_custom_instructions"), "retain_chunk_size": obj.get("retain_chunk_size"), + "retain_structured_chunk_size": obj.get("retain_structured_chunk_size"), "enable_observations": obj.get("enable_observations"), "observations_mission": obj.get("observations_mission") }) diff --git a/hindsight-clients/python/tests/test_bank_config_update_payload.py b/hindsight-clients/python/tests/test_bank_config_update_payload.py new file mode 100644 index 000000000..5ee5e63e4 --- /dev/null +++ b/hindsight-clients/python/tests/test_bank_config_update_payload.py @@ -0,0 +1,38 @@ +from hindsight_client import Hindsight + + +def test_update_bank_config_can_set_retain_structured_chunk_size(monkeypatch): + captured: dict[str, object] = {} + + async def fake_update(self, bank_id, updates): + captured["bank_id"] = bank_id + captured["updates"] = updates + return {"bank_id": bank_id, "config": {}, "overrides": updates} + + monkeypatch.setattr(Hindsight, "_aupdate_bank_config", fake_update) + + client = Hindsight(base_url="http://example.invalid") + result = client.update_bank_config( + "test-bank", + retain_structured_chunk_size=12000, + ) + + assert result["bank_id"] == "test-bank" + assert captured["updates"] 
== {"retain_structured_chunk_size": 12000} + + +def test_update_bank_config_omits_retain_structured_chunk_size_when_unset(monkeypatch): + captured: dict[str, object] = {} + + async def fake_update(self, bank_id, updates): + captured["bank_id"] = bank_id + captured["updates"] = updates + return {"bank_id": bank_id, "config": {}, "overrides": updates} + + monkeypatch.setattr(Hindsight, "_aupdate_bank_config", fake_update) + + client = Hindsight(base_url="http://example.invalid") + result = client.update_bank_config("test-bank") + + assert result["bank_id"] == "test-bank" + assert captured["updates"] == {} diff --git a/hindsight-clients/typescript/generated/types.gen.ts b/hindsight-clients/typescript/generated/types.gen.ts index e17ad0d83..573ac952e 100644 --- a/hindsight-clients/typescript/generated/types.gen.ts +++ b/hindsight-clients/typescript/generated/types.gen.ts @@ -465,9 +465,15 @@ export type BankTemplateConfig = { /** * Retain Chunk Size * - * Max token size for each content chunk + * Target max characters for each content chunk */ retain_chunk_size?: number | null; + /** + * Retain Structured Chunk Size + * + * Max characters for a single JSONL line or conversation turn to keep whole; defaults to retain_chunk_size when unset + */ + retain_structured_chunk_size?: number | null; /** * Enable Observations * @@ -1086,9 +1092,15 @@ export type CreateBankRequest = { /** * Retain Chunk Size * - * Maximum token size for each content chunk during retain. + * Target maximum characters for each content chunk during retain. */ retain_chunk_size?: number | null; + /** + * Retain Structured Chunk Size + * + * Maximum characters for a single JSONL line or conversation turn to keep whole during retain. Defaults to retain_chunk_size when unset. 
+ */ + retain_structured_chunk_size?: number | null; /** * Enable Observations * diff --git a/hindsight-clients/typescript/src/index.ts b/hindsight-clients/typescript/src/index.ts index 818b76d27..119785eef 100644 --- a/hindsight-clients/typescript/src/index.ts +++ b/hindsight-clients/typescript/src/index.ts @@ -484,8 +484,10 @@ export class HindsightClient { retainExtractionMode?: string; /** Custom extraction prompt (only active when retainExtractionMode is 'custom'). */ retainCustomInstructions?: string; - /** Maximum token size for each content chunk during retain. */ + /** Target maximum characters for each content chunk during retain. */ retainChunkSize?: number; + /** Maximum characters for a single JSONL line or conversation turn to keep whole during retain. */ + retainStructuredChunkSize?: number; /** Toggle automatic observation consolidation after retain(). */ enableObservations?: boolean; /** Controls what gets synthesised into observations. Replaces built-in rules. */ @@ -509,6 +511,7 @@ export class HindsightClient { retain_extraction_mode: options.retainExtractionMode, retain_custom_instructions: options.retainCustomInstructions, retain_chunk_size: options.retainChunkSize, + retain_structured_chunk_size: options.retainStructuredChunkSize, enable_observations: options.enableObservations, observations_mission: options.observationsMission, }, @@ -580,6 +583,7 @@ export class HindsightClient { retainExtractionMode?: string; retainCustomInstructions?: string; retainChunkSize?: number; + retainStructuredChunkSize?: number; enableObservations?: boolean; observationsMission?: string; /** How skeptical vs trusting (1=trusting, 5=skeptical). 
*/ @@ -599,6 +603,8 @@ export class HindsightClient { if (options.retainCustomInstructions !== undefined) updates.retain_custom_instructions = options.retainCustomInstructions; if (options.retainChunkSize !== undefined) updates.retain_chunk_size = options.retainChunkSize; + if (options.retainStructuredChunkSize !== undefined) + updates.retain_structured_chunk_size = options.retainStructuredChunkSize; if (options.enableObservations !== undefined) updates.enable_observations = options.enableObservations; if (options.observationsMission !== undefined) diff --git a/hindsight-control-plane/src/components/bank-config-view.tsx b/hindsight-control-plane/src/components/bank-config-view.tsx index 096d5eb8a..178d09864 100644 --- a/hindsight-control-plane/src/components/bank-config-view.tsx +++ b/hindsight-control-plane/src/components/bank-config-view.tsx @@ -5,6 +5,12 @@ import { useTranslations } from "next-intl"; import { useBank } from "@/lib/bank-context"; import { useFeatures } from "@/lib/features-context"; import { client } from "@/lib/api"; +import { + deserializeRetainStrategies, + serializeRetainStrategies, + type RetainStrategy, + type RetainStrategyValues, +} from "@/lib/retain-strategy-config"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Textarea } from "@/components/ui/textarea"; @@ -43,6 +49,7 @@ interface ProfileData { type RetainEdits = { retain_chunk_size: number | null; + retain_structured_chunk_size: number | null; retain_extraction_mode: string | null; retain_mission: string | null; retain_custom_instructions: string | null; @@ -239,6 +246,7 @@ function parseEntityLabels(raw: unknown): LabelGroup[] | null { function retainSlice(config: Record): RetainEdits { return { retain_chunk_size: config.retain_chunk_size ?? null, + retain_structured_chunk_size: config.retain_structured_chunk_size ?? null, retain_extraction_mode: config.retain_extraction_mode ?? 
null, retain_mission: config.retain_mission ?? null, retain_custom_instructions: config.retain_custom_instructions ?? null, @@ -343,7 +351,6 @@ export function BankConfigView() { () => JSON.stringify(geminiEdits) !== JSON.stringify(geminiSlice(baseConfig)), [geminiEdits, baseConfig] ); - useEffect(() => { if (bankId) loadAll(); }, [bankId]); @@ -776,14 +783,7 @@ export function BankConfigView() { // ─── Retain strategies panel ────────────────────────────────────────────────── -type RetainFormValues = { - retain_extraction_mode: string | null; - retain_chunk_size: number | null; - retain_mission: string | null; - retain_custom_instructions: string | null; - entities_allow_free_form: boolean | null; - entity_labels: LabelGroup[] | null; -}; +type RetainFormValues = RetainStrategyValues; const EXTRACTION_MODES = ["concise", "verbose", "verbatim", "chunks", "custom"]; const INHERIT_SENTINEL = "__inherit__"; @@ -839,6 +839,24 @@ function RetainStrategyForm({ placeholder={isOverride ? t("inherited") : undefined} /> + +
+ { + onChange({ + retain_structured_chunk_size: e.target.value ? parseFloat(e.target.value) : null, + }); + }} + placeholder={isOverride ? t("inherited") : undefined} + /> +
+
; function fromStrategiesDict(dict: Record> | null): LocalStrategy[] { - if (!dict) return []; - return Object.entries(dict).map(([name, overrides], i) => ({ - id: i, - name, - values: { - retain_extraction_mode: overrides.retain_extraction_mode ?? null, - retain_chunk_size: overrides.retain_chunk_size ?? null, - retain_mission: overrides.retain_mission ?? null, - retain_custom_instructions: overrides.retain_custom_instructions ?? null, - entities_allow_free_form: overrides.entities_allow_free_form ?? null, - entity_labels: parseEntityLabels(overrides.entity_labels), - }, - })); + return deserializeRetainStrategies(dict, parseEntityLabels); } function toStrategiesDict(local: LocalStrategy[]): Record> | null { - const dict: Record> = {}; - for (const s of local) { - if (!s.name.trim()) continue; - const overrides: Record = {}; - if (s.values.retain_extraction_mode !== null) - overrides.retain_extraction_mode = s.values.retain_extraction_mode; - if (s.values.retain_chunk_size !== null) - overrides.retain_chunk_size = s.values.retain_chunk_size; - if (s.values.retain_mission) overrides.retain_mission = s.values.retain_mission; - if (s.values.retain_custom_instructions) - overrides.retain_custom_instructions = s.values.retain_custom_instructions; - if (s.values.entities_allow_free_form !== null) - overrides.entities_allow_free_form = s.values.entities_allow_free_form; - if (s.values.entity_labels !== null) overrides.entity_labels = s.values.entity_labels; - dict[s.name.trim()] = overrides; - } - return Object.keys(dict).length > 0 ? 
dict : null; + return serializeRetainStrategies(local); } function RetainStrategiesPanel({ @@ -956,6 +946,7 @@ function RetainStrategiesPanel({ values: { retain_extraction_mode: null, retain_chunk_size: null, + retain_structured_chunk_size: null, retain_mission: null, retain_custom_instructions: null, entities_allow_free_form: null, diff --git a/hindsight-control-plane/src/lib/api.ts b/hindsight-control-plane/src/lib/api.ts index 47dae7722..5ec05de35 100644 --- a/hindsight-control-plane/src/lib/api.ts +++ b/hindsight-control-plane/src/lib/api.ts @@ -17,6 +17,10 @@ function describeErrorDetails(details: unknown): string | undefined { if (details == null) return undefined; if (typeof details === "string") return details; if (typeof details === "object") { + const detail = (details as { detail?: unknown }).detail; + if (typeof detail === "string") return detail; + const nestedDetails = (details as { details?: unknown }).details; + if (typeof nestedDetails === "string") return nestedDetails; const violations = (details as { violations?: Array<{ message?: string }> }).violations; if (Array.isArray(violations)) { const messages = violations.map((v) => v?.message).filter(Boolean); @@ -219,6 +223,7 @@ export class ControlPlaneClient { }); if (!response.ok) { + const isClientError = response.status >= 400 && response.status < 500; // Redirect to login on 401 (session expired or not authenticated) const currentPath = stripBasePath(`${window.location.pathname}${window.location.search}`); if (response.status === 401 && !currentPath.startsWith("/login")) { @@ -232,17 +237,21 @@ export class ControlPlaneClient { try { const errorData = await response.json(); - errorMessage = errorData.error || errorMessage; - errorDetails = errorData.details; + if (isClientError) { + errorMessage = errorData.error || errorMessage; + errorDetails = errorData.details ?? errorData.detail ?? 
errorData.upstream?.detail; + } } catch { - // If JSON parse fails, try to get text - try { - const errorText = await response.text(); - if (errorText) { - errorDetails = errorText; + if (isClientError) { + // If JSON parse fails, try to get text + try { + const errorText = await response.text(); + if (errorText) { + errorDetails = errorText; + } + } catch { + // Ignore text parse errors } - } catch { - // Ignore text parse errors } } @@ -253,7 +262,7 @@ export class ControlPlaneClient { const description = describeErrorDetails(errorDetails) || errorMessage; const status = response.status; - if (status >= 400 && status < 500) { + if (isClientError) { // Client errors (4xx) - validation, bad request, etc. - show as warning toast.warning("Client Error", { description, @@ -274,7 +283,7 @@ export class ControlPlaneClient { } // Still throw error for callers that want to handle it - const error = new Error(errorMessage); + const error = new Error(description || errorMessage); (error as any).status = response.status; (error as any).details = errorDetails; throw error; diff --git a/hindsight-control-plane/src/lib/retain-strategy-config.ts b/hindsight-control-plane/src/lib/retain-strategy-config.ts new file mode 100644 index 000000000..afef6f371 --- /dev/null +++ b/hindsight-control-plane/src/lib/retain-strategy-config.ts @@ -0,0 +1,62 @@ +export type RetainStrategyValues = { + retain_extraction_mode: string | null; + retain_chunk_size: number | null; + retain_structured_chunk_size: number | null; + retain_mission: string | null; + retain_custom_instructions: string | null; + entities_allow_free_form: boolean | null; + entity_labels: TLabels | null; +}; + +export type RetainStrategy = { + id: number; + name: string; + values: RetainStrategyValues; +}; + +export function deserializeRetainStrategies( + dict: Record> | null, + parseEntityLabels: (raw: unknown) => TLabels | null +): RetainStrategy[] { + if (!dict) return []; + return Object.entries(dict).map(([name, 
overrides], i) => { + return { + id: i, + name, + values: { + retain_extraction_mode: overrides.retain_extraction_mode ?? null, + retain_chunk_size: overrides.retain_chunk_size ?? null, + retain_structured_chunk_size: overrides.retain_structured_chunk_size ?? null, + retain_mission: overrides.retain_mission ?? null, + retain_custom_instructions: overrides.retain_custom_instructions ?? null, + entities_allow_free_form: overrides.entities_allow_free_form ?? null, + entity_labels: parseEntityLabels(overrides.entity_labels), + }, + }; + }); +} + +export function serializeRetainStrategies( + local: RetainStrategy[] +): Record> | null { + const dict: Record> = {}; + for (const s of local) { + if (!s.name.trim()) continue; + const overrides: Record = {}; + if (s.values.retain_extraction_mode !== null) + overrides.retain_extraction_mode = s.values.retain_extraction_mode; + if (s.values.retain_chunk_size !== null) + overrides.retain_chunk_size = s.values.retain_chunk_size; + if (s.values.retain_structured_chunk_size !== null) { + overrides.retain_structured_chunk_size = s.values.retain_structured_chunk_size; + } + if (s.values.retain_mission) overrides.retain_mission = s.values.retain_mission; + if (s.values.retain_custom_instructions) + overrides.retain_custom_instructions = s.values.retain_custom_instructions; + if (s.values.entities_allow_free_form !== null) + overrides.entities_allow_free_form = s.values.entities_allow_free_form; + if (s.values.entity_labels !== null) overrides.entity_labels = s.values.entity_labels; + dict[s.name.trim()] = overrides; + } + return Object.keys(dict).length > 0 ? 
dict : null; +} diff --git a/hindsight-control-plane/src/lib/sdk-response.ts b/hindsight-control-plane/src/lib/sdk-response.ts index fe003838e..453c6cb6a 100644 --- a/hindsight-control-plane/src/lib/sdk-response.ts +++ b/hindsight-control-plane/src/lib/sdk-response.ts @@ -50,6 +50,16 @@ export type SdkResult = { response?: Response; }; +function extractErrorDetail(error: unknown): unknown { + if (error instanceof Error) return error.message; + if (error && typeof error === "object") { + const record = error as Record; + if (record.detail !== undefined) return record.detail; + if (record.details !== undefined) return record.details; + } + return error; +} + /** * Serialize the result of an SDK call into a NextResponse. * @@ -84,6 +94,8 @@ export function respondWithSdk( if (result.error !== undefined || result.data === undefined) { const upstreamStatus = result.response?.status ?? DEFAULT_UPSTREAM_STATUS; + const exposeDetails = upstreamStatus >= 400 && upstreamStatus < 500; + const details = exposeDetails ? extractErrorDetail(result.error ?? null) : null; const errorKey = errorOptions?.errorKey ?? failureErrorKeys[failureLabel]; console.error(`${failureLabel}:`, { upstreamStatus, @@ -92,16 +104,19 @@ export function respondWithSdk( const payload = { error: failureLabel, ...(errorKey ? { errorKey } : {}), + details, upstream: { status: upstreamStatus, - detail: result.error ?? null, + detail: exposeDetails ? (result.error ?? null) : null, }, }; - return NextResponse.json( - errorKey ? localizeApiErrorPayload(errorOptions?.request, { ...payload, errorKey }) : payload, - { status: upstreamStatus } - ); + const localizedPayload = errorKey + ? 
localizeApiErrorPayload(errorOptions?.request, { ...payload, errorKey }) + : payload; + localizedPayload.details = details; + + return NextResponse.json(localizedPayload, { status: upstreamStatus }); } return NextResponse.json(result.data, { status: successStatus }); diff --git a/hindsight-control-plane/src/messages/de.json b/hindsight-control-plane/src/messages/de.json index 594c4da4d..e5ddc5bcf 100644 --- a/hindsight-control-plane/src/messages/de.json +++ b/hindsight-control-plane/src/messages/de.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Retain-Einstellungen konnten nicht gespeichert werden", "chunkSizeLabel": "Chunk-Größe", "chunkSizeDescription": "Größe der Textblöcke für die Verarbeitung (Zeichen)", + "structuredChunkSizeLabel": "Strukturierte Chunk-Größe", + "structuredChunkSizeDescription": "Maximale Zeichenanzahl für eine JSONL-Zeile oder Konversationsrunde, die vollständig beibehalten wird. Leer lassen, um die Chunk-Größe zu verwenden.", "missionLabel": "Mission", "retainMissionDescription": "Worauf diese Bank während der Extraktion achten soll. Steuert das LLM, ohne die Extraktionsregeln zu ersetzen — funktioniert zusammen mit jedem Extraktionsmodus.", "retainMissionPlaceholder": "z. B. Technische Entscheidungen, API-Designentscheidungen und architektonische Kompromisse immer einbeziehen. Meeting-Logistik, Begrüßungen und soziale Austausche ignorieren.", diff --git a/hindsight-control-plane/src/messages/en.json b/hindsight-control-plane/src/messages/en.json index 1384c85f0..095f6af4d 100644 --- a/hindsight-control-plane/src/messages/en.json +++ b/hindsight-control-plane/src/messages/en.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Failed to save retain settings", "chunkSizeLabel": "Chunk Size", "chunkSizeDescription": "Size of text chunks for processing (characters)", + "structuredChunkSizeLabel": "Structured Chunk Size", + "structuredChunkSizeDescription": "Maximum characters for one JSONL line or conversation turn to keep whole. 
Leave blank to use the chunk size.", "missionLabel": "Mission", "retainMissionDescription": "What this bank should pay attention to during extraction. Steers the LLM without replacing the extraction rules — works alongside any extraction mode.", "retainMissionPlaceholder": "e.g. Always include technical decisions, API design choices, and architectural trade-offs. Ignore meeting logistics, greetings, and social exchanges.", diff --git a/hindsight-control-plane/src/messages/es.json b/hindsight-control-plane/src/messages/es.json index 9fa924b99..62bcba143 100644 --- a/hindsight-control-plane/src/messages/es.json +++ b/hindsight-control-plane/src/messages/es.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Error al guardar la configuración de Retain", "chunkSizeLabel": "Tamaño de fragmento", "chunkSizeDescription": "Tamaño de los fragmentos de texto para el procesamiento (caracteres)", + "structuredChunkSizeLabel": "Tamaño de fragmento estructurado", + "structuredChunkSizeDescription": "Máximo de caracteres de una línea JSONL o turno de conversación que se conserva completo. Déjalo en blanco para usar el tamaño de fragmento.", "missionLabel": "Misión", "retainMissionDescription": "En qué debe prestar atención este banco durante la extracción. Orienta el LLM sin reemplazar las reglas de extracción — funciona junto con cualquier modo de extracción.", "retainMissionPlaceholder": "Ej.: Incluir siempre decisiones técnicas, opciones de diseño de API y compromisos arquitectónicos. 
Ignorar la logística de reuniones, saludos e intercambios sociales.", diff --git a/hindsight-control-plane/src/messages/fr.json b/hindsight-control-plane/src/messages/fr.json index c5d31dbc3..38544416a 100644 --- a/hindsight-control-plane/src/messages/fr.json +++ b/hindsight-control-plane/src/messages/fr.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Échec de l'enregistrement des paramètres Retain", "chunkSizeLabel": "Taille des segments", "chunkSizeDescription": "Taille des segments de texte pour le traitement (caractères)", + "structuredChunkSizeLabel": "Taille de segment structuré", + "structuredChunkSizeDescription": "Nombre maximal de caractères pour une ligne JSONL ou un tour de conversation conservé entier. Laissez vide pour utiliser la taille de segment.", "missionLabel": "Mission", "retainMissionDescription": "Ce à quoi cette banque doit prêter attention lors de l'extraction. Oriente le LLM sans remplacer les règles d'extraction — fonctionne en complément du mode d'extraction.", "retainMissionPlaceholder": "ex. : Toujours inclure les décisions techniques, les choix de conception API et les compromis architecturaux. 
Ignorer la logistique des réunions, les salutations et les échanges sociaux.", diff --git a/hindsight-control-plane/src/messages/ja.json b/hindsight-control-plane/src/messages/ja.json index 1ec463f1b..9c7b918d9 100644 --- a/hindsight-control-plane/src/messages/ja.json +++ b/hindsight-control-plane/src/messages/ja.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Retain設定の保存に失敗しました", "chunkSizeLabel": "チャンクサイズ", "chunkSizeDescription": "処理用テキストチャンクのサイズ(文字数)", + "structuredChunkSizeLabel": "構造化チャンクサイズ", + "structuredChunkSizeDescription": "1 つの JSONL 行または会話ターンを分割せずに保持する最大文字数。空欄の場合はチャンクサイズを使用します。", "missionLabel": "ミッション", "retainMissionDescription": "抽出中にこのバンクが注目すべき内容。抽出ルールを置き換えることなくLLMを誘導します — どの抽出モードとも並行して機能します。", "retainMissionPlaceholder": "例:技術的な決定、API設計の選択、アーキテクチャのトレードオフを常に含める。会議の段取り、挨拶、社交的なやり取りは無視する。", diff --git a/hindsight-control-plane/src/messages/ko.json b/hindsight-control-plane/src/messages/ko.json index 6bc13cdc0..1487f0e54 100644 --- a/hindsight-control-plane/src/messages/ko.json +++ b/hindsight-control-plane/src/messages/ko.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Retain 설정 저장에 실패했습니다", "chunkSizeLabel": "청크 크기", "chunkSizeDescription": "처리를 위한 텍스트 청크 크기 (문자 수)", + "structuredChunkSizeLabel": "구조화 청크 크기", + "structuredChunkSizeDescription": "하나의 JSONL 줄 또는 대화 턴을 통째로 유지할 최대 문자 수입니다. 비워 두면 청크 크기를 사용합니다.", "missionLabel": "미션", "retainMissionDescription": "추출 중에 이 뱅크가 주목해야 할 내용. 추출 규칙을 대체하지 않고 LLM을 유도합니다 — 모든 추출 모드와 함께 작동합니다.", "retainMissionPlaceholder": "예: 기술적 결정, API 설계 선택 및 아키텍처 트레이드오프를 항상 포함하세요. 
회의 물류, 인사말 및 사교적 교환은 무시하세요.", diff --git a/hindsight-control-plane/src/messages/pt.json b/hindsight-control-plane/src/messages/pt.json index f5a92d58a..30f838d72 100644 --- a/hindsight-control-plane/src/messages/pt.json +++ b/hindsight-control-plane/src/messages/pt.json @@ -315,6 +315,8 @@ "retainFailedToSave": "Falha ao salvar configurações de Retain", "chunkSizeLabel": "Tamanho do Chunk", "chunkSizeDescription": "Tamanho dos chunks de texto para processamento (caracteres)", + "structuredChunkSizeLabel": "Tamanho de chunk estruturado", + "structuredChunkSizeDescription": "Máximo de caracteres para manter inteira uma linha JSONL ou turno de conversa. Deixe em branco para usar o tamanho do chunk.", "missionLabel": "Missão", "retainMissionDescription": "O que este banco deve observar durante a extração. Direciona o LLM sem substituir as regras de extração — funciona junto a qualquer modo de extração.", "retainMissionPlaceholder": "ex.: Sempre incluir decisões técnicas, escolhas de design de API e trade-offs arquiteturais. 
Ignorar logística de reuniões, cumprimentos e trocas sociais.", diff --git a/hindsight-control-plane/src/messages/yue-Hant.json b/hindsight-control-plane/src/messages/yue-Hant.json index 077fd1c00..cff6e3dde 100644 --- a/hindsight-control-plane/src/messages/yue-Hant.json +++ b/hindsight-control-plane/src/messages/yue-Hant.json @@ -315,6 +315,8 @@ "retainFailedToSave": "未能儲存保留設定", "chunkSizeLabel": "片段大小", "chunkSizeDescription": "處理的文字片段大小(字元數)", + "structuredChunkSizeLabel": "結構化片段大小", + "structuredChunkSizeDescription": "單條 JSONL 行或對話輪次保持完整嘅最大字元數。留空就使用片段大小。", "missionLabel": "使命", "retainMissionDescription": "此記憶庫在擷取期間應關注的內容。在不取代擷取規則的情況下引導 LLM — 可與任何擷取模式搭配使用。", "retainMissionPlaceholder": "例如:始終包含技術決策、API 設計選擇和架構權衡。忽略會議事務、問候語和社交交流。", diff --git a/hindsight-control-plane/src/messages/zh-CN.json b/hindsight-control-plane/src/messages/zh-CN.json index 70c1a2f89..5676451a3 100644 --- a/hindsight-control-plane/src/messages/zh-CN.json +++ b/hindsight-control-plane/src/messages/zh-CN.json @@ -315,6 +315,8 @@ "retainFailedToSave": "无法保存保留设置", "chunkSizeLabel": "分块大小", "chunkSizeDescription": "处理的文本分块大小(字符数)", + "structuredChunkSizeLabel": "结构化分块大小", + "structuredChunkSizeDescription": "单条 JSONL 行或对话轮次保持完整的最大字符数。留空则使用分块大小。", "missionLabel": "使命", "retainMissionDescription": "此库在提取期间应关注的内容。在不替换提取规则的情况下引导 LLM — 与任何提取模式配合使用。", "retainMissionPlaceholder": "例如:始终包含技术决策、API 设计选择和架构权衡。忽略会议事务、问候语和社交交流。", diff --git a/hindsight-control-plane/src/messages/zh-TW.json b/hindsight-control-plane/src/messages/zh-TW.json index 80c07f00a..3ade673ac 100644 --- a/hindsight-control-plane/src/messages/zh-TW.json +++ b/hindsight-control-plane/src/messages/zh-TW.json @@ -315,6 +315,8 @@ "retainFailedToSave": "無法儲存保留設定", "chunkSizeLabel": "片段大小", "chunkSizeDescription": "處理的文字片段大小(字元數)", + "structuredChunkSizeLabel": "結構化片段大小", + "structuredChunkSizeDescription": "單條 JSONL 行或對話輪次保持完整的最大字元數。留空則使用片段大小。", "missionLabel": "使命", "retainMissionDescription": "此記憶庫在擷取期間應關注的內容。在不取代擷取規則的情況下引導 LLM — 
可與任何擷取模式搭配使用。", "retainMissionPlaceholder": "例如:始終包含技術決策、API 設計選擇和架構權衡。忽略會議事務、問候語和社交交流。", diff --git a/hindsight-control-plane/tests/lib/api.test.ts b/hindsight-control-plane/tests/lib/api.test.ts new file mode 100644 index 000000000..b35d5d53a --- /dev/null +++ b/hindsight-control-plane/tests/lib/api.test.ts @@ -0,0 +1,86 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { toast } from "sonner"; +import { ControlPlaneClient } from "@/lib/api"; + +vi.mock("sonner", () => ({ + toast: { + error: vi.fn(), + warning: vi.fn(), + }, +})); + +describe("ControlPlaneClient error handling", () => { + let fetchSpy: ReturnType; + let client: ControlPlaneClient; + + beforeEach(() => { + client = new ControlPlaneClient(); + fetchSpy = vi.spyOn(globalThis, "fetch"); + Object.defineProperty(globalThis, "window", { + configurable: true, + value: { + location: { + href: "", + pathname: "/en/dashboard", + search: "", + }, + }, + }); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + vi.mocked(toast.error).mockReset(); + vi.mocked(toast.warning).mockReset(); + delete (globalThis as { window?: unknown }).window; + }); + + it("shows client-error details for 4xx validation failures", async () => { + fetchSpy.mockResolvedValueOnce( + new Response( + JSON.stringify({ + error: "Failed to update bank config", + details: "retain_structured_chunk_size must be a positive integer", + }), + { status: 400 } + ) + ); + + await expect(client.getBankConfig("bank-a")).rejects.toMatchObject({ + message: "retain_structured_chunk_size must be a positive integer", + status: 400, + details: "retain_structured_chunk_size must be a positive integer", + }); + + expect(toast.warning).toHaveBeenCalledWith( + "Client Error", + expect.objectContaining({ + description: "retain_structured_chunk_size must be a positive integer", + }) + ); + }); + + it("does not show upstream response details for 5xx failures", async () => { + fetchSpy.mockResolvedValueOnce( + new Response( + 
JSON.stringify({ + error: "DiskFullError on shared memory", + details: "internal stack trace", + }), + { status: 500 } + ) + ); + + await expect(client.getBankConfig("bank-a")).rejects.toMatchObject({ + message: "HTTP 500", + status: 500, + }); + + expect(toast.error).toHaveBeenCalledWith( + "Server Error", + expect.objectContaining({ + description: "HTTP 500", + }) + ); + }); +}); diff --git a/hindsight-control-plane/tests/lib/retain-strategy-config.test.ts b/hindsight-control-plane/tests/lib/retain-strategy-config.test.ts new file mode 100644 index 000000000..0e1f62ab6 --- /dev/null +++ b/hindsight-control-plane/tests/lib/retain-strategy-config.test.ts @@ -0,0 +1,83 @@ +import { describe, expect, it } from "vitest"; +import { + deserializeRetainStrategies, + serializeRetainStrategies, + type RetainStrategy, + type RetainStrategyValues, +} from "../../src/lib/retain-strategy-config"; + +const parseEntityLabels = (raw: unknown) => (Array.isArray(raw) ? raw : null); +const baseValues = (overrides: Partial = {}): RetainStrategyValues => ({ + retain_extraction_mode: null, + retain_chunk_size: 3000, + retain_structured_chunk_size: null, + retain_mission: null, + retain_custom_instructions: null, + entities_allow_free_form: null, + entity_labels: null, + ...overrides, +}); + +describe("retain strategy config serialization", () => { + it("omits null structured-chunk cap overrides", () => { + const strategies = deserializeRetainStrategies( + { + jsonl: { + retain_chunk_size: 8000, + retain_structured_chunk_size: null, + }, + }, + parseEntityLabels + ); + + expect(strategies[0]?.values.retain_structured_chunk_size).toBeNull(); + expect(serializeRetainStrategies(strategies)).toEqual({ + jsonl: { + retain_chunk_size: 8000, + }, + }); + }); + + it("omits structured-chunk cap when the strategy inherits it", () => { + const strategies = deserializeRetainStrategies( + { + inherited: { + retain_chunk_size: 4000, + }, + }, + parseEntityLabels + ); + + 
expect(strategies[0]?.values.retain_structured_chunk_size).toBeNull(); + expect(serializeRetainStrategies(strategies)).toEqual({ + inherited: { + retain_chunk_size: 4000, + }, + }); + }); + + it("emits numeric structured-chunk cap overrides independently of chunk size", () => { + const strategies: RetainStrategy[] = [ + { + id: 1, + name: "wide-jsonl", + values: { + retain_extraction_mode: null, + retain_chunk_size: 4000, + retain_structured_chunk_size: 2000, + retain_mission: null, + retain_custom_instructions: null, + entities_allow_free_form: null, + entity_labels: null, + }, + }, + ]; + + expect(serializeRetainStrategies(strategies)).toEqual({ + "wide-jsonl": { + retain_chunk_size: 4000, + retain_structured_chunk_size: 2000, + }, + }); + }); +}); diff --git a/hindsight-control-plane/tests/lib/sdk-response.test.ts b/hindsight-control-plane/tests/lib/sdk-response.test.ts index 316b36a9c..24546a66d 100644 --- a/hindsight-control-plane/tests/lib/sdk-response.test.ts +++ b/hindsight-control-plane/tests/lib/sdk-response.test.ts @@ -96,22 +96,33 @@ describe("respondWithSdk", () => { expect(response.status).toBe(HTTP_UPSTREAM_429); }); - it("includes the upstream error detail in the response body", async () => { - const upstreamError = { detail: "DiskFullError on shared memory" }; + it("includes upstream error detail in the response body for 4xx responses", async () => { + const upstreamError = { detail: "Invalid retain config" }; const response = respondWithSdk( - fail(upstreamError, HTTP_UPSTREAM_500), - "Failed to fetch stats" + fail(upstreamError, HTTP_UPSTREAM_429), + "Failed to update bank config" ); const body = await response.json(); expect(body).toEqual({ - error: "Failed to fetch stats", + error: "Failed to update bank config", + details: "Invalid retain config", upstream: { - status: HTTP_UPSTREAM_500, + status: HTTP_UPSTREAM_429, detail: upstreamError, }, }); }); + it("does not expose upstream error detail in the response body for 5xx responses", async () 
=> { + const response = respondWithSdk( + fail(new Error("ECONNREFUSED"), HTTP_UPSTREAM_503), + "Failed to fetch stats" + ); + const body = await response.json(); + expect(body.details).toBeNull(); + expect(body.upstream.detail).toBeNull(); + }); + it("logs the upstream status and error to console.error", () => { const upstreamError = { detail: "boom" }; respondWithSdk(fail(upstreamError, HTTP_UPSTREAM_500), "Failed to fetch stats"); @@ -148,6 +159,7 @@ describe("respondWithSdk", () => { expect(response.status).toBe(HTTP_UPSTREAM_503); const body = await response.json(); expect(body.error).toBe("Failed to fetch"); + expect(body.details).toBeNull(); expect(body.upstream.detail).toBeNull(); }); }); diff --git a/hindsight-docs/docs/developer/api/bank-templates.mdx b/hindsight-docs/docs/developer/api/bank-templates.mdx index 1eef82763..9d461803f 100644 --- a/hindsight-docs/docs/developer/api/bank-templates.mdx +++ b/hindsight-docs/docs/developer/api/bank-templates.mdx @@ -39,6 +39,7 @@ Browse the [Bank Templates Hub](/templates) for ready-to-use templates. "retain_extraction_mode": "concise | verbose | custom | chunks", "retain_custom_instructions": "...", "retain_chunk_size": 2048, + "retain_structured_chunk_size": 8192, "disposition_skepticism": 3, "disposition_literalism": 3, "disposition_empathy": 3, @@ -95,7 +96,8 @@ All fields in `bank` are optional. 
Only the fields you include will be set as pe | `retain_mission` | string | Steers what gets extracted during retain | | `retain_extraction_mode` | string | `concise`, `verbose`, `custom`, or `chunks` | | `retain_custom_instructions` | string | Custom extraction prompt (requires `mode=custom`) | -| `retain_chunk_size` | integer | Max token size per content chunk | +| `retain_chunk_size` | integer | Target max characters per content chunk | +| `retain_structured_chunk_size` | integer | Max characters for a single JSONL line or conversation turn to keep whole; defaults to `retain_chunk_size` when unset | | `disposition_skepticism` | integer (1-5) | How skeptical the disposition is | | `disposition_literalism` | integer (1-5) | How literal the disposition is | | `disposition_empathy` | integer (1-5) | How empathetic the disposition is | diff --git a/hindsight-docs/docs/developer/api/memory-banks.mdx b/hindsight-docs/docs/developer/api/memory-banks.mdx index f980ed3d8..065d137b1 100644 --- a/hindsight-docs/docs/developer/api/memory-banks.mdx +++ b/hindsight-docs/docs/developer/api/memory-banks.mdx @@ -90,6 +90,12 @@ Maximum number of characters per chunk when splitting content for fact extractio Default: `3000` +### retain_structured_chunk_size + +Maximum number of characters for a single JSONL line or conversation turn to keep whole when it exceeds `retain_chunk_size`. When unset, the limit is exactly `retain_chunk_size`; set a larger value for structured logs or chat transcripts where splitting a single record would lose useful context. + +Default: unset, which uses `retain_chunk_size` + See [Retain configuration](/developer/configuration#retain) for environment variable names and defaults. 
### entity_labels {#entity-labels} diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index 79323dff6..68b620fd3 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -1017,6 +1017,7 @@ Controls the retain (memory ingestion) pipeline. |----------|-------------|---------| | `HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS` | Max completion tokens for fact extraction LLM calls | `64000` | | `HINDSIGHT_API_RETAIN_CHUNK_SIZE` | Max characters per chunk for fact extraction. Larger chunks extract fewer LLM calls but may lose context. | `3000` | +| `HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE` | Max characters for a single JSONL line or conversation turn to keep whole. Unset uses `HINDSIGHT_API_RETAIN_CHUNK_SIZE`. Must be a positive integer when set. | - | | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE` | Fact extraction mode: `concise`, `verbose`, `verbatim`, `chunks`, or `custom` | `concise` | | `HINDSIGHT_API_RETAIN_MISSION` | What this bank should pay attention to during extraction. Steers the LLM without replacing the extraction rules — works alongside any extraction mode. | - | | `HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS` | Full prompt override for fact extraction (only used when mode is `custom`). Replaces built-in extraction rules entirely. | - | @@ -1117,7 +1118,7 @@ export HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim Named strategies let you ingest different content types into the same bank using different extraction settings. A strategy is a set of hierarchical field overrides applied on top of the resolved bank config. -Any field in the hierarchical config can be overridden per strategy, including `retain_extraction_mode`, `retain_chunk_size`, `entity_labels`, `entities_allow_free_form`, `retain_mission`, etc. 
+Any field in the hierarchical config can be overridden per strategy, including `retain_extraction_mode`, `retain_chunk_size`, `retain_structured_chunk_size`, `entity_labels`, `entities_allow_free_form`, `retain_mission`, etc. Configure strategies via the bank config API: @@ -1127,7 +1128,8 @@ Configure strategies via the bank config API: "retain_strategies": { "conversations": { "retain_extraction_mode": "concise", - "retain_chunk_size": 3000 + "retain_chunk_size": 3000, + "retain_structured_chunk_size": 12000 }, "documents": { "retain_extraction_mode": "chunks", @@ -1777,7 +1779,7 @@ This design prevents bugs where global defaults are used instead of bank overrid Configuration fields are categorized for security: 1. **Configurable Fields** - Safe behavioral settings that can be customized per-bank: - - Retention: `retain_chunk_size`, `retain_extraction_mode`, `retain_mission`, `retain_custom_instructions` + - Retention: `retain_chunk_size`, `retain_structured_chunk_size`, `retain_extraction_mode`, `retain_mission`, `retain_custom_instructions` - Observations: `enable_observations`, `enable_auto_consolidation`, `observations_mission`, `max_observations_per_scope` - MCP access control: `mcp_enabled_tools` diff --git a/hindsight-docs/docs/developer/mcp-server.md b/hindsight-docs/docs/developer/mcp-server.md index 3af50fd9f..eb1e2acfa 100644 --- a/hindsight-docs/docs/developer/mcp-server.md +++ b/hindsight-docs/docs/developer/mcp-server.md @@ -514,7 +514,8 @@ The `config_updates` object accepts any bank-configurable field by its Python fi - `retain_mission` — steers what gets extracted during `retain()` - `retain_extraction_mode` — `concise` (default), `verbose`, or `custom` - `retain_custom_instructions` — custom extraction prompt (active when mode is `custom`) -- `retain_chunk_size` — maximum token size for each content chunk +- `retain_chunk_size` — target maximum characters for each content chunk +- `retain_structured_chunk_size` — maximum characters for a single 
JSONL line or conversation turn to keep whole - `retain_chunk_batch_size` — number of chunks to process in parallel - `enable_observations` — toggle observation consolidation after `retain()` - `observations_mission` — controls observation synthesis rules diff --git a/hindsight-docs/static/bank-template-schema.json b/hindsight-docs/static/bank-template-schema.json index 5c385396f..0d00cb0f1 100644 --- a/hindsight-docs/static/bank-template-schema.json +++ b/hindsight-docs/static/bank-template-schema.json @@ -65,9 +65,22 @@ } ], "default": null, - "description": "Max token size for each content chunk", + "description": "Target max characters for each content chunk", "title": "Retain Chunk Size" }, + "retain_structured_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Max characters for a single JSONL line or conversation turn to keep whole; defaults to retain_chunk_size when unset", + "title": "Retain Structured Chunk Size" + }, "enable_observations": { "anyOf": [ { diff --git a/hindsight-docs/static/openapi.json b/hindsight-docs/static/openapi.json index ed0cf4638..17a1f003e 100644 --- a/hindsight-docs/static/openapi.json +++ b/hindsight-docs/static/openapi.json @@ -6466,7 +6466,19 @@ } ], "title": "Retain Chunk Size", - "description": "Max token size for each content chunk" + "description": "Target max characters for each content chunk" + }, + "retain_structured_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Retain Structured Chunk Size", + "description": "Max characters for a single JSONL line or conversation turn to keep whole; defaults to retain_chunk_size when unset" }, "enable_observations": { "anyOf": [ @@ -7493,7 +7505,19 @@ } ], "title": "Retain Chunk Size", - "description": "Maximum token size for each content chunk during retain." + "description": "Target maximum characters for each content chunk during retain." 
+ }, + "retain_structured_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Retain Structured Chunk Size", + "description": "Maximum characters for a single JSONL line or conversation turn to keep whole during retain. Defaults to retain_chunk_size when unset." }, "enable_observations": { "anyOf": [ diff --git a/hindsight-embed/hindsight_embed/env.example b/hindsight-embed/hindsight_embed/env.example index 27e208f9f..aabd372c3 100644 --- a/hindsight-embed/hindsight_embed/env.example +++ b/hindsight-embed/hindsight_embed/env.example @@ -47,6 +47,9 @@ HINDSIGHT_API_LLM_BASE_URL=https://api.openai.com/v1 HINDSIGHT_API_HOST=0.0.0.0 HINDSIGHT_API_PORT=8888 HINDSIGHT_API_LOG_LEVEL=info +# Optional retain chunking override for structured logs/transcripts. +# Unset uses HINDSIGHT_API_RETAIN_CHUNK_SIZE as the structured-chunk limit. +# HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE= # Base Path / Reverse Proxy Support (Optional) # Set these when deploying behind a reverse proxy with path-based routing diff --git a/skills/hindsight-docs/references/developer/api/bank-templates.md b/skills/hindsight-docs/references/developer/api/bank-templates.md index 4e85c0b21..9fa4d1afd 100644 --- a/skills/hindsight-docs/references/developer/api/bank-templates.md +++ b/skills/hindsight-docs/references/developer/api/bank-templates.md @@ -28,6 +28,7 @@ Browse the Bank Templates Hub for ready-to-use templates. "retain_extraction_mode": "concise | verbose | custom | chunks", "retain_custom_instructions": "...", "retain_chunk_size": 2048, + "retain_structured_chunk_size": 8192, "disposition_skepticism": 3, "disposition_literalism": 3, "disposition_empathy": 3, @@ -84,7 +85,8 @@ All fields in `bank` are optional. 
Only the fields you include will be set as pe | `retain_mission` | string | Steers what gets extracted during retain | | `retain_extraction_mode` | string | `concise`, `verbose`, `custom`, or `chunks` | | `retain_custom_instructions` | string | Custom extraction prompt (requires `mode=custom`) | -| `retain_chunk_size` | integer | Max token size per content chunk | +| `retain_chunk_size` | integer | Target max characters per content chunk | +| `retain_structured_chunk_size` | integer | Max characters for a single JSONL line or conversation turn to keep whole; defaults to `retain_chunk_size` when unset | | `disposition_skepticism` | integer (1-5) | How skeptical the disposition is | | `disposition_literalism` | integer (1-5) | How literal the disposition is | | `disposition_empathy` | integer (1-5) | How empathetic the disposition is | diff --git a/skills/hindsight-docs/references/developer/api/memory-banks.md b/skills/hindsight-docs/references/developer/api/memory-banks.md index 6c52f934e..b1d6b2977 100644 --- a/skills/hindsight-docs/references/developer/api/memory-banks.md +++ b/skills/hindsight-docs/references/developer/api/memory-banks.md @@ -83,6 +83,12 @@ Maximum number of characters per chunk when splitting content for fact extractio Default: `3000` +### retain_structured_chunk_size + +Maximum number of characters for a single JSONL line or conversation turn to keep whole when it exceeds `retain_chunk_size`. When unset, the limit is exactly `retain_chunk_size`; set a larger value for structured logs or chat transcripts where splitting a single record would lose useful context. + +Default: unset, which uses `retain_chunk_size` + See [Retain configuration](../configuration.md#retain) for environment variable names and defaults. 
### entity_labels {#entity-labels} diff --git a/skills/hindsight-docs/references/developer/configuration.md b/skills/hindsight-docs/references/developer/configuration.md index 7e48ffb36..5deb5ce8b 100644 --- a/skills/hindsight-docs/references/developer/configuration.md +++ b/skills/hindsight-docs/references/developer/configuration.md @@ -1017,6 +1017,7 @@ Controls the retain (memory ingestion) pipeline. |----------|-------------|---------| | `HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS` | Max completion tokens for fact extraction LLM calls | `64000` | | `HINDSIGHT_API_RETAIN_CHUNK_SIZE` | Max characters per chunk for fact extraction. Larger chunks extract fewer LLM calls but may lose context. | `3000` | +| `HINDSIGHT_API_RETAIN_STRUCTURED_CHUNK_SIZE` | Max characters for a single JSONL line or conversation turn to keep whole. Unset uses `HINDSIGHT_API_RETAIN_CHUNK_SIZE`. Must be a positive integer when set. | - | | `HINDSIGHT_API_RETAIN_EXTRACTION_MODE` | Fact extraction mode: `concise`, `verbose`, `verbatim`, `chunks`, or `custom` | `concise` | | `HINDSIGHT_API_RETAIN_MISSION` | What this bank should pay attention to during extraction. Steers the LLM without replacing the extraction rules — works alongside any extraction mode. | - | | `HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS` | Full prompt override for fact extraction (only used when mode is `custom`). Replaces built-in extraction rules entirely. | - | @@ -1117,7 +1118,7 @@ export HINDSIGHT_API_RETAIN_EXTRACTION_MODE=verbatim Named strategies let you ingest different content types into the same bank using different extraction settings. A strategy is a set of hierarchical field overrides applied on top of the resolved bank config. -Any field in the hierarchical config can be overridden per strategy, including `retain_extraction_mode`, `retain_chunk_size`, `entity_labels`, `entities_allow_free_form`, `retain_mission`, etc. 
+Any field in the hierarchical config can be overridden per strategy, including `retain_extraction_mode`, `retain_chunk_size`, `retain_structured_chunk_size`, `entity_labels`, `entities_allow_free_form`, `retain_mission`, etc. Configure strategies via the bank config API: @@ -1127,7 +1128,8 @@ Configure strategies via the bank config API: "retain_strategies": { "conversations": { "retain_extraction_mode": "concise", - "retain_chunk_size": 3000 + "retain_chunk_size": 3000, + "retain_structured_chunk_size": 12000 }, "documents": { "retain_extraction_mode": "chunks", @@ -1777,7 +1779,7 @@ This design prevents bugs where global defaults are used instead of bank overrid Configuration fields are categorized for security: 1. **Configurable Fields** - Safe behavioral settings that can be customized per-bank: - - Retention: `retain_chunk_size`, `retain_extraction_mode`, `retain_mission`, `retain_custom_instructions` + - Retention: `retain_chunk_size`, `retain_structured_chunk_size`, `retain_extraction_mode`, `retain_mission`, `retain_custom_instructions` - Observations: `enable_observations`, `enable_auto_consolidation`, `observations_mission`, `max_observations_per_scope` - MCP access control: `mcp_enabled_tools` diff --git a/skills/hindsight-docs/references/developer/mcp-server.md b/skills/hindsight-docs/references/developer/mcp-server.md index 3af50fd9f..eb1e2acfa 100644 --- a/skills/hindsight-docs/references/developer/mcp-server.md +++ b/skills/hindsight-docs/references/developer/mcp-server.md @@ -514,7 +514,8 @@ The `config_updates` object accepts any bank-configurable field by its Python fi - `retain_mission` — steers what gets extracted during `retain()` - `retain_extraction_mode` — `concise` (default), `verbose`, or `custom` - `retain_custom_instructions` — custom extraction prompt (active when mode is `custom`) -- `retain_chunk_size` — maximum token size for each content chunk +- `retain_chunk_size` — target maximum characters for each content chunk +- 
`retain_structured_chunk_size` — maximum characters for a single JSONL line or conversation turn to keep whole - `retain_chunk_batch_size` — number of chunks to process in parallel - `enable_observations` — toggle observation consolidation after `retain()` - `observations_mission` — controls observation synthesis rules diff --git a/skills/hindsight-docs/references/openapi.json b/skills/hindsight-docs/references/openapi.json index ed0cf4638..17a1f003e 100644 --- a/skills/hindsight-docs/references/openapi.json +++ b/skills/hindsight-docs/references/openapi.json @@ -6466,7 +6466,19 @@ } ], "title": "Retain Chunk Size", - "description": "Max token size for each content chunk" + "description": "Target max characters for each content chunk" + }, + "retain_structured_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Retain Structured Chunk Size", + "description": "Max characters for a single JSONL line or conversation turn to keep whole; defaults to retain_chunk_size when unset" }, "enable_observations": { "anyOf": [ @@ -7493,7 +7505,19 @@ } ], "title": "Retain Chunk Size", - "description": "Maximum token size for each content chunk during retain." + "description": "Target maximum characters for each content chunk during retain." + }, + "retain_structured_chunk_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Retain Structured Chunk Size", + "description": "Maximum characters for a single JSONL line or conversation turn to keep whole during retain. Defaults to retain_chunk_size when unset." }, "enable_observations": { "anyOf": [