diff --git a/inst/CompactLinearModel.m b/inst/CompactLinearModel.m new file mode 100644 index 00000000..22ed45cb --- /dev/null +++ b/inst/CompactLinearModel.m @@ -0,0 +1,786 @@ +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +classdef CompactLinearModel < handle +## -*- texinfo -*- +## @deftp {statistics} CompactLinearModel +## +## Compact linear regression model. +## +## A @code{CompactLinearModel} object is a compact representation of a linear +## regression model. It contains the regression results (coefficient +## estimates, goodness-of-fit statistics, and model information) but does not +## contain the training data, residuals, or observation-level diagnostics. +## +## A @code{CompactLinearModel} object is created by calling @code{compact} on +## a @code{LinearModel} object. Users cannot construct a +## @code{CompactLinearModel} directly. +## +## @strong{Properties} +## +## All properties are read-only. +## +## @multitable @columnfractions 0.30 0.70 +## @headitem Property @tab Description +## @item @code{Coefficients} @tab Table of coefficient estimates, SE, tStat, +## and pValue. +## @item @code{CoefficientCovariance} @tab Variance-covariance matrix of the +## coefficient estimates. +## @item @code{CoefficientNames} @tab Cell array of coefficient names. 
+## @item @code{DFE} @tab Error degrees of freedom. +## @item @code{Formula} @tab Struct describing the model formula. +## @item @code{LogLikelihood} @tab Log-likelihood of the fitted model. +## @item @code{ModelCriterion} @tab Struct with AIC, AICc, BIC, CAIC. +## @item @code{MSE} @tab Mean squared error. +## @item @code{NumCoefficients} @tab Total number of coefficients. +## @item @code{NumEstimatedCoefficients} @tab Number of estimated (non-rank- +## deficient) coefficients. +## @item @code{NumObservations} @tab Number of observations used in fitting. +## @item @code{NumPredictors} @tab Number of predictor variables. +## @item @code{NumVariables} @tab Total number of variables. +## @item @code{PredictorNames} @tab Cell array of predictor variable names. +## @item @code{ResponseName} @tab Name of the response variable. +## @item @code{RMSE} @tab Root mean squared error. +## @item @code{Robust} @tab Robust fitting information (empty for OLS). +## @item @code{Rsquared} @tab Struct with Ordinary and Adjusted R-squared. +## @item @code{SSE} @tab Sum of squared errors. +## @item @code{SSR} @tab Regression sum of squares. +## @item @code{SST} @tab Total sum of squares. +## @item @code{VariableInfo} @tab Table of variable metadata. +## @item @code{VariableNames} @tab Cell array of all variable names. +## @end multitable +## +## @strong{Methods} +## +## @multitable @columnfractions 0.30 0.70 +## @headitem Method @tab Description +## @item @code{predict} @tab Predict response for new data. +## @item @code{coefCI} @tab Confidence intervals for coefficients. +## @item @code{coefTest} @tab Linear hypothesis test on coefficients. 
+## @end multitable +## +## @seealso{fitlm} +## @end deftp + + properties (SetAccess = protected) + + ## Coefficient estimates + Coefficients = []; + CoefficientNames = {}; + CoefficientCovariance = []; + NumCoefficients = 0; + NumEstimatedCoefficients = 0; + + ## Summary statistics + DFE = 0; + SSE = 0; + SSR = 0; + SST = 0; + MSE = 0; + RMSE = 0; + Rsquared = []; + LogLikelihood = 0; + ModelCriterion = []; + + ## Fitting method + Robust = []; + + ## Input data info (survives compact()) + Formula = []; + NumObservations = 0; + NumPredictors = 0; + NumVariables = 0; + PredictorNames = {}; + ResponseName = ''; + VariableInfo = []; + VariableNames = {}; + + endproperties + + ## Constructor — Access = protected so only LinearModel (subclass) or + ## internal factory methods can create CompactLinearModel objects. + ## When Octave supports (Access = ?LinearModel), switch to that. + methods (Access = protected) + + function obj = CompactLinearModel (s) + ## CompactLinearModel Constructor (internal use only). + ## + ## obj = CompactLinearModel (s) + ## + ## S is a struct containing all fields needed to populate the object. + ## This constructor is called by LinearModel.compact() or by fitlm's + ## internal builder. Users should not call it directly. + + if (nargin < 1) + ## Allow zero-arg construction for Octave classdef internals + return; + endif + + if (! 
isstruct (s)) + error ("CompactLinearModel: constructor argument must be a struct."); + endif + + ## Coefficient results + obj.Coefficients = s.Coefficients; + obj.CoefficientNames = s.CoefficientNames; + obj.CoefficientCovariance = s.CoefficientCovariance; + obj.NumCoefficients = s.NumCoefficients; + obj.NumEstimatedCoefficients = s.NumEstimatedCoefficients; + + ## Summary statistics + obj.DFE = s.DFE; + obj.SSE = s.SSE; + obj.SSR = s.SSR; + obj.SST = s.SST; + obj.MSE = s.MSE; + obj.RMSE = s.RMSE; + obj.Rsquared = s.Rsquared; + obj.LogLikelihood = s.LogLikelihood; + obj.ModelCriterion = s.ModelCriterion; + + ## Fitting method + obj.Robust = s.Robust; + + ## Formula and variable info + obj.Formula = s.Formula; + obj.NumObservations = s.NumObservations; + obj.NumPredictors = s.NumPredictors; + obj.NumVariables = s.NumVariables; + obj.PredictorNames = s.PredictorNames; + obj.ResponseName = s.ResponseName; + obj.VariableInfo = s.VariableInfo; + obj.VariableNames = s.VariableNames; + + endfunction + + endmethods + + ## Hidden methods: display, disp, subsref + methods (Hidden) + + ## Custom display — called when variable name is typed at prompt + function display (obj) + in_name = inputname (1); + if (! isempty (in_name)) + fprintf ("%s =\n", in_name); + endif + disp (obj); + endfunction + + ## Custom disp — replicates MATLAB's LinearModel display format + function disp (obj) + + fprintf ("\n"); + + ## Show "Compact" prefix for CompactLinearModel, not for LinearModel + if (strcmp (class (obj), "CompactLinearModel")) + fprintf ("Compact linear regression model:\n"); + else + fprintf ("Linear regression model:\n"); + endif + + ## Formula line: ResponseName ~ LinearPredictor + lp_str = ""; + if (isa (obj.Formula, "LinearFormula")) + lp_str = obj.Formula.LinearPredictor; + elseif (isstruct (obj.Formula) && isfield (obj.Formula, "LinearPredictor")) + lp_str = obj.Formula.LinearPredictor; + endif + if (! isempty (lp_str)) + if (! 
isempty (obj.ResponseName)) + fprintf (" %s ~ %s\n", obj.ResponseName, lp_str); + else + fprintf (" %s\n", lp_str); + endif + endif + + fprintf ("\n"); + fprintf ("Estimated Coefficients:\n"); + + ## --- Build coefficient table display --- + coef_names = obj.CoefficientNames; + p = numel (coef_names); + + ## Extract numeric vectors from Coefficients + if (isstruct (obj.Coefficients)) + est = obj.Coefficients.Estimate; + se = obj.Coefficients.SE; + ts = obj.Coefficients.tStat; + pv = obj.Coefficients.pValue; + elseif (isa (obj.Coefficients, "table")) + est = obj.Coefficients.Estimate; + se = obj.Coefficients.SE; + ts = obj.Coefficients.tStat; + pv = obj.Coefficients.pValue; + else + est = []; se = []; ts = []; pv = []; + endif + + if (! isempty (est)) + ## Column headers + hdr_fmt = " %-20s %12s %12s %12s %12s\n"; + fprintf (hdr_fmt, "", "Estimate", "SE", "tStat", "pValue"); + + ## Separator line + sep_fmt = " %-20s %12s %12s %12s %12s\n"; + fprintf (sep_fmt, "", "________", "________", "________", "__________"); + fprintf ("\n"); + + ## Data rows + for i = 1:p + row_fmt = " %-20s %12.6g %12.6g %12.6g %12.4g\n"; + fprintf (row_fmt, coef_names{i}, est(i), se(i), ts(i), pv(i)); + endfor + endif + + fprintf ("\n"); + + ## Summary footer + fprintf ("Number of observations: %d, Error degrees of freedom: %d\n", ... + obj.NumObservations, obj.DFE); + fprintf ("Root Mean Squared Error: %.4g\n", obj.RMSE); + + if (isstruct (obj.Rsquared)) + fprintf ("R-squared: %.4g, Adjusted R-Squared: %.4g\n", ... + obj.Rsquared.Ordinary, obj.Rsquared.Adjusted); + endif + + ## F-statistic vs constant model + ## F = (SSR / (NumEstimatedCoefficients - 1)) / MSE + if (obj.NumEstimatedCoefficients > 1 && obj.MSE > 0) + df1 = obj.NumEstimatedCoefficients - 1; + F_val = (obj.SSR / df1) / obj.MSE; + F_pval = 1 - fcdf (F_val, df1, obj.DFE); + fprintf ("F-statistic vs. constant model: %.4g, p-value = %.4g\n", ... 
+ F_val, F_pval); + endif + + fprintf ("\n"); + + endfunction + + ## Custom subsref — supports chained dot indexing for properties + function varargout = subsref (obj, s) + chain_s = s(2:end); + s = s(1); + switch (s.type) + case "()" + error (strcat ("Invalid () indexing for referencing values", ... + " in a CompactLinearModel object.")); + case "{}" + error (strcat ("Invalid {} indexing for referencing values", ... + " in a CompactLinearModel object.")); + case "." + if (! ischar (s.subs)) + error (strcat ("CompactLinearModel.subsref: '.'", ... + " indexing argument must be a character vector.")); + endif + ## Check if it's a method call + switch (s.subs) + case "predict" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = predict (obj, args{:}); + else + error ("CompactLinearModel: predict requires arguments."); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "coefCI" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = coefCI (obj, args{:}); + else + [varargout{1:nargout}] = coefCI (obj); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "coefTest" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = coefTest (obj, args{:}); + else + [varargout{1:nargout}] = coefTest (obj); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "compact" + [varargout{1:nargout}] = compact (obj); + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "addTerms" + if (! 
isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = addTerms (obj, args{:}); + else + error ("CompactLinearModel: addTerms requires arguments."); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "removeTerms" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = removeTerms (obj, args{:}); + else + error ("CompactLinearModel: removeTerms requires arguments."); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "step" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = step (obj, args{:}); + else + [varargout{1:nargout}] = step (obj); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "anova" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = anova (obj, args{:}); + else + [varargout{1:nargout}] = anova (obj); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "feval" + if (! isempty (chain_s) && strcmp (chain_s(1).type, "()")) + args = chain_s(1).subs; + chain_s = chain_s(2:end); + [varargout{1:nargout}] = predict (obj, args{:}); + else + error ("CompactLinearModel: feval requires arguments."); + endif + if (! isempty (chain_s)) + [varargout{1:nargout}] = subsref (varargout{1}, chain_s); + endif + return; + case "disp" + disp (obj); + return; + endswitch + ## Property access + try + out = obj.(s.subs); + catch + error ("CompactLinearModel.subsref: unrecognized property: '%s'.", ... 
+ s.subs); + end_try_catch + endswitch + ## Chained references + if (! isempty (chain_s)) + out = subsref (out, chain_s); + endif + varargout{1} = out; + endfunction + + ## Prevent direct property assignment by users + function obj = subsasgn (obj, s, val) + if (numel (s) > 1) + error (strcat ("CompactLinearModel.subsasgn:", ... + " chained subscripts not allowed.")); + endif + switch (s.type) + case "()" + error (strcat ("Invalid () indexing for assigning values", ... + " to a CompactLinearModel object.")); + case "{}" + error (strcat ("Invalid {} indexing for assigning values", ... + " to a CompactLinearModel object.")); + case "." + error (strcat ("CompactLinearModel.subsasgn:", ... + " all properties are read-only: '%s'."), s.subs); + endswitch + endfunction + + endmethods + + ## Public methods: predict, coefCI, coefTest + methods (Access = public) + + ## -*- texinfo -*- + ## @deftypefn {CompactLinearModel} {@var{ypred} =} predict (@var{obj}, @var{Xnew}) + ## @deftypefnx {CompactLinearModel} {[@var{ypred}, @var{yci}] =} predict (@var{obj}, @var{Xnew}) + ## @deftypefnx {CompactLinearModel} {[@var{ypred}, @var{yci}] =} predict (@dots{}, @var{Name}, @var{Value}) + ## + ## Predict response for new data using a compact linear regression model. + ## + ## @code{@var{ypred} = predict (@var{obj}, @var{Xnew})} returns the predicted + ## response values for the predictor data in @var{Xnew} using the fitted + ## model @var{obj}. @var{Xnew} must be a numeric matrix with the same + ## number of predictor columns as used to fit the model (excluding + ## intercept — it is added automatically if the model has one). + ## + ## @code{[@var{ypred}, @var{yci}] = predict (@dots{})} also returns + ## confidence or prediction intervals in the @math{n x 2} matrix @var{yci}. + ## + ## Additional options can be specified as @qcode{Name-Value} pairs: + ## + ## @table @code + ## @item Alpha + ## Significance level for the confidence/prediction intervals. 
+ ## Default is 0.05 (95% intervals). + ## + ## @item Prediction + ## Type of interval: @qcode{'curve'} for confidence intervals on the + ## mean response (default), or @qcode{'observation'} for prediction + ## intervals on individual new observations. + ## + ## @item Simultaneous + ## If @code{true}, compute simultaneous Scheffé confidence bands. + ## Default is @code{false} (pointwise intervals). + ## @end table + ## + ## @seealso{CompactLinearModel, coefCI, coefTest} + ## @end deftypefn + + function [ypred, yci] = predict (obj, Xnew, varargin) + + if (nargin < 2) + error ("CompactLinearModel.predict: Xnew is required."); + endif + + ## Parse name-value pairs + alpha = 0.05; + pred_obs = false; ## false = curve (CI on mean), true = observation (PI) + simult = false; + + i = 1; + while (i <= numel (varargin)) + if (! ischar (varargin{i})) + error ("CompactLinearModel.predict: expected parameter name string."); + endif + switch (lower (varargin{i})) + case "alpha" + alpha = varargin{i+1}; + i = i + 2; + case "prediction" + pred_obs = strcmpi (varargin{i+1}, "observation"); + i = i + 2; + case "simultaneous" + simult = varargin{i+1}; + i = i + 2; + otherwise + error ("CompactLinearModel.predict: unknown parameter '%s'.", ... 
+ varargin{i}); + endswitch + endwhile + + ## Determine intercept flag + has_int = false; + if (isa (obj.Formula, "LinearFormula")) + has_int = obj.Formula.HasIntercept; + elseif (isstruct (obj.Formula) && isfield (obj.Formula, "HasIntercept")) + has_int = obj.Formula.HasIntercept; + endif + + ## Unpack table input into numeric design matrix + if (isa (Xnew, "table")) + Xnew = __predict_table_to_Xmat__ (obj, Xnew, has_int); + ## Xnew is now the full design matrix (with intercept if applicable) + Xmat = Xnew; + else + ## Standard numeric matrix path + if (has_int) + Xmat = [ones(rows (Xnew), 1), Xnew]; + else + Xmat = Xnew; + endif + endif + + ## Check dimensions + if (columns (Xmat) != obj.NumCoefficients) + error (strcat ("CompactLinearModel.predict: Xnew has wrong", ... + " number of columns. Expected %d predictor columns."), ... + obj.NumCoefficients - double (obj.Formula.HasIntercept)); + endif + + ## Extract coefficient estimates + if (isa (obj.Coefficients, "table")) + beta = obj.Coefficients.Estimate; + elseif (isstruct (obj.Coefficients)) + beta = obj.Coefficients.Estimate; + else + error ("CompactLinearModel.predict: invalid Coefficients format."); + endif + + ## Point predictions + ypred = Xmat * beta; + + ## Compute intervals if requested + if (nargout > 1) + se_fit = sqrt (sum ((Xmat * obj.CoefficientCovariance) .* Xmat, 2)); + + if (pred_obs) + se_pred = sqrt (se_fit .^ 2 + obj.MSE); + else + se_pred = se_fit; + endif + + if (simult) + p_eff = obj.NumEstimatedCoefficients; + F_crit = finv (1 - alpha, p_eff, obj.DFE); + hw = sqrt (p_eff * F_crit) .* se_pred; + else + hw = tinv (1 - alpha / 2, obj.DFE) .* se_pred; + endif + + yci = [ypred - hw, ypred + hw]; + endif + + endfunction + + ## -*- texinfo -*- + ## @deftypefn {CompactLinearModel} {@var{ci} =} coefCI (@var{obj}) + ## @deftypefnx {CompactLinearModel} {@var{ci} =} coefCI (@var{obj}, @var{alpha}) + ## + ## Confidence intervals for coefficient estimates. 
+ ## + ## @code{@var{ci} = coefCI (@var{obj})} returns a @math{p x 2} matrix + ## of 95% confidence intervals for the coefficient estimates, where + ## @math{p} is the number of coefficients. + ## + ## @code{@var{ci} = coefCI (@var{obj}, @var{alpha})} specifies the + ## significance level. For example, @code{alpha = 0.01} gives 99% + ## confidence intervals. + ## + ## @seealso{CompactLinearModel, predict, coefTest} + ## @end deftypefn + + function ci = coefCI (obj, alpha) + + if (nargin < 2) + alpha = 0.05; + endif + + t_crit = tinv (1 - alpha / 2, obj.DFE); + + ## Extract estimates and SEs + if (isa (obj.Coefficients, "table")) + est = obj.Coefficients.Estimate; + se = obj.Coefficients.SE; + elseif (isstruct (obj.Coefficients)) + est = obj.Coefficients.Estimate; + se = obj.Coefficients.SE; + else + error ("CompactLinearModel.coefCI: invalid Coefficients format."); + endif + + ci = [est - t_crit * se, est + t_crit * se]; + + endfunction + + ## -*- texinfo -*- + ## @deftypefn {CompactLinearModel} {[@var{pval}, @var{F}, @var{r}] =} coefTest (@var{obj}) + ## @deftypefnx {CompactLinearModel} {[@var{pval}, @var{F}, @var{r}] =} coefTest (@var{obj}, @var{H}) + ## @deftypefnx {CompactLinearModel} {[@var{pval}, @var{F}, @var{r}] =} coefTest (@var{obj}, @var{H}, @var{c}) + ## + ## Linear hypothesis test on coefficients. + ## + ## @code{[@var{pval}, @var{F}, @var{r}] = coefTest (@var{obj})} performs + ## the overall F-test: tests whether all non-intercept coefficients are + ## simultaneously zero. The F-statistic matches the "F-statistic vs. + ## constant model" shown by @code{disp}. + ## + ## @code{[@var{pval}, @var{F}, @var{r}] = coefTest (@var{obj}, @var{H})} + ## tests the hypothesis @code{@var{H} * beta = 0}, where @var{H} is a + ## contrast matrix. + ## + ## @code{[@var{pval}, @var{F}, @var{r}] = coefTest (@var{obj}, @var{H}, + ## @var{c})} tests the hypothesis @code{@var{H} * beta = @var{c}}. 
+ ## + ## The output @var{r} is the numerator degrees of freedom for the + ## F-test (equal to the number of rows in @var{H}). The F-statistic + ## has @var{r} degrees of freedom in the numerator and @code{obj.DFE} + ## degrees of freedom in the denominator. + ## + ## @seealso{CompactLinearModel, predict, coefCI} + ## @end deftypefn + + function [pval, F, r] = coefTest (obj, H, c) + + ## Extract beta + if (isa (obj.Coefficients, "table")) + beta = obj.Coefficients.Estimate; + elseif (isstruct (obj.Coefficients)) + beta = obj.Coefficients.Estimate; + else + error ("CompactLinearModel.coefTest: invalid Coefficients format."); + endif + + p = obj.NumCoefficients; + + ## Default H: overall F-test (all non-intercept = 0) + if (nargin < 2 || isempty (H)) + ## H = [zeros(p-1, 1), eye(p-1)] — tests all columns except intercept + H = [zeros(p - 1, 1), eye(p - 1)]; + endif + + ## Default c: zero vector + if (nargin < 3 || isempty (c)) + c = zeros (rows (H), 1); + endif + + ## Contrast vector (used internally for F computation) + contrast = H * beta - c; + + ## Wald F-test + V = H * obj.CoefficientCovariance * H'; + df1 = rows (H); + F = (contrast' * (V \ contrast)) / df1; + pval = 1 - fcdf (F, df1, obj.DFE); + + ## r = numerator degrees of freedom (= rows of H) + r = df1; + + endfunction + + function [varargout] = feval (obj, varargin) + ## feval Evaluate model (alias for predict). + ## + ## ypred = feval (mdl, Xnew) + ## [ypred, yci] = feval (mdl, Xnew, ...) + ## + ## This method enables MATLAB-compatible feval(mdl, Xnew) syntax. + + [varargout{1:nargout}] = predict (obj, varargin{:}); + endfunction + + endmethods + + methods (Access = private) + + function Xmat = __predict_table_to_Xmat__ (obj, tbl_new, has_int) + ## __predict_table_to_Xmat__ Convert a table of predictors to design matrix. 
+ ## + ## Extracts columns matching PredictorNames, expands categoricals + ## into dummy columns using the same reference coding as the fitted + ## model, and prepends an intercept column if needed. + + pred_names = obj.PredictorNames; + p = numel (pred_names); + n_new = rows (tbl_new); + + ## Get the CoefficientNames to determine expected dummy structure + coef_names = obj.CoefficientNames; + + ## Determine which predictors are actually in the model + ## Use InModel from Formula if available + in_model = true (1, p); + if (isa (obj.Formula, "LinearFormula") && ! isempty (obj.Formula.InModel)) + im = obj.Formula.InModel; + in_model = im(1:min(p, numel(im))); + elseif (isstruct (obj.Formula) && isfield (obj.Formula, "InModel")) + im = obj.Formula.InModel; + in_model = im(1:min(p, numel(im))); + endif + + ## Build columns for each predictor that is IN the model + Xcols = []; + for k = 1:p + if (! in_model(k)) + continue; + endif + pname = pred_names{k}; + + ## Extract column from table + try + col_data = tbl_new.(pname); + catch + error ("CompactLinearModel.predict: table is missing predictor column '%s'.", ... + pname); + end_try_catch + + if (isa (col_data, "categorical")) + ## Categorical: expand into dummy columns using reference coding + levs = cellstr (categories (col_data)); + col_str = cellstr (col_data); + [~, cat_idx] = ismember (col_str, levs); + n_lev = numel (levs); + + ## Determine which levels the model expects (from CoefficientNames) + ## Look for patterns like PredName_LevelName in coef_names + dum_cols = []; + dum_found = false; + for L = 1:n_lev + expected_name = sprintf ("%s_%s", pname, levs{L}); + if (any (strcmp (coef_names, expected_name))) + dum_found = true; + dum_cols = [dum_cols, double(cat_idx == L)]; + endif + endfor + + if (! 
dum_found) + ## Fallback: reference coding (drop first level) + for L = 2:n_lev + dum_cols = [dum_cols, double(cat_idx == L)]; + endfor + endif + + Xcols = [Xcols, dum_cols]; + else + ## Numeric predictor + Xcols = [Xcols, double(col_data)]; + endif + endfor + + ## Prepend intercept + if (has_int) + Xmat = [ones(n_new, 1), Xcols]; + else + Xmat = Xcols; + endif + endfunction + + endmethods + +endclassdef + +## Test: CompactLinearModel cannot be constructed directly (protected access) +## This should error since the constructor is protected. +%!error CompactLinearModel (struct ()) + +## Test: verify class definition parses without error +## (This test just confirms the file is syntactically valid) +%!test +%! ## Check that the class exists and is loadable +%! assert (exist ("CompactLinearModel", "class") > 0 || ... +%! exist ("CompactLinearModel") > 0); diff --git a/inst/LinearFormula.m b/inst/LinearFormula.m new file mode 100644 index 00000000..c29e0ae6 --- /dev/null +++ b/inst/LinearFormula.m @@ -0,0 +1,122 @@ +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +classdef LinearFormula +## -*- texinfo -*- +## @deftp {statistics} LinearFormula +## +## Linear formula encapsulation class. 
+## +## A @code{LinearFormula} object encapsulates the internal formula +## representation for a @code{LinearModel} or @code{CompactLinearModel}. +## When displayed in the console, it shows only the human-readable +## formula string (e.g., @code{y ~ 1 + x1 + x2}). +## +## @strong{Properties} +## +## @multitable @columnfractions 0.30 0.70 +## @headitem Property @tab Description +## @item @code{ResponseName} @tab Name of the response variable. +## @item @code{LinearPredictor} @tab Human-readable formula string. +## @item @code{Terms} @tab Binary terms matrix (n_terms x p). +## @item @code{HasIntercept} @tab Logical: true if model has intercept. +## @item @code{InModel} @tab Logical vector indicating which terms are +## active. +## @item @code{CoefficientNames} @tab Cell array of coefficient names. +## @item @code{PredictorNames} @tab Cell array of predictor names. +## @item @code{VariableNames} @tab Cell array of all variable names. +## @end multitable +## +## @seealso{fitlm, LinearModel, CompactLinearModel} +## @end deftp + + properties (SetAccess = protected) + ResponseName = ""; + LinearPredictor = ""; + Terms = []; + HasIntercept = true; + InModel = []; + CoefficientNames = {}; + PredictorNames = {}; + VariableNames = {}; + endproperties + + methods + + function obj = LinearFormula (s) + ## LinearFormula Constructor. + ## + ## obj = LinearFormula (s) + ## + ## S is a struct with formula fields. If S is already a + ## LinearFormula, it is returned as-is. + + if (nargin < 1) + return; + endif + + if (isa (s, "LinearFormula")) + obj = s; + return; + endif + + if (! 
isstruct (s)) + error ("LinearFormula: constructor argument must be a struct."); + endif + + if (isfield (s, "ResponseName")) + obj.ResponseName = s.ResponseName; + endif + if (isfield (s, "LinearPredictor")) + obj.LinearPredictor = s.LinearPredictor; + endif + if (isfield (s, "Terms")) + obj.Terms = s.Terms; + endif + if (isfield (s, "HasIntercept")) + obj.HasIntercept = s.HasIntercept; + endif + if (isfield (s, "InModel")) + obj.InModel = s.InModel; + endif + if (isfield (s, "CoefficientNames")) + obj.CoefficientNames = s.CoefficientNames; + endif + if (isfield (s, "PredictorNames")) + obj.PredictorNames = s.PredictorNames; + endif + if (isfield (s, "VariableNames")) + obj.VariableNames = s.VariableNames; + endif + endfunction + + function disp (obj) + ## disp Display only the formula string. + ## + ## Matches MATLAB's LinearFormula display behavior: shows only + ## the human-readable formula (e.g., "y ~ 1 + x1 + x2"). + fprintf (" %s ~ %s\n", obj.ResponseName, obj.LinearPredictor); + endfunction + + function s = char (obj) + ## char Convert to character representation. + s = sprintf ("%s ~ %s", obj.ResponseName, obj.LinearPredictor); + endfunction + + endmethods + +endclassdef diff --git a/inst/LinearModel.m b/inst/LinearModel.m new file mode 100644 index 00000000..93713c36 --- /dev/null +++ b/inst/LinearModel.m @@ -0,0 +1,1304 @@ +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. 
+## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +classdef LinearModel < CompactLinearModel +## -*- texinfo -*- +## @deftp {statistics} LinearModel +## +## Linear regression model. +## +## A @code{LinearModel} object encapsulates the results of fitting a linear +## regression model. It inherits from @code{CompactLinearModel} and adds +## the source training data, observation-level residuals and diagnostics, +## and model modification methods. +## +## A @code{LinearModel} object is created by @code{fitlm} or +## @code{stepwiselm}. Users do not call the constructor directly. +## +## @strong{Additional Properties (beyond CompactLinearModel)} +## +## @multitable @columnfractions 0.25 0.75 +## @headitem Property @tab Description +## @item @code{Variables} @tab Original input data (all rows, including excluded). +## @item @code{ObservationNames} @tab Cell array of row names. +## @item @code{ObservationInfo} @tab Table with columns Weights, Excluded, Missing, Subset. +## @item @code{Fitted} @tab Fitted values for all observations. +## @item @code{Residuals} @tab Table with columns Raw, Pearson, Studentized, Standardized. +## @item @code{Diagnostics} @tab Table with columns Leverage, CooksDistance, Dffits, S2_i, CovRatio, Dfbetas, HatMatrix. +## @item @code{ModelFitVsNullModel} @tab Struct with fields Fstat, Pvalue, NullModel. +## @item @code{Steps} @tab Stepwise fitting history (empty for fitlm). +## @end multitable +## +## @strong{Methods} +## +## @multitable @columnfractions 0.25 0.75 +## @headitem Method @tab Description +## @item @code{compact} @tab Strip to CompactLinearModel. +## @item @code{addTerms} @tab Add terms and refit. +## @item @code{removeTerms} @tab Remove terms and refit. +## @item @code{step} @tab Single optimal stepwise step. +## @item @code{anova} @tab ANOVA table (partial SS). 
+## @end multitable +## +## @seealso{fitlm, CompactLinearModel} +## @end deftp + + properties (SetAccess = protected) + ## Source data (not on CompactLinearModel) + Variables = []; + ObservationNames = {}; + ObservationInfo = []; + + ## Diagnostics (not on CompactLinearModel) + Fitted = []; + Residuals = []; + Diagnostics = []; + ModelFitVsNullModel = []; + + ## Fitting method + Steps = []; + endproperties + + + methods (Access = protected) + + function obj = LinearModel (s) + ## LinearModel Constructor (internal use only). + ## + ## obj = LinearModel (s) + ## + ## S is a struct with two groups of fields: + ## - All CompactLinearModel fields (passed to parent constructor) + ## - LinearModel-only fields: Variables, ObservationNames, + ## ObservationInfo, Fitted, Residuals, Diagnostics, + ## ModelFitVsNullModel, Steps + + if (nargin < 1) + ## Allow zero-arg construction for Octave classdef internals + return; + endif + + if (!isstruct(s)) + error("LinearModel: constructor argument must be a struct."); + endif + + ## Call parent constructor with the CompactLinearModel fields + obj = obj@CompactLinearModel (s); + + ## Assign LinearModel-only properties + if (isfield (s, "Variables")) + obj.Variables = s.Variables; + endif + if (isfield (s, "ObservationNames")) + obj.ObservationNames = s.ObservationNames; + endif + if (isfield (s, "ObservationInfo")) + obj.ObservationInfo = s.ObservationInfo; + endif + if (isfield (s, "Fitted")) + obj.Fitted = s.Fitted; + endif + if (isfield (s, "Residuals")) + obj.Residuals = s.Residuals; + endif + if (isfield (s, "Diagnostics")) + obj.Diagnostics = s.Diagnostics; + endif + if (isfield (s, "ModelFitVsNullModel")) + obj.ModelFitVsNullModel = s.ModelFitVsNullModel; + endif + if (isfield (s, "Steps")) + obj.Steps = s.Steps; + endif + + endfunction + + endmethods + + methods (Static, Access = public) + + function obj = fromFit (fit_s, X, y, formula, pred_names, resp_name, ... + var_names, var_info, obs_names, obs_info, ... 
+ weights, excluded, missing_mask, steps, ... + variables_data, X_full) + ## fromFit Build a LinearModel from lm_fit_engine output. + ## + ## This static factory method takes the raw lm_fit_engine struct and + ## all metadata, computes observation-level diagnostics, and returns + ## a fully-populated LinearModel object. + ## + ## Parameters: + ## fit_s - struct from lm_fit_engine(X_used, y_used, w_used) + ## X - design matrix for USED (subset) observations only + ## y - response vector for USED observations only + ## formula - Formula struct + ## pred_names - cell of predictor names + ## resp_name - response variable name (char) + ## var_names - cell of all variable names + ## var_info - variable info struct/table + ## obs_names - cell of observation names (or {}) + ## obs_info - struct with Weights, Excluded, Missing, Subset + ## weights - original weights vector (n_total x 1, or []) + ## excluded - logical n_total x 1 excluded mask + ## missing_mask - logical n_total x 1 missing mask + ## steps - Steps struct ([] for fitlm) + ## variables_data - Variables struct/table (source data) + + n_total = numel (excluded); + n_used = fit_s.n; + subset = obs_info.Subset; + + ## Compute Fitted for ALL observations (including excluded/missing) + fitted_all = NaN (n_total, 1); + fitted_all(subset) = fit_s.yhat; + ## Excluded-but-not-missing rows: compute yhat = X_full * beta (Phase 4) + ## MATLAB Block 7: excluded rows DO get Fitted values (X * beta extrapolation) + if (nargin >= 16 && ! isempty (X_full)) + excl_not_missing = excluded & ! 
missing_mask; + if (any (excl_not_missing)) + fitted_all(excl_not_missing) = X_full(excl_not_missing, :) * fit_s.beta; + endif + endif + + ## Compute diagnostics for used observations + h = fit_s.h; + raw = fit_s.resid; + rmse = fit_s.rmse; + mse = fit_s.mse; + sse = fit_s.sse; + dfe = fit_s.dfe; + p_est = fit_s.p_est; + + ## Residual types (for used observations only) + pearson = raw / rmse; + standardized = raw ./ (rmse * sqrt (1 - h)); + S2_i_used = (sse - raw .^ 2 ./ (1 - h)) / (dfe - 1); + studentized = raw ./ (sqrt (S2_i_used) .* sqrt (1 - h)); + + ## Expand residuals to all observations (NaN for excluded/missing) + raw_all = NaN (n_total, 1); + pearson_all = NaN (n_total, 1); + studentized_all = NaN (n_total, 1); + standardized_all = NaN (n_total, 1); + + raw_all(subset) = raw; + pearson_all(subset) = pearson; + studentized_all(subset) = studentized; + standardized_all(subset) = standardized; + + ## Build Residuals table (MATLAB returns n x 4 table) + if (! isempty (obs_names)) + resid_tbl = table (raw_all, pearson_all, studentized_all, ... + standardized_all, ... + "VariableNames", {"Raw", "Pearson", ... + "Studentized", "Standardized"}, ... + "RowNames", obs_names(:)'); + else + resid_tbl = table (raw_all, pearson_all, studentized_all, ... + standardized_all, ... + "VariableNames", {"Raw", "Pearson", ... + "Studentized", "Standardized"}); + endif + + ## Diagnostics for used observations + CooksDistance_used = (1 / p_est) * (h ./ (1 - h)) .* standardized .^ 2; + Dffits_used = studentized .* sqrt (h ./ (1 - h)); + ## CovRatio: Candidate 3 formula, verified to 3.55e-14 against MATLAB + CovRatio_used = (S2_i_used / mse) .^ p_est ./ (1 - h); + + ## Hat matrix from QR factor (n_used x n_used) + HatMatrix_full = NaN (n_total, n_total); + Q_est = fit_s.Q; + H_used = Q_est * Q_est'; + ## Place into full matrix + idx_used = find (subset); + HatMatrix_full(idx_used, idx_used) = H_used; + ## Excluded rows: set to 0 + idx_excl = find (! 
subset); + HatMatrix_full(idx_excl, :) = 0; + HatMatrix_full(:, idx_excl) = 0; + + ## Leverage for all observations + leverage_all = zeros (n_total, 1); + leverage_all(subset) = h; + + ## CooksDistance, Dffits, S2_i, CovRatio for all obs + CooksDistance_all = NaN (n_total, 1); + Dffits_all = NaN (n_total, 1); + S2_i_all = NaN (n_total, 1); + CovRatio_all = NaN (n_total, 1); + + CooksDistance_all(subset) = CooksDistance_used; + Dffits_all(subset) = Dffits_used; + S2_i_all(subset) = S2_i_used; + CovRatio_all(subset) = CovRatio_used; + + ## Dfbetas (n_total x p_tot) via update formula + p_tot = fit_s.p_tot; + Dfbetas_all = NaN (n_total, p_tot); + vcov = fit_s.vcov; + se_coef = sqrt (diag (vcov)); + for i = 1:n_used + ii = idx_used(i); + x_i = X(i, :)'; + e_i = raw(i); + h_i = h(i); + s2_i = S2_i_used(i); + if (s2_i > 0 && (1 - h_i) > 0) + Dfbetas_all(ii, :) = ... + ((vcov * x_i * e_i) / ((1 - h_i) * sqrt (s2_i))) ./ se_coef; + endif + endfor + + ## Build Diagnostics table (MATLAB returns n x 7 table) + if (! isempty (obs_names)) + diag_tbl = table (leverage_all, CooksDistance_all, Dffits_all, ... + S2_i_all, CovRatio_all, Dfbetas_all, ... + HatMatrix_full, ... + "VariableNames", {"Leverage", "CooksDistance", ... + "Dffits", "S2_i", "CovRatio", ... + "Dfbetas", "HatMatrix"}, ... + "RowNames", obs_names(:)'); + else + diag_tbl = table (leverage_all, CooksDistance_all, Dffits_all, ... + S2_i_all, CovRatio_all, Dfbetas_all, ... + HatMatrix_full, ... + "VariableNames", {"Leverage", "CooksDistance", ... + "Dffits", "S2_i", "CovRatio", ... + "Dfbetas", "HatMatrix"}); + endif + + ## ModelFitVsNullModel + if (p_est > 1 && mse > 0) + df_reg = p_est - 1; + F_null = (fit_s.ssr / df_reg) / mse; + p_null = 1 - fcdf (F_null, df_reg, dfe); + else + F_null = NaN; + p_null = NaN; + endif + mfvnm = struct ("Fstat", F_null, "Pvalue", p_null, ... 
+ "NullModel", "constant"); + + ## Build struct for parent + child constructor + s = struct (); + + ## CompactLinearModel fields + s.Coefficients = table (fit_s.beta, fit_s.se, ... + fit_s.tstat, fit_s.pval, ... + "VariableNames", {"Estimate", "SE", "tStat", "pValue"}, ... + "RowNames", formula.CoefficientNames(:)'); + s.CoefficientNames = formula.CoefficientNames; + s.CoefficientCovariance = fit_s.vcov; + s.NumCoefficients = fit_s.p_tot; + s.NumEstimatedCoefficients = fit_s.p_est; + s.DFE = fit_s.dfe; + s.SSE = fit_s.sse; + s.SSR = fit_s.ssr; + s.SST = fit_s.sst; + s.MSE = fit_s.mse; + s.RMSE = fit_s.rmse; + s.Rsquared = fit_s.rsq; + s.LogLikelihood = fit_s.logL; + s.ModelCriterion = fit_s.crit; + s.Robust = []; + s.Formula = LinearFormula (formula); + s.NumObservations = n_used; + s.NumPredictors = numel (pred_names); + s.NumVariables = numel (var_names); + s.PredictorNames = pred_names; + s.ResponseName = resp_name; + ## Wrap VariableInfo as table if it is still a raw struct + if (isstruct (var_info) && ! isempty (var_info)) + s.VariableInfo = table (var_info.Class, var_info.Range, ... + var_info.InModel, var_info.IsCategorical, ... + "VariableNames", {"Class", "Range", ... + "InModel", "IsCategorical"}, ... + "RowNames", var_names(:)'); + else + s.VariableInfo = var_info; + endif + s.VariableNames = var_names; + + ## LinearModel-only fields + if (nargin >= 15 && ! isempty (variables_data)) + s.Variables = variables_data; + else + s.Variables = []; + endif + s.ObservationNames = obs_names; + ## Wrap ObservationInfo as table if it is still a raw struct + if (isstruct (obs_info) && ! isempty (obs_info)) + if (! isempty (obs_names)) + s.ObservationInfo = table (obs_info.Weights, obs_info.Excluded, ... + obs_info.Missing, obs_info.Subset, ... + "VariableNames", {"Weights", "Excluded", ... + "Missing", "Subset"}, ... + "RowNames", obs_names(:)'); + else + s.ObservationInfo = table (obs_info.Weights, obs_info.Excluded, ... + obs_info.Missing, obs_info.Subset, ... 
+ "VariableNames", {"Weights", "Excluded", ... + "Missing", "Subset"}); + endif + else + s.ObservationInfo = obs_info; + endif + s.Fitted = fitted_all; + s.Residuals = resid_tbl; + s.Diagnostics = diag_tbl; + s.ModelFitVsNullModel = mfvnm; + s.Steps = steps; + + obj = LinearModel (s); + + endfunction + + endmethods + + ## Public methods + methods (Access = public) + + function cmdl = compact (obj) + ## compact Strip to CompactLinearModel. + ## + ## cmdl = compact (mdl) + ## + ## Returns a CompactLinearModel with all 23 CompactLinearModel + ## properties copied. LinearModel-only properties (Variables, + ## Residuals, Diagnostics, Fitted, ObservationInfo, ObservationNames, + ## ModelFitVsNullModel, Steps) are NOT included. + + s = struct (); + s.Coefficients = obj.Coefficients; + s.CoefficientNames = obj.CoefficientNames; + s.CoefficientCovariance = obj.CoefficientCovariance; + s.NumCoefficients = obj.NumCoefficients; + s.NumEstimatedCoefficients = obj.NumEstimatedCoefficients; + s.DFE = obj.DFE; + s.SSE = obj.SSE; + s.SSR = obj.SSR; + s.SST = obj.SST; + s.MSE = obj.MSE; + s.RMSE = obj.RMSE; + s.Rsquared = obj.Rsquared; + s.LogLikelihood = obj.LogLikelihood; + s.ModelCriterion = obj.ModelCriterion; + s.Robust = obj.Robust; + s.Formula = obj.Formula; + s.NumObservations = obj.NumObservations; + s.NumPredictors = obj.NumPredictors; + s.NumVariables = obj.NumVariables; + s.PredictorNames = obj.PredictorNames; + s.ResponseName = obj.ResponseName; + s.VariableInfo = obj.VariableInfo; + s.VariableNames = obj.VariableNames; + cmdl = CompactLinearModel (s); + endfunction + + function NewMdl = addTerms (obj, terms) + ## addTerms Add terms to the model and refit. + ## + ## NewMdl = addTerms (mdl, terms) + ## + ## TERMS can be: + ## - A string giving the Wilkinson notation for terms to add + ## (e.g. 'x2' or 'x1:x2') + ## - A terms matrix row(s) to add (binary matrix) + ## + ## Returns a NEW LinearModel; does not modify the original. 
+ + if (nargin < 2) + error ("LinearModel.addTerms: TERMS argument is required."); + endif + + ## Get current formula info + current_terms = obj.Formula.Terms; + current_formula = obj.Formula; + p_names = obj.PredictorNames; + + if (ischar (terms) || isstring (terms)) + ## Parse the Wilkinson term string into term rows + terms = char (terms); + new_rows = __parse_term_string__ (terms, p_names); + elseif (isnumeric (terms)) + new_rows = terms; + else + error ("LinearModel.addTerms: TERMS must be a string or numeric matrix."); + endif + + ## Pad with response column(s) if Terms matrix is wider than new_rows + n_cols_terms = columns (current_terms); + if (columns (new_rows) < n_cols_terms) + new_rows = [new_rows, zeros(rows (new_rows), ... + n_cols_terms - columns (new_rows))]; + endif + + ## Merge: add new rows to existing terms (skip duplicates) + merged_terms = current_terms; + for i = 1:rows (new_rows) + is_dup = false; + for j = 1:rows (merged_terms) + if (isequal (new_rows(i, :), merged_terms(j, :))) + is_dup = true; + break; + endif + endfor + if (! is_dup) + merged_terms = [merged_terms; new_rows(i, :)]; + endif + endfor + + ## Sort terms: intercept first, then by order (sum of row) + term_order = sum (merged_terms, 2); + [~, sort_idx] = sort (term_order); + merged_terms = merged_terms(sort_idx, :); + + ## Rebuild design matrix + y_vec = __get_y__ (obj); + w_vec = __get_weights__ (obj); + + ## Build new design matrix (Phase 3B: returns coef names for categoricals) + [X_new, new_coef_names] = __build_X_from_terms__ (obj, merged_terms); + + ## Refit + fit_s = lm_fit_engine (X_new, y_vec, w_vec); + + ## Update formula (convert to struct for modification; fromFit re-wraps) + if (isa (current_formula, "LinearFormula")) + new_formula = struct ("ResponseName", current_formula.ResponseName, ... + "LinearPredictor", current_formula.LinearPredictor, ... + "Terms", current_formula.Terms, ... + "HasIntercept", current_formula.HasIntercept, ... 
+ "InModel", current_formula.InModel, ... + "CoefficientNames", {current_formula.CoefficientNames}, ... + "PredictorNames", {current_formula.PredictorNames}, ... + "VariableNames", {current_formula.VariableNames}); + else + new_formula = current_formula; + endif + new_formula.Terms = merged_terms; + new_formula.CoefficientNames = new_coef_names; + new_formula.LinearPredictor = __terms_to_formula_str__ ( ... + merged_terms, p_names, ... + new_formula.HasIntercept); + + ## Build new LinearModel + subset = obj.ObservationInfo.Subset; + NewMdl = LinearModel.fromFit (fit_s, X_new, y_vec, new_formula, ... + p_names, obj.ResponseName, obj.VariableNames, ... + obj.VariableInfo, obj.ObservationNames, ... + obj.ObservationInfo, w_vec, ... + obj.ObservationInfo.Excluded, ... + obj.ObservationInfo.Missing, obj.Steps, ... + obj.Variables); + + endfunction + + function NewMdl = removeTerms (obj, terms) + ## removeTerms Remove terms from the model and refit. + ## + ## NewMdl = removeTerms (mdl, terms) + ## + ## TERMS can be: + ## - A string giving the Wilkinson notation for terms to remove + ## - A terms matrix row(s) to remove (binary matrix) + ## + ## Returns a NEW LinearModel; does not modify the original. + + if (nargin < 2) + error ("LinearModel.removeTerms: TERMS argument is required."); + endif + + ## Get current formula info + current_terms = obj.Formula.Terms; + current_formula = obj.Formula; + p_names = obj.PredictorNames; + + if (ischar (terms) || isstring (terms)) + terms = char (terms); + remove_rows = __parse_term_string__ (terms, p_names); + elseif (isnumeric (terms)) + remove_rows = terms; + else + error ("LinearModel.removeTerms: TERMS must be a string or numeric."); + endif + + ## Pad with response column(s) if Terms matrix is wider than remove_rows + n_cols_terms = columns (current_terms); + if (columns (remove_rows) < n_cols_terms) + remove_rows = [remove_rows, zeros(rows (remove_rows), ... 
+ n_cols_terms - columns (remove_rows))]; + endif + + ## Remove matching rows from current terms + keep_mask = true (rows (current_terms), 1); + for i = 1:rows (remove_rows) + for j = 1:rows (current_terms) + if (isequal (remove_rows(i, :), current_terms(j, :))) + keep_mask(j) = false; + break; + endif + endfor + endfor + + if (sum (keep_mask) == rows (current_terms)) + warning ("LinearModel:removeTerms", ... + "LinearModel.removeTerms: no matching terms found."); + endif + + merged_terms = current_terms(keep_mask, :); + + ## Rebuild design matrix + y_vec = __get_y__ (obj); + w_vec = __get_weights__ (obj); + [X_new, new_coef_names] = __build_X_from_terms__ (obj, merged_terms); + + ## Refit + fit_s = lm_fit_engine (X_new, y_vec, w_vec); + + ## Update formula (convert to struct for modification; fromFit re-wraps) + if (isa (current_formula, "LinearFormula")) + new_formula = struct ("ResponseName", current_formula.ResponseName, ... + "LinearPredictor", current_formula.LinearPredictor, ... + "Terms", current_formula.Terms, ... + "HasIntercept", current_formula.HasIntercept, ... + "InModel", current_formula.InModel, ... + "CoefficientNames", {current_formula.CoefficientNames}, ... + "PredictorNames", {current_formula.PredictorNames}, ... + "VariableNames", {current_formula.VariableNames}); + else + new_formula = current_formula; + endif + new_formula.Terms = merged_terms; + new_formula.CoefficientNames = new_coef_names; + new_formula.LinearPredictor = __terms_to_formula_str__ ( ... + merged_terms, p_names, ... + new_formula.HasIntercept); + + ## Build new LinearModel + NewMdl = LinearModel.fromFit (fit_s, X_new, y_vec, new_formula, ... + p_names, obj.ResponseName, obj.VariableNames, ... + obj.VariableInfo, obj.ObservationNames, ... + obj.ObservationInfo, w_vec, ... + obj.ObservationInfo.Excluded, ... + obj.ObservationInfo.Missing, obj.Steps, ... + obj.Variables); + + endfunction + + function NewMdl = step (obj, varargin) + ## step Take one or more stepwise steps. 
+ ## + ## NewMdl = step (mdl) + ## NewMdl = step (mdl, Name, Value) + ## + ## Name-Value pairs: + ## 'Criterion' - 'sse' (default), 'aic', 'bic' + ## 'PEnter' - p-value threshold for adding (default 0.05) + ## 'PRemove' - p-value threshold for removing (default 0.10) + ## 'NSteps' - maximum number of steps (default 1) + ## 'Verbose' - 0 (silent), 1 (default, print steps) + ## 'Upper' - maximum model (default 'interactions') + ## 'Lower' - minimum model (default 'constant') + + ## Parse name-value pairs + criterion = "sse"; + p_enter = 0.05; + p_remove = 0.10; + n_steps = 1; + verbose = 1; + upper_spec = "interactions"; + lower_spec = "constant"; + + i = 1; + while (i <= numel (varargin)) + if (! ischar (varargin{i})) + error ("LinearModel.step: expected parameter name string."); + endif + switch (lower (varargin{i})) + case "criterion" + criterion = lower (varargin{i+1}); + i = i + 2; + case "penter" + p_enter = varargin{i+1}; + i = i + 2; + case "premove" + p_remove = varargin{i+1}; + i = i + 2; + case "nsteps" + n_steps = varargin{i+1}; + i = i + 2; + case "verbose" + verbose = varargin{i+1}; + i = i + 2; + case "upper" + upper_spec = varargin{i+1}; + i = i + 2; + case "lower" + lower_spec = varargin{i+1}; + i = i + 2; + otherwise + error ("LinearModel.step: unknown parameter '%s'.", varargin{i}); + endswitch + endwhile + + p_names = obj.PredictorNames; + n_pred = numel (p_names); + + ## Build upper/lower terms matrices + upper_terms = [__step_spec_to_terms__(upper_spec, n_pred), ... + zeros(rows (__step_spec_to_terms__(upper_spec, n_pred)), 1)]; + lower_terms = [__step_spec_to_terms__(lower_spec, n_pred), ... + zeros(rows (__step_spec_to_terms__(lower_spec, n_pred)), 1)]; + + ## Build cat_flags from VariableInfo + cat_flags = false (1, n_pred + 1); + if ((isstruct (obj.VariableInfo) && isfield (obj.VariableInfo, "IsCategorical")) || ... 
+ isa (obj.VariableInfo, "table")) + for k = 1:n_pred + vi_idx = strcmp (obj.VariableNames, p_names{k}); + if (any (vi_idx)) + cat_flags(k) = obj.VariableInfo.IsCategorical(vi_idx); + endif + endfor + endif + + current_terms = obj.Formula.Terms; + use_ftest = strcmp (criterion, "sse"); + + ## Initialize or extend Steps.History + if (isempty (obj.Steps)) + start_lp = obj.Formula.LinearPredictor; + log_action = {'Start'}; + log_term = {start_lp}; + log_terms = {current_terms}; + log_df = obj.NumCoefficients; + log_del_df = NaN; + log_fstat = NaN; + log_pval = NaN; + log_critval = __get_criterion__ (obj, criterion); + n_log = 1; + else + prev_h = obj.Steps.History; + n_log = numel (prev_h.Action); + log_action = prev_h.Action; + log_term = cell (n_log, 1); + for k = 1:n_log + if (iscell (prev_h.TermName{k})) + log_term{k} = prev_h.TermName{k}{1}; + else + log_term{k} = prev_h.TermName{k}; + endif + endfor + log_terms = prev_h.Terms; + log_df = prev_h.DF; + log_del_df = prev_h.delDF; + if (use_ftest && isfield (prev_h, "FStat")) + log_fstat = prev_h.FStat; + log_pval = prev_h.pValue; + else + log_fstat = NaN (n_log, 1); + log_pval = NaN (n_log, 1); + endif + log_critval = NaN (n_log, 1); + endif + + ## Check for empty candidate set + all_cands = __stepwiselm_candidates__ (current_terms, lower_terms, ... + upper_terms, cat_flags, p_names); + if (isempty (all_cands) && verbose >= 1) + fprintf ("No terms to add to or remove from initial model.\n"); + endif + + ## Search loop + n_taken = 0; + current_mdl = obj; + + while (n_taken < n_steps) + made_change = false; + + ## Phase A: BACKWARD SCAN (remove until nothing qualifies) + while (n_taken < n_steps) + candidates = __stepwiselm_candidates__ ( ... 
+ current_terms, lower_terms, upper_terms, cat_flags, p_names); + rem_idx = []; + for ci = 1:numel (candidates) + if (strcmp (candidates(ci).action, "remove")) + rem_idx(end+1) = ci; + endif + endfor + if (isempty (rem_idx)); break; endif + + best_ri = 0; best_pval_r = -Inf; best_fstat_r = 0; best_df_r = 0; + for ri = 1:numel (rem_idx) + c = candidates(rem_idx(ri)); + f_stat = 0; f_pval = 1; + try + test_mdl = removeTerms (current_mdl, c.term_row(1:n_pred)); + ss_d = test_mdl.SSE - current_mdl.SSE; + df_d = test_mdl.DFE - current_mdl.DFE; + if (df_d > 0 && current_mdl.MSE > 0) + f_stat = (ss_d / df_d) / current_mdl.MSE; + f_pval = 1 - fcdf (f_stat, df_d, current_mdl.DFE); + endif + catch; continue; end_try_catch + if (f_pval > p_remove && f_pval > best_pval_r) + best_ri = rem_idx(ri); best_pval_r = f_pval; + best_fstat_r = f_stat; best_df_r = df_d; + endif + endfor + if (best_ri == 0); break; endif + + best_c = candidates(best_ri); + current_mdl = removeTerms (current_mdl, best_c.term_row(1:n_pred)); + mask = true (rows (current_terms), 1); + for j = 1:rows (current_terms) + if (isequal (current_terms(j,:), best_c.term_row)) + mask(j) = false; break; + endif + endfor + current_terms = current_terms(mask, :); + n_taken++; made_change = true; + n_log++; + log_action{n_log} = "Remove"; log_term{n_log} = best_c.term_name; + log_terms{n_log} = current_terms; + log_df(n_log) = current_mdl.NumCoefficients; + log_del_df(n_log) = -best_df_r; + log_fstat(n_log) = best_fstat_r; log_pval(n_log) = best_pval_r; + log_critval(n_log) = __get_criterion__ (current_mdl, criterion); + if (verbose >= 1) + fprintf ("%d. Removing %s, FStat = %g, pValue = %g\n", ... + n_taken, best_c.term_name, best_fstat_r, best_pval_r); + endif + endwhile + if (n_taken >= n_steps); break; endif + + ## Phase B: FORWARD SCAN (add one term) + candidates = __stepwiselm_candidates__ ( ... 
+ current_terms, lower_terms, upper_terms, cat_flags, p_names); + add_idx = []; + for ci = 1:numel (candidates) + if (strcmp (candidates(ci).action, "add")) + add_idx(end+1) = ci; + endif + endfor + if (isempty (add_idx)); break; endif + + best_ai = 0; best_pval_a = Inf; best_fstat_a = 0; + best_df_a = 0; best_add_mdl = []; + for ai = 1:numel (add_idx) + c = candidates(add_idx(ai)); + f_stat = 0; f_pval = 1; + try + test_mdl = addTerms (current_mdl, c.term_row(1:n_pred)); + ss_d = current_mdl.SSE - test_mdl.SSE; + df_d = current_mdl.DFE - test_mdl.DFE; + if (df_d > 0 && test_mdl.MSE > 0) + f_stat = (ss_d / df_d) / test_mdl.MSE; + f_pval = 1 - fcdf (f_stat, df_d, test_mdl.DFE); + endif + catch; continue; end_try_catch + if (f_pval < p_enter && f_pval < best_pval_a) + best_ai = add_idx(ai); best_pval_a = f_pval; + best_fstat_a = f_stat; best_df_a = df_d; + best_add_mdl = test_mdl; + endif + endfor + if (best_ai == 0) + if (! made_change); break; endif + break; + endif + + best_c = candidates(best_ai); + current_mdl = best_add_mdl; + current_terms = [current_terms; best_c.term_row]; + term_order = sum (current_terms(:, 1:n_pred), 2); + [~, si] = sort (term_order); + current_terms = current_terms(si, :); + n_taken++; made_change = true; + n_log++; + log_action{n_log} = "Add"; log_term{n_log} = best_c.term_name; + log_terms{n_log} = current_terms; + log_df(n_log) = current_mdl.NumCoefficients; + log_del_df(n_log) = best_df_a; + log_fstat(n_log) = best_fstat_a; log_pval(n_log) = best_pval_a; + log_critval(n_log) = __get_criterion__ (current_mdl, criterion); + if (verbose >= 1) + fprintf ("%d. Adding %s, FStat = %g, pValue = %g\n", ... 
+ n_taken, best_c.term_name, best_fstat_a, best_pval_a); + endif + endwhile + + ## Build Steps struct + resp_n = obj.ResponseName; + if (isempty (obj.Steps)) + start_formula = [resp_n, " ~ ", log_term{1}]; + else + start_formula = obj.Steps.Start; + endif + Steps.Start = start_formula; Steps.Lower = lower_spec; + Steps.Upper = upper_spec; Steps.Criterion = upper (criterion); + Steps.PEnter = p_enter; Steps.PRemove = p_remove; + History.Action = log_action(:); + History.TermName = cell (n_log, 1); + for k = 1:n_log + History.TermName{k} = {log_term{k}}; + endfor + History.Terms = log_terms(:); + History.DF = log_df(:); History.delDF = log_del_df(:); + if (use_ftest) + History.FStat = log_fstat(:); History.pValue = log_pval(:); + else + History.(upper (criterion)) = log_critval(:); + endif + Steps.History = History; + current_mdl.Steps = Steps; + NewMdl = current_mdl; + + endfunction + + function tbl = anova (obj, varargin) + ## anova ANOVA table for LinearModel (Partial SS / Type III). + ## + ## tbl = anova (mdl) + ## tbl = anova (mdl, 'component') + ## + ## Returns a struct with fields: + ## SumSq - sum of squares for each term + Error + ## DF - degrees of freedom + ## MeanSq - mean squares + ## F - F-statistic (NaN for Error row) + ## pValue - p-value (NaN for Error row) + ## RowNames - cell of term names + 'Error' + + ## Parse optional anova type argument (only 'component' for now) + + current_terms = obj.Formula.Terms; + p_names = obj.PredictorNames; + + ## Identify non-intercept terms + is_intercept = all (current_terms == 0, 2); + term_idx = find (~is_intercept); + n_terms = numel (term_idx); + + ## Compute partial SS for each term (Type III): + ## SS(term_k) = SSE(model without term_k) - SSE(full model) + SumSq = zeros (n_terms + 1, 1); + DF = zeros (n_terms + 1, 1); + MeanSq = zeros (n_terms + 1, 1); + F_val = NaN (n_terms + 1, 1); + p_val = NaN (n_terms + 1, 1); + term_names = cell (n_terms + 1, 1); + + y_vec = __get_y__ (obj); + w_vec = __get_weights__ 
(obj); + + for k = 1:n_terms + ## Remove term k and refit + reduced_terms = current_terms; + reduced_terms(term_idx(k), :) = []; + + [X_reduced, ~] = __build_X_from_terms__ (obj, reduced_terms); + fit_reduced = lm_fit_engine (X_reduced, y_vec, w_vec); + + ## Partial SS + ss_k = fit_reduced.sse - obj.SSE; + df_k = fit_reduced.dfe - obj.DFE; + + SumSq(k) = ss_k; + DF(k) = df_k; + MeanSq(k) = ss_k / df_k; + F_val(k) = MeanSq(k) / obj.MSE; + p_val(k) = 1 - fcdf (F_val(k), df_k, obj.DFE); + + ## Term name + idx = find (current_terms(term_idx(k), :)); + parts = {}; + for j = 1:numel (idx) + parts{j} = p_names{idx(j)}; + endfor + ## Handle power terms + row = current_terms(term_idx(k), :); + power_parts = {}; + for j = 1:numel (idx) + if (row(idx(j)) == 1) + power_parts{end+1} = p_names{idx(j)}; + else + power_parts{end+1} = sprintf ("%s^%d", p_names{idx(j)}, ... + row(idx(j))); + endif + endfor + term_names{k} = strjoin (power_parts, ":"); + endfor + + ## Error row + SumSq(end) = obj.SSE; + DF(end) = obj.DFE; + MeanSq(end) = obj.MSE; + F_val(end) = NaN; + p_val(end) = NaN; + term_names{end} = "Error"; + + tbl = struct ("SumSq", SumSq, "DF", DF, "MeanSq", MeanSq, ... + "F", F_val, "pValue", p_val, "RowNames", {term_names}); + + endfunction + + endmethods + +endclassdef + +## ============================================================ +## Private helper functions +## ============================================================ + +function rows_out = __parse_term_string__ (term_str, pred_names) + ## Parse a Wilkinson-style term string into binary term matrix rows. + ## e.g. 
'x2' -> [0 1 0], 'x1:x2' -> [1 1 0] + + n_pred = numel (pred_names); + + ## Split by '+' to get individual terms + terms = strtrim (strsplit (term_str, "+")); + rows_out = zeros (numel (terms), n_pred); + + for i = 1:numel (terms) + ## Split by ':' for interaction terms + parts = strtrim (strsplit (terms{i}, ":")); + for j = 1:numel (parts) + ## Handle power notation (e.g., 'x1^2') + p = parts{j}; + caret = strfind (p, "^"); + if (! isempty (caret)) + base = strtrim (p(1:caret-1)); + pwr = str2double (p(caret+1:end)); + else + base = p; + pwr = 1; + endif + ## Find the predictor index + idx = find (strcmp (pred_names, base)); + if (isempty (idx)) + error ("LinearModel: unrecognized predictor name '%s'.", base); + endif + rows_out(i, idx(1)) = pwr; + endfor + endfor +endfunction + +function [X, coef_names] = __rebuild_X__ (obj) + ## Rebuild the design matrix from stored Formula + predictor data. + [X, coef_names] = __build_X_from_terms__ (obj, obj.Formula.Terms); +endfunction + +function [X, coef_names] = __build_X_from_terms__ (obj, terms) + ## Build design matrix from terms matrix and stored predictor data. + ## + ## Phase 3B: handles categorical variables via reference (corner-point) + ## dummy coding. For categorical predictors with K levels, K-1 dummy + ## columns are generated (dropping the first level when intercept is + ## present). Interactions between categorical and numeric variables + ## produce a Cartesian product of dummy columns x numeric columns. 
+ ## + ## Returns: + ## X - n_obs x p design matrix + ## coef_names - 1 x p cell of coefficient name strings + + ## Get predictor data for subset observations + subset = obj.ObservationInfo.Subset; + p_names = obj.PredictorNames; + n_pred = numel (p_names); + + ## Detect intercept + has_intercept = any (all (terms == 0, 2)); + + ## Extract predictor columns and detect categorical status + n_obs = sum (subset); + pred_raw = cell (1, n_pred); + is_categorical = false (1, n_pred); + cat_levels = cell (1, n_pred); + cat_indices = cell (1, n_pred); + + for k = 1:n_pred + pname = p_names{k}; + if (! isempty (obj.Variables) && isa (obj.Variables, "table")) + col_full = obj.Variables.(pname); + elseif (! isempty (obj.Variables) && isstruct (obj.Variables)) + if (isfield (obj.Variables, pname)) + col_full = obj.Variables.(pname); + else + error ("LinearModel: predictor '%s' not found in Variables.", pname); + endif + else + error (strcat ("LinearModel: cannot rebuild design matrix", ... + " without stored Variables data.")); + endif + + ## Extract subset rows + if (iscell (col_full) || iscellstr (col_full) || isstring (col_full)) + col = col_full(subset); + elseif (isa (col_full, "categorical")) + col = col_full(subset); + else + col = col_full(subset, :); + endif + + pred_raw{k} = col; + + ## Detect categorical: use VariableInfo.IsCategorical as primary source + ## (Fix 4, Phase 3B), with type-sniffing as a secondary fallback. + vi_is_cat = false; + if (! isempty (obj.VariableInfo) && ... + ((isstruct (obj.VariableInfo) && isfield (obj.VariableInfo, "IsCategorical")) || ... 
+ isa (obj.VariableInfo, "table"))) + vi_idx = strcmp (obj.VariableNames, pname); + if (any (vi_idx)) + vi_is_cat = obj.VariableInfo.IsCategorical(vi_idx); + endif + endif + + ## Type-sniff fallback (cellstr, string, categorical) + type_is_cat = iscellstr (col) || isstring (col) || isa (col, "categorical"); + + if (vi_is_cat || type_is_cat) + is_categorical(k) = true; + if (isa (col, "categorical")) + levs = categories (col); + cat_levels{k} = cellstr (levs); + [~, idx] = ismember (col, levs); + cat_indices{k} = idx; + elseif (iscellstr (col)) + [u, ~, idx] = unique (col); + cat_levels{k} = u; + cat_indices{k} = idx; + elseif (isstring (col)) + col_c = cellstr (col); + [u, ~, idx] = unique (col_c); + cat_levels{k} = u; + cat_indices{k} = idx; + else + ## Numeric column flagged categorical via VariableInfo.IsCategorical + ## Treat unique numeric values as levels (sorted) + [u, ~, idx] = unique (col); + cat_levels{k} = cellfun (@num2str, num2cell (u), "UniformOutput", false); + cat_indices{k} = idx; + endif + else + is_categorical(k) = false; + endif + endfor + + ## Build X column by column from terms + X = []; + coef_names = {}; + + for i = 1:rows (terms) + row = terms(i, :); + + if (all (row == 0)) + ## Intercept + X = [X, ones(n_obs, 1)]; + coef_names{end+1} = "(Intercept)"; + continue; + endif + + ## Find active predictors for this term + active_idx = find (row); + + ## Build the term's columns via Cartesian product approach. 
+ ## Start with a single column of ones, then for each active predictor: + ## - If numeric: multiply current block by (data.^power) + ## - If categorical: Cartesian product with dummy columns + current_block = ones (n_obs, 1); + current_names = {''}; + + for ai = 1:numel (active_idx) + v = active_idx(ai); + pname = p_names{v}; + pwr = row(v); + + if (is_categorical(v)) + ## Build dummy columns (reference coding) + levs = cat_levels{v}; + idx = cat_indices{v}; + n_lev = numel (levs); + + if (has_intercept) + start_lev = 2; + else + start_lev = 1; + endif + + n_dum = n_lev - start_lev + 1; + dummies = zeros (n_obs, n_dum); + dum_names = cell (1, n_dum); + + for L = start_lev:n_lev + col_idx = L - start_lev + 1; + dummies(:, col_idx) = double (idx == L); + dum_names{col_idx} = sprintf ("%s_%s", pname, levs{L}); + endfor + + ## Cartesian product of current block and dummies + next_block = []; + next_names = {}; + for c1 = 1:columns (current_block) + for c2 = 1:columns (dummies) + next_block = [next_block, current_block(:, c1) .* dummies(:, c2)]; + n1 = current_names{c1}; + n2 = dum_names{c2}; + if (isempty (n1)) + next_names{end+1} = n2; + else + next_names{end+1} = [n1, ":", n2]; + endif + endfor + endfor + current_block = next_block; + current_names = next_names; + + else + ## Numeric predictor: multiply current block by data.^power + col_data = pred_raw{v}; + if (pwr > 1) + col_data = col_data .^ pwr; + disp_name = sprintf ("%s^%d", pname, pwr); + else + disp_name = pname; + endif + + current_block = bsxfun (@times, current_block, col_data); + for cn = 1:numel (current_names) + if (isempty (current_names{cn})) + current_names{cn} = disp_name; + else + current_names{cn} = [current_names{cn}, ":", disp_name]; + endif + endfor + endif + endfor + + X = [X, current_block]; + coef_names = [coef_names, current_names]; + endfor +endfunction + +function y = __get_y__ (obj) + ## Get response vector for subset observations. 
+ subset = obj.ObservationInfo.Subset; + resp = obj.ResponseName; + + if (! isempty (obj.Variables) && isa (obj.Variables, "table")) + y_full = obj.Variables.(resp); + y = y_full(subset); + elseif (! isempty (obj.Variables) && isstruct (obj.Variables)) + if (isfield (obj.Variables, resp)) + y_full = obj.Variables.(resp); + y = y_full(subset); + else + error ("LinearModel: response '%s' not found in Variables.", resp); + endif + else + ## Reconstruct from Fitted + Residuals + y = obj.Fitted(subset) + obj.Residuals.Raw(subset); + endif +endfunction + +function w = __get_weights__ (obj) + ## Get weights vector for subset observations, or [] if unweighted. + subset = obj.ObservationInfo.Subset; + w_all = obj.ObservationInfo.Weights; + if (all (w_all == 1)) + w = []; + else + w = w_all(subset); + endif +endfunction + +function score = __get_criterion__ (mdl, criterion) + ## Get model selection criterion value. + switch (criterion) + case "sse" + score = mdl.SSE; + case "aic" + score = mdl.ModelCriterion.AIC; + case "bic" + score = mdl.ModelCriterion.BIC; + case "rsquared" + score = -mdl.Rsquared.Ordinary; ## negate so lower = better + case "adjrsquared" + score = -mdl.Rsquared.Adjusted; + otherwise + score = mdl.SSE; + endswitch +endfunction + +function names = __terms_to_coef_names__ (terms, pred_names, has_intercept) + ## Convert terms matrix to coefficient names. + names = {}; + for i = 1:rows (terms) + row = terms(i, :); + if (all (row == 0)) + names{end+1} = "(Intercept)"; + else + parts = {}; + idx = find (row); + for j = 1:numel (idx) + if (row(idx(j)) == 1) + parts{end+1} = pred_names{idx(j)}; + else + parts{end+1} = sprintf ("%s^%d", pred_names{idx(j)}, row(idx(j))); + endif + endfor + names{end+1} = strjoin (parts, ":"); + endif + endfor +endfunction + +function fstr = __terms_to_formula_str__ (terms, pred_names, has_intercept) + ## Convert terms matrix to Wilkinson formula string (RHS only). 
+ parts = {}; + for i = 1:rows (terms) + row = terms(i, :); + if (all (row == 0)) + parts{end+1} = "1"; + else + tparts = {}; + idx = find (row); + for j = 1:numel (idx) + if (row(idx(j)) == 1) + tparts{end+1} = pred_names{idx(j)}; + else + tparts{end+1} = sprintf ("%s^%d", pred_names{idx(j)}, row(idx(j))); + endif + endfor + parts{end+1} = strjoin (tparts, ":"); + endif + endfor + fstr = strjoin (parts, " + "); +endfunction + +## ============================================================ +## Tests +## ============================================================ + +## Test: LinearModel class exists and is parseable +%!test +%! assert (exist ("LinearModel", "class") > 0 || ... +%! exist ("LinearModel") > 0); + diff --git a/inst/dummyvar.m b/inst/dummyvar.m index 4a86e103..fc720430 100644 --- a/inst/dummyvar.m +++ b/inst/dummyvar.m @@ -1,4 +1,4 @@ -## Copyright (C) 2025 Jayant Chauhan <0001jayant@gmail.com> +## Copyright (C) 2025-26 Jayant Chauhan <0001jayant@gmail.com> ## Copyright (C) 2026 Andreas Bertsatos ## ## This file is part of the statistics package for GNU Octave. @@ -59,31 +59,28 @@ print_usage; endif + if (!isnumeric (g) && !isa (g, 'categorical')) + error ("dummyvar: unsupported type of grouping variable."); + endif + + [nr, nc] = size (g); ## --- CATEGORICAL branch --- if (isa (g, 'categorical')) - if (! isvector (g) || nc != 1) - error (strcat ("dummyvar: categorical grouping", ... 
- " variable must be a column vector.")); + if (nc != 1 || ndims(g) > 2) + error ("Categorical grouping variable must have one column."); endif - g_cats = cellstr (categories (g)); - g_cstr = cellstr (g(:)); - K = numel (g_cats); + [idx, gn] = grp2idx (g); + K = numel (gn); D = zeros (nr, K); for i = 1:nr - if (isundefined (g(i))) + if (isnan (idx(i))) D(i,:) = NaN; else - for k = 1:K - if (strcmp (g_cstr{i}, g_cats{k})) - D(i,k) = 1; - break; - endif - endfor + D(i, idx(i)) = 1; endif endfor @@ -91,12 +88,10 @@ elseif (isnumeric (g)) if (ndims (g) != 2) - error (strcat ("dummyvar: numeric grouping variable", ... - " must be either a vector or a matrix.")); + error ("dummyvar: numeric grouping variable must be either a vector or a matrix."); endif - if (any (g(:) <= 0) || any (g(:) != fix (g(:)))) - error (strcat ("dummyvar: numeric grouping variable", ... - " must explicitly contain positive integers.")); + if (!isempty(g) && (any (g(:) <= 0) || any (g(:) != fix (g(:))))) + error ("dummyvar: numeric grouping variable must explicitly contain positive integers."); endif ## Force vector to column vector @@ -107,6 +102,11 @@ endif K = max (g, [], 1); + ## Handle empty input properly + if (isempty(K)) + D = []; + return; + endif D = zeros (nr, sum (K)); ij = 0; @@ -118,45 +118,11 @@ endfor endfor - ## --- CELLSTRING branch --- - elseif (iscellstr (g) && isvector (g)) - - if (! isvector (g) || nc != 1) - error (strcat ("dummyvar: cellstring grouping", ... - " variable must be a column vector.")); - endif - - g = grp2idx (g); - K = max (g); - D = zeros (length (g), K); - for i = 1:K - D(g == i, i) = 1; - endfor - - ## --- CELL ARRAY branch --- - elseif (iscell (g) && isvector (g)) - - if (any (diff (cellfun (@(x) size (x, 1), g)))) - error (strcat ("dummyvar: all grouping variables in cell array", ... 
- " must have the same number of observations.")); - endif - - D = []; - for i = 1:numel (g) - g_var = g{i}; - D_var = dummyvar (g_var); - D = [D, D_var]; - endfor - - else - error ("dummyvar: unsupported type of grouping variable."); endif endfunction ## Test output -%!assert (dummyvar ([]), []) -%!assert (dummyvar (ones (2, 0)), ones (2, 0)) %!test %! ## numeric grouping vector %! g = [1; 2; 1; 3; 2]; @@ -188,42 +154,32 @@ %! 0, 1, 0, 1, 0; 0, 1, 0, 0, 1; 0, 1, 1, 0, 0; 0, 1, 0, 1, 0]; %! assert (D, D1); %!test -%! phone = {'mob'; 'land'; 'mob';'mob';'mob';'land';'land'}; -%! codes = categorical ([202; 202; 103; 103; 202; 103; 202]); -%! D = dummyvar ({phone, codes}); -%! D1 = [1, 0, 0, 1; 0, 1, 0, 1; 1, 0, 1, 0; 1, 0, 1, 0; ... -%! 1, 0, 0, 1; 0, 1, 1, 0; 0, 1, 0, 1]; -%! assert (D, D1); -%!test %! colors = {'red'; 'blue'; 'red'; 'green'; 'yellow'; 'blue'}; %! D = dummyvar (categorical (colors)); %! D1 = [0, 0, 1, 0; 1, 0, 0, 0; 0, 0, 1, 0; 0, 1, 0, 0; 0, 0, 0, 1; 1, 0, 0, 0]; %! assert (D, D1); %!test -%! colors = {'red'; 'blue'; 'red'; 'green'; 'yellow'; 'blue'}; -%! D = dummyvar (colors); -%! D1 = [1, 0, 0, 0; 0, 1, 0, 0; 1, 0, 0, 0; 0, 0, 1, 0; 0, 0, 0, 1; 0, 1, 0, 0]; -%! assert (D, D1); -%! D = dummyvar ({colors}); -%! D1 = [1, 0, 0, 0; 0, 1, 0, 0; 1, 0, 0, 0; 0, 0, 1, 0; 0, 0, 0, 1; 0, 1, 0, 0]; -%! assert (D, D1); -%!test %! g = [1, 2, 1, 2, 1, 3, 2, 1]; %! D = dummyvar (g); %! D1 = [1, 0, 0; 0, 1, 0; 1, 0, 0; 0, 1, 0; 1, 0, 0; 0, 0, 1; 0, 1, 0; 1, 0, 0]; %! assert (D, D1); - -## Test input validation +%!test +%! g = categorical ({'a'; 'b'; 'a'}, {'b', 'a', 'c'}, {'b', 'a', 'c'}); +%! D = dummyvar (g); +%! assert (D, [0, 1, 0; 1, 0, 0; 0, 1, 0]); +%!test +%! g = categorical ({''; ''; ''}, {'x', 'y'}); +%! D = dummyvar (g); +%! assert (D, [NaN, NaN; NaN, NaN; NaN, NaN]); +%!test +%! g = categorical ({'a'; 'a'; 'b'}, {'a', 'b', 'c'}); +%! D = dummyvar (g); +%! assert (D, [1, 0, 0; 1, 0, 0; 0, 1, 0]); +%! 
assert (size (D, 2), 3); %!error dummyvar () -%!error dummyvar (1, 2) -%!error ... -%! dummyvar (categorical ({'a', 'b'})) -%!error ... -%! dummyvar (ones (3, 3, 3)) -%!error ... -%! dummyvar ([2, 4, 0, 8, 1]) -%!error ... -%! dummyvar ({'a', 'b'}) -%!error ... -%! dummyvar ({[2;3;4;5], [1;2;3]}) -%!error dummyvar ([true; false]) +%!error dummyvar (1, 2) +%!error dummyvar (categorical ({'a', 'b'})) +%!error dummyvar ({'a', 'b'}) +%!error dummyvar (ones (3, 3, 3)) +%!error dummyvar ([2, 4, 0, 8, 1]) +%!error dummyvar ([true; false]) diff --git a/inst/fitlm.m b/inst/fitlm.m index bb397e4a..ff51f2f2 100644 --- a/inst/fitlm.m +++ b/inst/fitlm.m @@ -1,4 +1,4 @@ -## Copyright (C) 2022 Andrew Penn +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> ## ## This file is part of the statistics package for GNU Octave. ## @@ -16,397 +16,322 @@ ## this program; if not, see . ## -*- texinfo -*- -## @deftypefn {statistics} {@var{tab} =} fitlm (@var{X}, @var{y}) -## @deftypefnx {statistics} {@var{tab} =} fitlm (@var{X}, @var{y}, @var{name}, @var{value}) -## @deftypefnx {statistics} {@var{tab} =} fitlm (@var{X}, @var{y}, @var{modelspec}) -## @deftypefnx {statistics} {@var{tab} =} fitlm (@var{X}, @var{y}, @var{modelspec}, @var{name}, @var{value}) -## @deftypefnx {statistics} {[@var{tab}] =} fitlm (@dots{}) -## @deftypefnx {statistics} {[@var{tab}, @var{stats}] =} fitlm (@dots{}) -## @deftypefnx {statistics} {[@var{tab}, @var{stats}] =} fitlm (@dots{}) +## @deftypefn {statistics} {@var{mdl} =} fitlm (@var{X}, @var{y}) +## @deftypefnx {statistics} {@var{mdl} =} fitlm (@var{X}, @var{y}, @var{modelspec}) +## @deftypefnx {statistics} {@var{mdl} =} fitlm (@var{X}, @var{y}, @var{modelspec}, @var{Name}, @var{Value}) +## @deftypefnx {statistics} {@var{mdl} =} fitlm (@var{tbl}) +## @deftypefnx {statistics} {@var{mdl} =} fitlm (@var{tbl}, @var{modelspec}) +## @deftypefnx {statistics} {@var{mdl} =} fitlm (@var{tbl}, @var{modelspec}, @var{Name}, @var{Value}) ## -## Regress the continuous 
outcome (i.e. dependent variable) @var{y} on -## continuous or categorical predictors (i.e. independent variables) @var{X} -## by minimizing the sum-of-squared residuals. Unless requested otherwise, -## @qcode{fitlm} prints the model formula, the regression coefficients (i.e. -## parameters/contrasts) and an ANOVA table. Note that unlike @qcode{anovan}, -## @qcode{fitlm} treats all factors as continuous by default. A bootstrap -## resampling variant of this function, @code{bootlm}, is available in the -## statistics-resampling package and has similar usage. +## Fit a linear regression model. ## -## @var{X} must be a column major matrix or cell array consisting of the -## predictors. A constant term (intercept) should not be included in X - it -## is automatically added to the model. @var{y} must be a column vector -## corresponding to the outcome variable. @var{modelspec} can specified as -## one of the following: +## @code{fitlm} fits a linear regression model and returns a +## @code{LinearModel} object. ## -## @itemize -## @item -## "constant" : model contains only a constant (intercept) term. -## -## @item -## "linear" (default) : model contains an intercept and linear term for each -## predictor. -## -## @item -## "interactions" : model contains an intercept, linear term for each predictor -## and all products of pairs of distinct predictors. -## -## @item -## "full" : model contains an intercept, linear term for each predictor and -## all combinations of the predictors. +## @strong{Inputs} ## -## @item -## a matrix of term definitions : an t-by-(N+1) matrix specifying terms in -## a model, where t is the number of terms, N is the number of predictor -## variables, and +1 accounts for the outcome variable. The outcome variable -## is the last column in the terms matrix and must be a column of zeros. -## An intercept must be specified in the first row of the terms matrix and -## must be a row of zeros. 
-## @end itemize -## -## @qcode{fitlm} can take a number of optional parameters as name-value pairs. -## -## @code{[@dots{}] = fitlm (..., "CategoricalVars", @var{categorical})} +## @var{X} is an @math{n x p} numeric matrix of predictor values. +## @var{y} is an @math{n x 1} numeric response vector. +## @var{tbl} is a @code{table} object containing all variables. ## +## @var{modelspec} can be: ## @itemize -## @item -## @var{categorical} is a vector of indices indicating which of the columns -## (i.e. variables) in @var{X} should be treated as categorical predictors -## rather than as continuous predictors. +## @item @qcode{'linear'} (default) — intercept + all linear terms +## @item @qcode{'constant'} — intercept only +## @item @qcode{'interactions'} — linear + all pairwise interactions +## @item @qcode{'purequadratic'} — linear + all squared terms +## @item @qcode{'quadratic'} — linear + interactions + squared terms +## @item A Wilkinson formula string, e.g. @qcode{'y ~ x1 + x2 + x1:x2'} ## @end itemize ## -## @qcode{fitlm} also accepts optional @qcode{anovan} parameters as name-value -## pairs (except for the "model" parameter). 
The accepted parameter names from -## @qcode{anovan} and their default values in @qcode{fitlm} are: -## -## @itemize -## @item -## @var{CONTRASTS} : "treatment" -## -## @item -## @var{SSTYPE}: 2 -## -## @item -## @var{ALPHA}: 0.05 -## -## @item -## @var{DISPLAY}: "on" +## @strong{Name-Value Arguments} ## -## @item -## @var{WEIGHTS}: [] (empty) -## -## @item -## @var{RANDOM}: [] (empty) -## -## @item -## @var{CONTINUOUS}: [1:N] -## -## @item -## @var{VARNAMES}: [] (empty) -## @end itemize +## @table @asis +## @item @qcode{'Weights'} — n×1 double, per-observation weights +## @item @qcode{'Exclude'} — integer indices or logical vector of rows to exclude +## @item @qcode{'Intercept'} — logical (default: @code{true}) +## @item @qcode{'CategoricalVars'} — integer indices or cell of names +## @item @qcode{'VarNames'} — cell of char, override variable names (matrix input) +## @item @qcode{'ResponseVar'} — char, override response variable (table input) +## @item @qcode{'PredictorVars'} — cell or indices, specify predictor subset (table input) +## @end table ## -## Type '@qcode{help anovan}' to find out more about what these options do. +## @strong{Output} ## -## @qcode{fitlm} can return up to two output arguments: +## @var{mdl} is a @code{LinearModel} object with all properties populated. ## -## [@var{tab}] = fitlm (@dots{}) returns a cell array containing a -## table of model parameters -## -## [@var{tab}, @var{stats}] = fitlm (@dots{}) returns a structure -## containing additional statistics, including degrees of freedom and effect -## sizes for each term in the linear model, the design matrix, the -## variance-covariance matrix, (weighted) model residuals, and the mean squared -## error. The columns of @var{stats}.coeffs (from left-to-right) report the -## model coefficients, standard errors, lower and upper 100*(1-alpha)% -## confidence interval bounds, t-statistics, and p-values relating to the -## contrasts. 
The number appended to each term name in @var{stats}.coeffnames -## corresponds to the column number in the relevant contrast matrix for that -## factor. The @var{stats} structure can be used as input for @qcode{multcompare}. -## Note that if the model contains a continuous variable and you wish to use -## the @var{STATS} output as input to @qcode{multcompare}, then the model needs -## to be refit with the "contrast" parameter set to a sum-to-zero contrast -## coding scheme, e.g."simple". -## -## @seealso{anovan, multcompare} +## @seealso{LinearModel, CompactLinearModel, stepwiselm} ## @end deftypefn -function [T, STATS] = fitlm (X, y, varargin) +function mdl = fitlm (varargin) - ## Check input and output arguments - if (nargin < 2) - error (strcat ("fitlm usage: ""fitlm (X, y, varargin)""; ", ... - " atleast 2 input arguments required")); - endif - if (nargout > 3) - error ("fitlm: invalid number of output arguments requested"); - endif + if (nargin < 1) + print_usage (); + endif - ## Evaluate input data - [n, N] = size (X); - msg = strcat ("fitlm: do not include the intercept column", ... - " in X - it will be added automatically"); - if (iscell (X)) - if (~ iscell (X{:,1})) - if (all (X{:,1} == 1)) - error (msg) - endif - endif - else - if (all (X(:,1) == 1)) - error (msg) - endif - endif + ## ── [Step 1] Parse inputs ──────────────────────────────────────────────── + [raw_X, raw_y, var_names, cat_flags, weights, excl_mask, ... + modelspec, obs_names, resp_name, pred_names, has_intercept, raw_cols, ... + dummy_coding] = ... + __fitlm_parse_inputs__ (varargin{:}); - ## Fetch anovan options - options = varargin; - if (isempty(options)) - options{1} = "linear"; - endif + n = rows (raw_y); + p = numel (pred_names); - ## Check if MODELSPEC was provided. If not create it. - if (ischar (options{1})) - if (! ismember (lower (options{1}), {"sstype", "varnames", "contrasts", ... - "weights", "alpha", "display", "continuous", ... 
- "categorical", "categoricalvars", "random", "model"})) - MODELSPEC = options{1}; - options(1) = []; - CONTINUOUS = []; - else - ## If MODELSPEC is not provided, set it for an additive linear model - MODELSPEC = zeros (N + 1); - MODELSPEC(2:N+1, 1:N) = eye (N); - end - else - MODELSPEC = options{1}; - options(1) = []; - endif + ## ── [Step 2] Build design matrix ───────────────────────────────────────── + [terms_mat, coef_names, X_full, y_full, incl_mask, p_tot, has_intercept, lp_str] = ... + __fitlm_build_design__ (raw_X, raw_y, modelspec, var_names, cat_flags, ... + weights, excl_mask, pred_names, raw_cols, ... + has_intercept, dummy_coding); - ## Evaluate MODELSPEC - if (ischar (MODELSPEC)) - MODELSPEC = lower (MODELSPEC); - if (! isempty (regexp (MODELSPEC, "~"))) - error ("fitlm: model formulae are not a supported format for MODELSPEC") - endif - if (! ismember (MODELSPEC, {"constant", "linear", "interaction", ... - "interactions", "full"})) - error ("fitlm: character vector for model specification not recognised") - endif - if strcmp (MODELSPEC, "constant") - X = []; - MODELSPEC = "linear"; - N = 0; - endif - else - if (size (MODELSPEC, 1) < N + 1) - error ("fitlm: number of rows in MODELSPEC must 1 + number of columns in X"); - endif - if (size (MODELSPEC, 2) != N + 1) - error ("fitlm: number of columns in MODELSPEC must = 1 + number of columns in X"); - endif - if (! all (ismember (MODELSPEC(:), [0,1]))) - error (strcat ("fitlm: elements of the model terms matrix must be ", ... - " either 0 or 1. Higher order terms are not supported")); - endif - MODELSPEC = logical (MODELSPEC(2:N+1,1:N)); - endif + ## ── [Step 3] Call math engine on included rows only ────────────────────── + X_fit = X_full(incl_mask, :); + y_fit = y_full(incl_mask); + w_fit = weights(incl_mask); - ## Check for unsupported options used by anovan - if (any (strcmpi ("MODEL", options))) - error (strcat("fitlm: modelspec should be specified in the", ... 
- " third input argument of fitlm (if at all)")); - endif + warning ("off", "CompactLinearModel:rankDeficient"); + s = lm_fit_engine (X_fit, y_fit, w_fit); + warning ("on", "CompactLinearModel:rankDeficient"); - ## Check and set variable types - idx = find (any (cat (1, strcmpi ("categorical", options), ... - strcmpi ("categoricalvars", options)))); - if (! isempty (idx)) - CONTINUOUS = [1:N]; - CONTINUOUS(ismember(CONTINUOUS,options{idx+1})) = []; - options(idx:idx+1) = []; - else - CONTINUOUS = [1:N]; - endif - idx = find (strcmpi ("continuous", options)); - if (! isempty (idx)) - ## Note that setting continuous parameter will override settings made - ## to "categorical" - CONTINUOUS = options{idx+1}; - endif + n_used = s.n; - ## Check if anovan CONTRASTS option was used - idx = find (strcmpi ("contrasts", options)); - if (isempty (idx)) - CONTRASTS = "treatment"; - else - CONTRASTS = options{idx+1}; - if (ischar(CONTRASTS)) - contr_str = CONTRASTS; - CONTRASTS = cell (1, N); - CONTRASTS(:) = {contr_str}; - endif - if (! iscell (CONTRASTS)) - CONTRASTS = {CONTRASTS}; - endif - for i = 1:N - if (! isnumeric(CONTRASTS{i})) - if (! isempty (CONTRASTS{i})) - if (! ismember (CONTRASTS{i}, ... - {"simple","poly","helmert","effect","treatment"})) - error (strcat("fitlm: the choices for built-in contrasts are", ... - " 'simple', 'poly', 'helmert', 'effect', or 'treatment'")); - endif - endif - endif - endfor - endif + ## ── [Step 4] Build Formula struct ──────────────────────────────────────── + ## fromFit expects formula.CoefficientNames to be set + Formula = __fitlm_build_formula__ (terms_mat, var_names, pred_names, ... 
+ resp_name, has_intercept, lp_str, coef_names); + Formula.CoefficientNames = coef_names; ## required by fromFit (line 288) - ## Check if anovan SSTYPE option was used - idx = find (strcmpi ("sstype", options)); - if (isempty (idx)) - SSTYPE = 2; - else - SSTYPE = options{idx+1}; - endif + ## ── [Step 5] Build VariableInfo struct ──────────────────────────────────── + VariableInfo = __fitlm_build_varinfo__ (var_names, cat_flags, raw_cols, ... + Formula.InModel); + + ## ── [Step 6] Build ObservationInfo ─────────────────────────────────────── + ObservationInfo = __fitlm_build_obsinfo__ (weights, excl_mask, raw_X, raw_y, obs_names); + + ## ── [Step 7] Build Variables struct ────────────────────────────────────── + Variables = __fitlm_build_variables_table__ (raw_cols, var_names, obs_names); - ## Perform model fit and ANOVA - [~, ~, STATS] = anovan (y, X, options{:}, ... - "model", MODELSPEC, ... - "contrasts", CONTRASTS, ... - "continuous", CONTINUOUS, ... - "sstype", SSTYPE); + ## ── [Step 8] Compute missing mask ──────────────────────────────────────── + missing_mask = ObservationInfo.Missing; - ## Create table of regression coefficients - ncoeff = sum (STATS.df); - T = cell (2 + ncoeff, 7); - T(1,:) = {"Parameter", "Estimate", "SE", ... - "Lower.CI", "Upper.CI", "t", "Prob>|t|"}; - T(2:end,1) = STATS.coeffnames; - T(2:end,2:7) = num2cell (STATS.coeffs); + ## ── [Step 9] Call LinearModel.fromFit ──────────────────────────────────── + ## + ## fromFit signature: + ## fromFit(fit_s, X, y, formula, pred_names, resp_name, + ## var_names, var_info, obs_names, obs_info, + ## weights, excluded, missing_mask, steps, variables_data) + ## + ## X and y here are the USED (included) subsets — fromFit computes diagnostics + mdl = LinearModel.fromFit ( ... + s, ... ## lm_fit_engine output + X_fit, ... ## design matrix (used rows only) + y_fit, ... ## response (used rows only) + Formula, ... ## Formula struct (with CoefficientNames field) + pred_names, ... 
## predictor variable names + resp_name, ... ## response variable name + var_names, ... ## all variable names + VariableInfo, ... ## VariableInfo struct + obs_names, ... ## ObservationNames + ObservationInfo, ... ## ObservationInfo struct + weights, ... ## full n weights + excl_mask, ... ## full n exclusion mask + missing_mask, ... ## full n missing mask + [], ... ## Steps = [] for fitlm + Variables, ... ## Variables struct + X_full); ## full n design matrix for excluded-row Fitted + +endfunction - ## Update STATS structure - STATS.source = "fitlm"; +## ── Private helper — build Variables struct ─────────────────────────────────── + +function Vars = __fitlm_build_variables_table__ (raw_cols, var_names, obs_names) + ## Build Variables as a struct where each field = one column (all n rows). + Vars = struct (); + for k = 1:numel (var_names) + vn = var_names{k}; + ## Replace invalid fieldname chars + fn = regexprep (vn, '[^a-zA-Z0-9_]', '_'); + if (isempty (fn) || ! isempty (regexp (fn(1), '[^a-zA-Z_]'))) + fn = ['v_', fn]; + endif + Vars.(fn) = raw_cols{k}; + endfor endfunction -%!demo -%! y = [ 8.706 10.362 11.552 6.941 10.983 10.092 6.421 14.943 15.931 ... -%! 22.968 18.590 16.567 15.944 21.637 14.492 17.965 18.851 22.891 ... -%! 22.028 16.884 17.252 18.325 25.435 19.141 21.238 22.196 18.038 ... -%! 22.628 31.163 26.053 24.419 32.145 28.966 30.207 29.142 33.212 ... -%! 25.694 ]'; -%! X = [1 1 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5]'; -%! -%! [TAB,STATS] = fitlm (X,y,"linear","CategoricalVars",1,"display","on"); - -%!demo -%! popcorn = [5.5, 4.5, 3.5; 5.5, 4.5, 4.0; 6.0, 4.0, 3.0; ... -%! 6.5, 5.0, 4.0; 7.0, 5.5, 5.0; 7.0, 5.0, 4.5]; -%! brands = {'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'}; -%! 
popper = {'oil', 'oil', 'oil'; 'oil', 'oil', 'oil'; 'oil', 'oil', 'oil'; ... -%! 'air', 'air', 'air'; 'air', 'air', 'air'; 'air', 'air', 'air'}; -%! -%! [TAB, STATS] = fitlm ({brands(:),popper(:)},popcorn(:),"interactions",... -%! "CategoricalVars",[1,2],"display","on"); +## ─── UNIT TESTS ─────────────────────────────────────────────────────────────── + +## Test 1: basic matrix input returns LinearModel +%!test +%! rng (42); +%! X = randn (20, 3); +%! y = X * [1; 2; -1] + randn (20, 1) * 0.5; +%! mdl = fitlm (X, y); +%! assert (isa (mdl, 'LinearModel')); +%! assert (mdl.NumPredictors, 3); +%! assert (mdl.NumObservations, 20); +%! assert (mdl.NumCoefficients, 4); +%! assert (isempty (mdl.Steps)); +%! assert (mdl.DFE, 16); + +## Test 2: VariableNames and ResponseName for matrix input +%!test +%! rng (1); +%! X = randn (10, 2); +%! y = X * [2; -1] + randn (10, 1) * 0.3; +%! mdl = fitlm (X, y); +%! assert (isequal (mdl.VariableNames, {'x1', 'x2', 'y'})); +%! assert (strcmp (mdl.ResponseName, 'y')); +%! assert (isequal (mdl.PredictorNames, {'x1', 'x2'})); +%! assert (mdl.NumVariables, 3); + +## Test 3: VarNames override +%!test +%! rng (1); +%! X = randn (15, 2); +%! y = X * [3; -1] + randn (15, 1) * 0.5; +%! mdl = fitlm (X, y, 'VarNames', {'Height', 'Weight', 'BP'}); +%! assert (strcmp (mdl.ResponseName, 'BP')); +%! assert (isequal (mdl.PredictorNames, {'Height', 'Weight'})); +%! assert (isequal (mdl.CoefficientNames, {'(Intercept)', 'Height', 'Weight'})); + +## Test 5: modelspec 'constant' +%!test +%! rng (1); +%! X = randn (10, 3); +%! y = randn (10, 1); +%! mdl = fitlm (X, y, 'constant'); +%! assert (mdl.NumCoefficients, 1); +%! assert (isequal (mdl.CoefficientNames, {'(Intercept)'})); + +## Test 6: modelspec 'interactions' +%!test +%! rng (1); +%! X = randn (15, 2); +%! y = randn (15, 1); +%! mdl = fitlm (X, y, 'interactions'); +%! assert (mdl.NumCoefficients, 4); ## 1 + x1 + x2 + x1:x2 +%! 
assert (any (strcmp (mdl.CoefficientNames, 'x1:x2'))); + +## Test 7: Intercept=false +%!test +%! rng (42); +%! X = randn (20, 2); +%! y = X * [3; -1] + randn (20, 1) * 0.3; +%! mdl = fitlm (X, y, 'Intercept', false); +%! assert (mdl.NumCoefficients, 2); +%! assert (mdl.DFE, 18); +%! assert (mdl.Formula.HasIntercept, false); +%! assert (! any (strcmp (mdl.CoefficientNames, '(Intercept)'))); + +## Test 8: Exclude integer indices +%!test +%! rng (1); +%! X = randn (10, 2); +%! y = X * [2; -1] + randn (10, 1) * 0.3; +%! mdl = fitlm (X, y, 'Exclude', [3, 7]); +%! assert (mdl.NumObservations, 8); +%! assert (mdl.ObservationInfo.Excluded(3), true); +%! assert (mdl.ObservationInfo.Excluded(7), true); +%! assert (mdl.ObservationInfo.Subset(3), false); +%! assert (isnan (mdl.Residuals.Raw(3))); +%! assert (isnan (mdl.Residuals.Raw(7))); + +## Test 9: Exclude logical vector gives same coefficients as integer +%!test +%! rng (1); +%! X = randn (10, 2); +%! y = X * [2; -1] + randn (10, 1) * 0.3; +%! mdl_int = fitlm (X, y, 'Exclude', [3, 7]); +%! excl_log = false (10, 1); excl_log([3,7]) = true; +%! mdl_log = fitlm (X, y, 'Exclude', excl_log); +%! assert (mdl_int.Coefficients.Estimate, mdl_log.Coefficients.Estimate, 1e-12); + +## Test 10: NaN in X → Missing flag, Fitted = NaN +%!test +%! rng (1); +%! X = randn (10, 2); +%! y = X * [2; -1] + randn (10, 1) * 0.3; +%! X(5, 1) = NaN; +%! mdl = fitlm (X, y); +%! assert (mdl.NumObservations, 9); +%! assert (mdl.ObservationInfo.Missing(5), true); +%! assert (isnan (mdl.Fitted(5))); +%! assert (isnan (mdl.Residuals.Raw(5))); + +## Test 11: Weighted regression +%!test +%! rng (42); +%! X = randn (15, 2); +%! y = X * [3; -1] + randn (15, 1) * 0.5; +%! w = abs (randn (15, 1)) + 0.5; +%! mdl_w = fitlm (X, y, 'Weights', w); +%! mdl_u = fitlm (X, y); +%! assert (mdl_w.DFE, mdl_u.DFE); +%! assert (mdl_w.NumObservations, 15); +%! assert (mdl_w.ObservationInfo.Weights, w, 1e-14); + +## Test 12: CategoricalVars on numeric matrix +%!test +%! 
rng (42); +%! X = [randn(30, 1), repmat([1;2;3], 10, 1), randn(30, 1)]; +%! y = X(:,1)*2 + (X(:,2)==2)*1.5 + randn(30,1)*0.3; +%! mdl = fitlm (X, y, 'CategoricalVars', 2); +%! assert (mdl.VariableInfo.IsCategorical(2), true); +%! assert (mdl.NumCoefficients, 5); ## intercept + x1 + x2_2 + x2_3 + x3 +%! assert (mdl.NumPredictors, 3); +%! assert (any (strcmp (mdl.CoefficientNames, 'x2_2'))); +%! assert (any (strcmp (mdl.CoefficientNames, 'x2_3'))); + +## Test 13: compact() preserves key fields +%!test +%! rng (1); +%! X = randn (15, 2); +%! y = X * [2; -1] + randn (15, 1) * 0.3; +%! mdl = fitlm (X, y); +%! cmdl = compact (mdl); +%! assert (isa (cmdl, 'CompactLinearModel')); +%! assert (! isa (cmdl, 'LinearModel')); +%! assert (cmdl.MSE, mdl.MSE, 1e-10); +%! assert (cmdl.RMSE, mdl.RMSE, 1e-10); +%! assert (cmdl.DFE, mdl.DFE); + +## Test 14: rank-deficient — NumEstimatedCoefficients < NumCoefficients +%!test +%! warning ('off', 'CompactLinearModel:rankDeficient'); +%! rng (1); +%! X_base = randn (20, 2); +%! X_rd = [X_base, X_base(:,1) + X_base(:,2)]; +%! y = X_base * [2; -1] + randn (20, 1) * 0.3; +%! mdl = fitlm (X_rd, y); +%! assert (mdl.NumCoefficients, 4); +%! assert (mdl.NumEstimatedCoefficients, 3); +%! warning ('on', 'CompactLinearModel:rankDeficient'); + +## Test 15: Steps is [] for fitlm models %!test -%! y = [ 8.706 10.362 11.552 6.941 10.983 10.092 6.421 14.943 15.931 ... -%! 22.968 18.590 16.567 15.944 21.637 14.492 17.965 18.851 22.891 ... -%! 22.028 16.884 17.252 18.325 25.435 19.141 21.238 22.196 18.038 ... -%! 22.628 31.163 26.053 24.419 32.145 28.966 30.207 29.142 33.212 ... -%! 25.694 ]'; -%! X = [1 1 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5]'; -%! [TAB,STATS] = fitlm (X,y,"continuous",[],"display","off"); -%! [TAB,STATS] = fitlm (X,y,"CategoricalVars",1,"display","off"); -%! [TAB,STATS] = fitlm (X,y,"constant","categorical",1,"display","off"); -%! [TAB,STATS] = fitlm (X,y,"linear","categorical",1,"display","off"); -%! 
[TAB,STATS] = fitlm (X,y,[0,0;1,0],"categorical",1,"display","off"); -%! assert (TAB{2,2}, 10, 1e-04); -%! assert (TAB{3,2}, 7.99999999999999, 1e-09); -%! assert (TAB{4,2}, 8.99999999999999, 1e-09); -%! assert (TAB{5,2}, 11.0001428571429, 1e-09); -%! assert (TAB{6,2}, 19.0001111111111, 1e-09); -%! assert (TAB{2,3}, 1.01775379540949, 1e-09); -%! assert (TAB{3,3}, 1.64107868458008, 1e-09); -%! assert (TAB{4,3}, 1.43932122062479, 1e-09); -%! assert (TAB{5,3}, 1.48983900477565, 1e-09); -%! assert (TAB{6,3}, 1.3987687997822, 1e-09); -%! assert (TAB{2,6}, 9.82555903510687, 1e-09); -%! assert (TAB{3,6}, 4.87484242844031, 1e-09); -%! assert (TAB{4,6}, 6.25294748040552, 1e-09); -%! assert (TAB{5,6}, 7.38344399756088, 1e-09); -%! assert (TAB{6,6}, 13.5834536158296, 1e-09); -%! assert (TAB{3,7}, 2.85812420217862e-05, 1e-12); -%! assert (TAB{4,7}, 5.22936741204002e-07, 1e-06); -%! assert (TAB{5,7}, 2.12794763209106e-08, 1e-07); -%! assert (TAB{6,7}, 7.82091664406755e-15, 1e-08); +%! rng (1); +%! X = randn (10, 2); +%! y = randn (10, 1); +%! mdl = fitlm (X, y); +%! assert (isempty (mdl.Steps)); +## Test 17: Wilkinson formula string input %!test -%! popcorn = [5.5, 4.5, 3.5; 5.5, 4.5, 4.0; 6.0, 4.0, 3.0; ... -%! 6.5, 5.0, 4.0; 7.0, 5.5, 5.0; 7.0, 5.0, 4.5]; -%! brands = bsxfun (@times, ones(6,1), [1,2,3]); -%! popper = bsxfun (@times, [1;1;1;2;2;2], ones(1,3)); -%! -%! [TAB, STATS] = fitlm ({brands(:),popper(:)},popcorn(:),"interactions",... -%! "categoricalvars",[1,2],"display","off"); -%! assert (TAB{2,2}, 5.66666666666667, 1e-09); -%! assert (TAB{3,2}, -1.33333333333333, 1e-09); -%! assert (TAB{4,2}, -2.16666666666667, 1e-09); -%! assert (TAB{5,2}, 1.16666666666667, 1e-09); -%! assert (TAB{6,2}, -0.333333333333334, 1e-09); -%! assert (TAB{7,2}, -0.166666666666667, 1e-09); -%! assert (TAB{2,3}, 0.215165741455965, 1e-09); -%! assert (TAB{3,3}, 0.304290309725089, 1e-09); -%! assert (TAB{4,3}, 0.304290309725089, 1e-09); -%! assert (TAB{5,3}, 0.304290309725089, 1e-09); -%! 
assert (TAB{6,3}, 0.43033148291193, 1e-09); -%! assert (TAB{7,3}, 0.43033148291193, 1e-09); -%! assert (TAB{2,6}, 26.3362867542108, 1e-09); -%! assert (TAB{3,6}, -4.38178046004138, 1e-09); -%! assert (TAB{4,6}, -7.12039324756724, 1e-09); -%! assert (TAB{5,6}, 3.83405790253621, 1e-09); -%! assert (TAB{6,6}, -0.774596669241495, 1e-09); -%! assert (TAB{7,6}, -0.387298334620748, 1e-09); -%! assert (TAB{2,7}, 5.49841502258254e-12, 1e-09); -%! assert (TAB{3,7}, 0.000893505495903642, 1e-09); -%! assert (TAB{4,7}, 1.21291454302428e-05, 1e-09); -%! assert (TAB{5,7}, 0.00237798044119407, 1e-09); -%! assert (TAB{6,7}, 0.453570536021938, 1e-09); -%! assert (TAB{7,7}, 0.705316781644046, 1e-09); -%! ## Test with string ids for categorical variables -%! brands = {'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'; ... -%! 'Gourmet', 'National', 'Generic'}; -%! popper = {'oil', 'oil', 'oil'; 'oil', 'oil', 'oil'; 'oil', 'oil', 'oil'; ... -%! 'air', 'air', 'air'; 'air', 'air', 'air'; 'air', 'air', 'air'}; -%! [TAB, STATS] = fitlm ({brands(:),popper(:)},popcorn(:),"interactions",... -%! "categoricalvars",[1,2],"display","off"); +%! rng (1); +%! X = randn (20, 2); +%! y = X * [1; 2] + randn (20, 1) * 0.5; +%! mdl = fitlm (X, y, 'y ~ x1 + x2'); +%! assert (isa (mdl, 'LinearModel')); +%! assert (mdl.Formula.HasIntercept, true); +%! assert (strcmp (mdl.Formula.ResponseName, 'y')); +## Test 18: anova() still works on fitlm output %!test -%! load carsmall -%! X = [Weight,Horsepower,Acceleration]; -%! [TAB, STATS] = fitlm (X, MPG,"constant","display","off"); -%! [TAB, STATS] = fitlm (X, MPG,"linear","display","off"); -%! assert (TAB{2,2}, 47.9767628118615, 1e-09); -%! assert (TAB{3,2}, -0.00654155878851796, 1e-09); -%! assert (TAB{4,2}, -0.0429433065881864, 1e-09); -%! assert (TAB{5,2}, -0.0115826516894871, 1e-09); -%! 
assert (TAB{2,3}, 3.87851641748551, 1e-09); -%! assert (TAB{3,3}, 0.00112741016370336, 1e-09); -%! assert (TAB{4,3}, 0.0243130608813806, 1e-09); -%! assert (TAB{5,3}, 0.193325043113178, 1e-09); -%! assert (TAB{2,6}, 12.369874881944, 1e-09); -%! assert (TAB{3,6}, -5.80228828790225, 1e-09); -%! assert (TAB{4,6}, -1.76626492228599, 1e-09); -%! assert (TAB{5,6}, -0.0599128364487485, 1e-09); -%! assert (TAB{2,7}, 4.89570341688996e-21, 1e-09); -%! assert (TAB{3,7}, 9.87424814144e-08, 1e-09); -%! assert (TAB{4,7}, 0.0807803098213114, 1e-09); -%! assert (TAB{5,7}, 0.952359384151778, 1e-09); +%! rng (1); +%! X = randn (20, 2); +%! y = X * [1; 2] + randn (20, 1) * 0.5; +%! mdl = fitlm (X, y); +%! T = anova (mdl); +%! assert (isstruct (T)); diff --git a/inst/grp2idx.m b/inst/grp2idx.m index ae7ed87b..ed376491 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -1,5 +1,6 @@ ## Copyright (C) 2015 Carnë Draug ## Copyright (C) 2022-2026 Andreas Bertsatos +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> ## ## This file is part of the statistics package for GNU Octave. ## @@ -67,6 +68,20 @@ if (nargin != 1) print_usage (); endif + + ## Handle empty input immediately + if (isempty (s)) + if (! isvector (s) && ! ischar (s) && ! isstring (s) && ! iscellstr (s) && ! 
iscategorical (s)) + error ("grp2idx: Grouping variable must be a vector, character array, string array, or cell array of character vectors."); + endif + g = []; + gn = {}; + if (nargout > 2) + gl = []; + endif + return; + endif + if (ndims (s) != 2) error ("grp2idx: S must be either a vector or a matrix."); endif @@ -82,10 +97,36 @@ error ("grp2idx: 'categorical' grouping variable must be a vector."); endif is_categorical = true; - undef = isundefined (s); cats = categories (s); - s = cellstr (s); - s(undef) = {""}; + s_text = cellstr (s(:)); + g = NaN (numel (s_text), 1); + + if (nargout > 1) + if (isempty (cats)) + gn = cell (0, 1); + else + gn = cellstr (cats); + endif + endif + + if (nargout > 2) + if (isempty (cats)) + gl = categorical (cell (0, 1)); + else + gl = categorical (cellstr (cats), cellstr (cats), cellstr (cats)); + endif + endif + + if (isempty (cats)) + return; + endif + + cat_names = cellstr (cats); + undef = isundefined (s(:)); + for i = 1:numel (cat_names) + g(! undef & strcmp (s_text, cat_names{i})) = i; + endfor + return; elseif (ischar (s)) is_char_array = true; s = cellstr (s); @@ -121,16 +162,7 @@ [gl, I, g] = unique (s(:)); ## Fix order in here, since unique does not support this yet if (iscellstr (s) && ! is_categorical) - I = sort (I); - for i = 1:length (gl) - gl_s(i) = gl(g(I(i))); - idx(i,:) = (g == g(I(i))); - endfor - for i = 1:length (gl) - g(idx(i,:)) = i; - endfor - gl = gl_s; - gl = gl'; + [g, gl] = g2i_reindex_stable (g, gl, I); endif ## handle NaNs and empty strings @@ -208,6 +240,24 @@ endfunction +function [g, gl] = g2i_reindex_stable (g, gl, first_idx) + + first_idx = sort (first_idx(:)); + ng = numel (gl); + new_gl = cell (ng, 1); + new_g = g; + + for i = 1:ng + old_idx = g(first_idx(i)); + new_gl{i} = gl{old_idx}; + new_g(g == old_idx) = i; + endfor + + gl = new_gl; + g = new_g; + +endfunction + # test for one output argument %!test %! g = grp2idx ([3 2 1 2 3 1]); @@ -390,6 +440,13 @@ %! 
assert (iscategorical (gl), true); %! assert (cellstr (gl), gn); +%!test +%! s = categorical ({'a'; 'b'; 'a'}, {'b', 'a', 'c'}, {'b', 'a', 'c'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1; 2]); +%! assert (gn, {'b'; 'a'; 'c'}); +%! assert (cellstr (gl), gn); + ## test for duration arrays %!test %! g = gn = gl = []; diff --git a/inst/parseWilkinsonFormula.m b/inst/parseWilkinsonFormula.m index 703c260c..f0a3d6fe 100644 --- a/inst/parseWilkinsonFormula.m +++ b/inst/parseWilkinsonFormula.m @@ -1,5 +1,6 @@ ## Copyright (C) 2026 Andreas Bertsatos ## Copyright (C) 2026 Avanish Salunke +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> ## ## This file is part of the statistics package for GNU Octave. ## @@ -201,7 +202,7 @@ else lhs_vars = resolve_lhs_symbolic (lhs_str); endif - + ## build the required output. varargout{1} = run_equation_builder (lhs_vars, rhs_terms); @@ -519,14 +520,14 @@ args_str_parts = {}; for k = 1:length (node.args) arg_res = run_expander (node.args{k}, mode); - + if (! isempty (arg_res) && ! isempty (arg_res{1})) - args_str_parts{end+1} = arg_res{1}{1}; + args_str_parts{end+1} = arg_res{1}{1}; else args_str_parts{end+1} = ''; endif endfor - + full_term = sprintf ("%s(%s)", node.name, strjoin (args_str_parts, ',')); result = {{full_term}}; else @@ -774,11 +775,8 @@ endif endfunction - ## extract variables + ## extract variables — collect RHS predictors first, then LHS response all_vars = {}; - if (! isempty (lhs_term)) - all_vars = [all_vars, flatten_recursive(lhs_term)]; - endif cleaned_rhs = cell (length (rhs_terms), 1); for i = 1:length (rhs_terms) @@ -789,11 +787,26 @@ final_term_vars = [final_term_vars, parts]; endfor cleaned_rhs{i} = final_term_vars; - all_vars = [all_vars, final_term_vars]; + ## Add only the BASE variable name to all_vars (strip ^k suffix) + for j = 1:length (final_term_vars) + tv = final_term_vars{j}; + caret = strfind (tv, '^'); + if (! 
isempty (caret)) + all_vars{end+1} = tv(1:caret(1)-1); + else + all_vars{end+1} = tv; + endif + endfor endfor - all_vars = unique (all_vars); - ## Remove intercept marker from var list + ## Append LHS (response) variables after RHS predictors + if (! isempty (lhs_term)) + all_vars = [all_vars, flatten_recursive(lhs_term)]; + endif + + ## Stable dedup (preserves first-seen order) + [~, ui] = unique (all_vars, 'stable'); + all_vars = all_vars(sort (ui)); all_vars(strcmp (all_vars, '1')) = []; schema.VariableNames = all_vars; @@ -808,7 +821,7 @@ endif endif - ## Build terms matrix + ## Build terms matrix (stores exponents, not just binary 1) n_vars = length (all_vars); n_terms = length (cleaned_rhs); terms_mat = zeros (n_terms, n_vars); @@ -822,22 +835,46 @@ continue; endif - [found, idx] = ismember (vars_in_this_term, all_vars); - if (any (! found)) - error ("parseWilkinsonFormula: Unknown variable in term definition."); - endif - terms_mat(i, idx) = 1; + for jj = 1:length (vars_in_this_term) + tv = vars_in_this_term{jj}; + caret = strfind (tv, '^'); + if (! isempty (caret)) + base_name = tv(1:caret(1)-1); + exp_val = str2double (tv(caret(1)+1:end)); + else + base_name = tv; + exp_val = 1; + endif + col = find (strcmp (all_vars, base_name)); + if (isempty (col)) + error ("parseWilkinsonFormula: Unknown variable '%s' in term.", ... + base_name); + endif + terms_mat(i, col) = exp_val; + endfor endfor - ## sorting : order by order. 
- term_orders = sum (terms_mat, 2); - M = [term_orders, terms_mat]; + ## sorting: order by (1) number of active variables ascending, + ## (2) binary mask descending (x1 before x2 before x3), + ## (3) exponent values ascending (x before x^2) + term_orders = sum (terms_mat > 0, 2); + binary_mask = double (terms_mat > 0); + M = [term_orders, binary_mask, terms_mat]; - [~, unique_idx] = unique (M, 'rows'); + ## Create unique rows based on actual terms + [~, unique_idx] = unique ([term_orders, terms_mat], 'rows'); terms_mat = terms_mat (unique_idx, :); + cleaned_rhs = cleaned_rhs(unique_idx); + M = M (unique_idx, :); + + n_v = size (terms_mat, 2); + ## Direction: [asc on order, desc on binary mask, asc on exponents] + sort_dirs = [1, -(2:1+n_v), (2+n_v:1+2*n_v)]; - [~, sort_idx] = sortrows ([sum(terms_mat, 2), terms_mat]); + ## Sort using the direction vector + [~, sort_idx] = sortrows (M, sort_dirs); schema.Terms = terms_mat (sort_idx, :); + schema.TermTokens = cleaned_rhs(sort_idx); endfunction @@ -921,6 +958,19 @@ endif endfor + ## Reorder schema.VariableNames to match table column order (Fix B4) + tbl_col_order = data.Properties.VariableNames; + [~, pos] = ismember (schema.VariableNames, tbl_col_order); + pos(pos == 0) = length (tbl_col_order) + 1; + [~, reorder_idx] = sort (pos); + schema.VariableNames = schema.VariableNames(reorder_idx); + schema.Terms = schema.Terms(:, reorder_idx); + if (! isempty (schema.ResponseIdx)) + resp_name = req_vars{schema.ResponseIdx}; + schema.ResponseIdx = find (strcmp (schema.VariableNames, resp_name)); + endif + req_vars = schema.VariableNames; + ## Build Design Matrix X X = []; col_names = {}; @@ -930,6 +980,7 @@ has_intercept = ! 
isempty (intercept_row_idx); n_terms = size (schema.Terms, 1); + cleaned_rhs_terms = schema.TermTokens; for i = 1:n_terms term_row = schema.Terms(i, :); @@ -942,20 +993,53 @@ continue; endif + ## Sort vars_idx: numeric variables before categorical (Fix B5) + if (length (vars_idx) > 1) + is_cat = arrayfun (@(v) strcmp (var_info.(req_vars{v}).type, ... + 'categorical'), vars_idx); + [~, sort_order] = sort (is_cat); + vars_idx = vars_idx(sort_order); + endif + current_block = ones (n_rows, 1); current_names = {''}; + ## Resolve term tokens for polynomial support (Fix B3) + term_tokens = cleaned_rhs_terms{i}; + token_idx = 0; + for v = vars_idx - vname = req_vars{v}; + token_idx = token_idx + 1; + if (! isempty (term_tokens) && token_idx <= length (term_tokens)) + term_token = term_tokens{token_idx}; + else + term_token = req_vars{v}; + endif + + caret = strfind (term_token, '^'); + if (! isempty (caret)) + vname = term_token(1:caret(1)-1); + else + vname = req_vars{v}; + endif info = var_info.(vname); if (strcmp (info.type, 'numeric')) - current_block = current_block .* info.data; + ## Resolve polynomial exponent from term token + if (! isempty (caret)) + exp_val = str2double (term_token(caret(1)+1:end)); + col_data = info.data .^ exp_val; + disp_name = term_token; + else + col_data = info.data; + disp_name = vname; + endif + current_block = current_block .* col_data; for k = 1:length (current_names) if (isempty (current_names{k})) - current_names{k} = vname; + current_names{k} = disp_name; else - current_names{k} = [current_names{k}, ':', vname]; + current_names{k} = [current_names{k}, ':', disp_name]; endif endfor else @@ -1140,14 +1224,14 @@ ## process RHS rhs_tokens = run_lexer (rhs_str); [rhs_tree, ~] = run_parser (rhs_tokens); - + wrapper.type = 'OPERATOR'; wrapper.value = '~'; wrapper.left = []; wrapper.right = rhs_tree; - + expanded = run_expander (wrapper, mode); - + ## extract the terms. 
if (isstruct (expanded) && isfield (expanded, 'model')) rhs_terms = expanded.model; @@ -1164,22 +1248,22 @@ for i = 1:length (parts) p = strtrim (parts{i}); if (isempty (p)), continue; endif - + range_parts = strsplit (p, '-'); - + if (length (range_parts) == 2) s_str = strtrim (range_parts{1}); e_str = strtrim (range_parts{2}); - + [s_tok] = regexp (s_str, '^([a-zA-Z_]\w*)(\d+)$', 'tokens'); [e_tok] = regexp (e_str, '^([a-zA-Z_]\w*)(\d+)$', 'tokens'); - + if (! isempty (s_tok) && ! isempty (e_tok)) prefix = s_tok{1}{1}; s_num = str2double (s_tok{1}{2}); e_prefix = e_tok{1}{1}; e_num = str2double (e_tok{1}{2}); - + if (strcmp (prefix, e_prefix) && s_num <= e_num) for n = s_num:e_num vars{end+1} = sprintf ("%s%d", prefix, n); @@ -1203,7 +1287,7 @@ for i = 1:length (rhs_terms) t = rhs_terms{i}; if (isempty (t)) - term_strs{end+1} = ''; + term_strs{end+1} = ''; else if (length (t) == 1 && any (strfind (t{1}, "("))) term_strs{end+1} = t{1}; @@ -1229,13 +1313,13 @@ rhs_parts{end+1} = sprintf ("%s*%s", coeff, t_str); endif endfor - + full_rhs = strjoin (rhs_parts, ' + '); if (isempty (full_rhs)), full_rhs = '0'; endif lines{end+1} = sprintf ("%s = %s", lhs_vars{k}, full_rhs); endfor - eq_list = string (lines'); + eq_list = string (lines'); endfunction %!demo @@ -1271,7 +1355,7 @@ %!demo %! -%! ## Interaction Effects : +%! ## Interaction Effects : %! ## We analyze Relief Score based on Drug Type and Dosage Level. %! ## The '*' operator expands to the main effects PLUS the interaction term. %! ## Categorical variables are automatically created. @@ -1287,11 +1371,11 @@ %!demo %! -%! ## Polynomial Regression : +%! ## Polynomial Regression : %! ## Uses the power operator (^) to model non-linear relationships. %! Distance = [20; 45; 80; 125]; %! Speed = [30; 50; 70; 90]; -%! Speed_2 = Speed .^ 2; +%! Speed_2 = Speed .^ 2; %! t = table (Distance, Speed, Speed_2, 'VariableNames', {'Distance', 'Speed', 'Speed^2'}); %! %! 
formula = 'Distance ~ Speed^2'; @@ -1316,7 +1400,7 @@ %!demo %! -%! ## Explicit Nesting : +%! ## Explicit Nesting : %! ## The parser also supports the explicit 'B(A)' syntax, which means %! ## 'B is nested within A'. This is equivalent to the interaction 'A:B' %! ## but often used to denote random effects or specific hierarchy. @@ -1327,7 +1411,7 @@ %!demo %! -%! ## Excluding Terms : +%! ## Excluding Terms : %! ## Demonstrates building a complex model and then simplifying it. %! ## We define a full 3-way interaction (A*B*C) but explicitly remove the %! ## three-way term (A:B:C) using the minus operator. @@ -1338,7 +1422,7 @@ %!demo %! -%! ## Repeated Measures : +%! ## Repeated Measures : %! ## This allows predicting multiple outcomes simultaneously. %! ## The range operator '-' selects all variables between 'T1' and 'T3' %! ## as the response matrix Y. @@ -1487,7 +1571,7 @@ %! C = {'lo'; 'hi'}; %! d = table (y, N, C); %! [M, ~, names] = parseWilkinsonFormula ('~ N * C', 'model_matrix', d); -%! assert (any (strcmp (names, 'C_lo:N'))); +%! assert (any (strcmp (names, 'N:C_lo'))); %!test %! ## Test : Intercept Only Model %! y = [1; 2; 3]; @@ -1659,6 +1743,48 @@ %! eq = parseWilkinsonFormula ('y ~ A - A', 'equation'); %! expected = string('y = c1'); %! assert (isequal (eq, expected)); +%!test +%! ## Verify parseWilkinsonFormula schema matches MATLAB fitlm sorting +%! formula = 'Y ~ x1 * x2 * x3'; +%! schema = parseWilkinsonFormula(formula, 'matrix'); +%! +%! ## Columns: predictors first in formula order, response last +%! expected_terms = [0, 0, 0, 0; ## (Intercept) +%! 1, 0, 0, 0; ## x1 +%! 0, 1, 0, 0; ## x2 +%! 0, 0, 1, 0; ## x3 +%! 1, 1, 0, 0; ## x1:x2 +%! 1, 0, 1, 0; ## x1:x3 +%! 0, 1, 1, 0; ## x2:x3 +%! 1, 1, 1, 0]; ## x1:x2:x3 +%! +%! assert(schema.VariableNames, {'x1', 'x2', 'x3', 'Y'}); +%! assert(schema.Terms, expected_terms); +%!test +%! ## polynomial x^2 produces base var with exponent in terms matrix +%! s = parseWilkinsonFormula ('y ~ x^2', 'matrix'); +%! 
assert (s.VariableNames, {'x', 'y'}); +%! assert (s.Terms, [0 0; 1 0; 2 0]); +%!test +%! ## polynomial model_matrix computes x.^2 column +%! x = (1:5)'; y = x .^ 2; +%! t = table (x, y); +%! [X, ~, names] = parseWilkinsonFormula ('y ~ x^2', 'model_matrix', t); +%! assert (names, {'(Intercept)'; 'x'; 'x^2'}); +%! assert (X(:,2), x); +%! assert (X(:,3), x .^ 2); +%!test +%! ## variable ordering follows table column order +%! Age = [25;30]; Weight = [70;75]; BP = [120;122]; +%! t = table (Age, Weight, BP); +%! [~, ~, n] = parseWilkinsonFormula ('BP ~ Age * Weight', 'model_matrix', t); +%! assert (n, {'(Intercept)'; 'Age'; 'Weight'; 'Age:Weight'}); +%!test +%! ## numeric columns named before categorical in interactions +%! N = [10;20]; C = {'lo';'hi'}; y = [1;2]; +%! d = table (N, C, y); +%! [~, ~, n] = parseWilkinsonFormula ('y ~ N * C', 'model_matrix', d); +%! assert (n, {'(Intercept)'; 'N'; 'C_lo'; 'N:C_lo'}); %!error parseWilkinsonFormula () %!error parseWilkinsonFormula ('y ~ x', 'invalid_mode') %!error parseWilkinsonFormula ('', 'parse') diff --git a/inst/private/__fitlm_build_design__.m b/inst/private/__fitlm_build_design__.m new file mode 100644 index 00000000..08610bed --- /dev/null +++ b/inst/private/__fitlm_build_design__.m @@ -0,0 +1,754 @@ +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. 
+## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Private Function} {[@var{terms_mat}, @var{coef_names}, @var{X_full}, @ +## @var{y_full}, @var{incl_mask}, @var{p_tot}, @var{has_intercept}, @ +## @var{lp_str}] =} __fitlm_build_design__ (@var{raw_X}, @var{raw_y}, @ +## @var{modelspec}, @var{var_names}, @var{cat_flags}, @var{weights}, @ +## @var{excl_mask}, @var{pred_names}, @var{raw_cols}, @var{has_intercept}) +## +## Build design matrix and terms matrix from parsed fitlm inputs. +## +## Returns: +## terms_mat - (n_terms x n_all_vars) binary terms matrix (last col = response = 0) +## coef_names - 1 x p_tot cell of char coefficient names +## X_full - n x p_tot design matrix (ALL rows, NaN for missing rows) +## y_full - n x 1 response (NaN for missing rows) +## incl_mask - n x 1 logical: rows used in fit +## p_tot - total columns in design matrix +## has_intercept - logical +## lp_str - LinearPredictor RHS string +## @end deftypefn + +function [terms_mat, coef_names, X_full, y_full, incl_mask, p_tot, has_intercept, lp_str] = ... + __fitlm_build_design__ (raw_X, raw_y, modelspec, var_names, cat_flags, ... + weights, excl_mask, pred_names, raw_cols, ... 
+ has_intercept, dummy_coding) + + if (nargin < 11 || isempty (dummy_coding)) + dummy_coding = "reference"; + endif + + n = rows (raw_X); + p = columns (raw_X); ## number of predictor VARIABLES + n_vars = p + 1; ## predictors + response + + ## ── Step 1: Resolve shorthand → Terms matrix ──────────────────────────── + if (isnumeric (modelspec) && ismatrix (modelspec) && ndims (modelspec) == 2) + ## Numeric terms matrix passed directly (e.g., from stepwiselm) + terms_pred = modelspec; + ## If width = p+1 (includes response column), strip last column + if (columns (terms_pred) == n_vars) + terms_pred = terms_pred(:, 1:p); + endif + ## Detect intercept from the terms matrix + has_intercept = any (all (terms_pred == 0, 2)); + else + is_formula = ! isempty (strfind (modelspec, "~")); + + if (is_formula) + ## Formula string — parse response name and RHS + tilde = strfind (modelspec, "~"); + rhs = strtrim (modelspec(tilde(1)+1:end)); + ## Check for unsupported C() notation + if (! isempty (regexp (rhs, 'C\s*\('))) + error ("Unable to understand the character vector or string scalar '%s'.", modelspec); + endif + ## Check for intercept suppression + if (! isempty (regexp (rhs, '(^|\s|[+])\s*-\s*1\b'))) + has_intercept = false; + rhs = regexprep (rhs, '\s*[-]\s*1\b', ''); + rhs = strtrim (rhs); + endif + ## Parse the RHS terms + terms_pred = __parse_rhs_terms__ (rhs, pred_names, has_intercept); + else + ## Shorthand string + terms_pred = __shorthand_to_terms__ (lower (modelspec), p, has_intercept); + endif + endif + + + ## terms_pred: (n_terms x p) — predictor columns only + ## Expand to include response column (always 0) + terms_mat = [terms_pred, zeros(rows(terms_pred), 1)]; + + ## ── Step 2: Build design matrix column by column ───────────────────────── + ## + ## We iterate over term rows. 
For each term: + ## - intercept row (all 0): add ones column + ## - linear/squared numeric: add X^power column + ## - categorical main effect: add k-1 dummy columns + ## - interaction: element-wise product of factor block columns + + ## Pre-compute per-predictor "base columns" (for numerics) and + ## "dummy block" (for categoricals) + base_cols = cell (1, p); ## each cell = n x something + base_cnames = cell (1, p); ## cell of cell of char + cat_levels = cell (1, p); ## sorted unique levels for each cat predictor + + for k = 1:p + if (cat_flags(k)) + ## Categorical predictor — build dummy columns (reference = first level) + col_raw = raw_cols{k}; + if (isa (col_raw, "categorical")) + levs = cellstr (categories (col_raw)); + [~, cat_idx] = ismember (cellstr (col_raw), levs); + elseif (iscellstr (col_raw) || isstring (col_raw)) + if (isstring (col_raw)) + col_raw = cellstr (col_raw); + endif + levs = unique (col_raw); + levs = sort (levs); + [~, ~, cat_idx] = unique (col_raw); + ## Re-sort so idx matches sorted levs + [levs2, ~, cat_idx] = unique (col_raw); + levs = levs2; + else + ## Numeric forced categorical + vals = raw_X(:, k); + levs_num = unique (vals(! 
isnan (vals))); + levs_num = sort (levs_num); + levs = arrayfun (@num2str, levs_num, "UniformOutput", false); + cat_idx = zeros (n, 1); + for li = 1:numel (levs_num) + cat_idx(vals == levs_num(li)) = li; + endfor + endif + cat_levels{k} = levs; + n_lev = numel (levs); + ## Dummy coding: 'reference' drops first level, 'full' keeps all + if (strcmp (dummy_coding, "full")) + n_dum = n_lev; + D = zeros (n, n_dum); + dum_names = cell (1, n_dum); + for L = 1:n_lev + D(:, L) = double (cat_idx == L); + if (isnumeric (levs)) + lev_str = num2str (levs(L)); + else + lev_str = levs{L}; + endif + dum_names{L} = sprintf ("%s_%s", pred_names{k}, lev_str); + endfor + else + ## Reference coding: drop first level + n_dum = n_lev - 1; + D = zeros (n, n_dum); + dum_names = cell (1, n_dum); + for L = 2:n_lev + D(:, L-1) = double (cat_idx == L); + if (isnumeric (levs)) + lev_str = num2str (levs(L)); + else + lev_str = levs{L}; + endif + dum_names{L-1} = sprintf ("%s_%s", pred_names{k}, lev_str); + endfor + endif + base_cols{k} = D; + base_cnames{k} = dum_names; + else + ## Numeric predictor — single column; power applied per-term + base_cols{k} = raw_X(:, k); + base_cnames{k} = {pred_names{k}}; + endif + endfor + + ## Now build X_full column by column from terms_pred + X_full = []; + coef_names = {}; + + for i = 1:rows (terms_pred) + row = terms_pred(i, :); + + if (all (row == 0)) + ## Intercept + X_full = [X_full, ones(n, 1)]; + coef_names{end+1} = "(Intercept)"; + continue; + endif + + ## Find active predictors in this term + active = find (row); + ## Start with a block of ones × empty name + blk = ones (n, 1); + blk_n = {""}; + + for ai = 1:numel (active) + v = active(ai); + pwr = row(v); + + if (cat_flags(v)) + ## Categorical: Cartesian product with dummy block + D = base_cols{v}; ## n x (n_lev-1) + dn = base_cnames{v}; ## 1 x (n_lev-1) cell + new_blk = []; + new_blk_n = {}; + for c1 = 1:columns (blk) + for c2 = 1:columns (D) + new_blk = [new_blk, blk(:, c1) .* D(:, c2)]; + n1 = 
blk_n{c1}; + n2 = dn{c2}; + if (isempty (n1)) + new_blk_n{end+1} = n2; + else + new_blk_n{end+1} = [n1, ":", n2]; + endif + endfor + endfor + blk = new_blk; + blk_n = new_blk_n; + else + ## Numeric: multiply by column^power + col = base_cols{v}; + if (pwr > 1) + col = col .^ pwr; + cname = sprintf ("%s^%d", pred_names{v}, pwr); + else + cname = pred_names{v}; + endif + blk = bsxfun (@times, blk, col); + for c1 = 1:numel (blk_n) + if (isempty (blk_n{c1})) + blk_n{c1} = cname; + else + blk_n{c1} = [blk_n{c1}, ":", cname]; + endif + endfor + endif + endfor + + X_full = [X_full, blk]; + coef_names = [coef_names, blk_n]; + endfor + + ## ── Step 3: Compute incl_mask ─────────────────────────────────────────── + ## Missing: any NaN in the NUMERIC data (raw_X numeric cols or raw_y) + nan_in_X = any (isnan (raw_X), 2); + nan_in_y = isnan (raw_y); + ## For categorical columns stored as codes, NaN means missing + missing_mask = nan_in_X | nan_in_y; + + incl_mask = (! excl_mask) & (! missing_mask); + y_full = raw_y; + + p_tot = columns (X_full); + + ## ── Step 4: Build LinearPredictor string ──────────────────────────────── + lp_str = __build_lp_str__ (terms_pred, pred_names, has_intercept); + +endfunction + + +## ── Shorthand → terms_pred (n_terms x p) ──────────────────────────────────── + +function terms = __shorthand_to_terms__ (spec, p, has_intercept) + ## Returns (n_terms x p) matrix (no response column yet) + switch (spec) + case "constant" + if (has_intercept) + terms = zeros (1, p); + else + terms = zeros (0, p); + endif + + case "linear" + if (has_intercept) + terms = [zeros(1, p); eye(p)]; + else + terms = eye (p); + endif + + case "interactions" + lin_terms = eye (p); + inter_rows = []; + for i = 1:p-1 + for j = i+1:p + r = zeros (1, p); + r(i) = 1; r(j) = 1; + inter_rows = [inter_rows; r]; + endfor + endfor + if (has_intercept) + terms = [zeros(1,p); lin_terms; inter_rows]; + else + terms = [lin_terms; inter_rows]; + endif + + case "purequadratic" + lin_terms = 
eye (p); + quad_rows = 2 * eye (p); + if (has_intercept) + terms = [zeros(1,p); lin_terms; quad_rows]; + else + terms = [lin_terms; quad_rows]; + endif + + case {"quadratic", "full"} + lin_terms = eye (p); + inter_rows = []; + for i = 1:p-1 + for j = i+1:p + r = zeros (1, p); + r(i) = 1; r(j) = 1; + inter_rows = [inter_rows; r]; + endfor + endfor + quad_rows = 2 * eye (p); + if (has_intercept) + terms = [zeros(1,p); lin_terms; inter_rows; quad_rows]; + else + terms = [lin_terms; inter_rows; quad_rows]; + endif + + otherwise + ## Check for polyNM pattern (e.g., 'poly22', 'poly33') + m = regexp (spec, '^poly(\d+)$', 'tokens'); + if (! isempty (m)) + digits_str = m{1}{1}; + if (numel (digits_str) != p) + error ("fitlm: 'poly%s' requires %d digits for %d predictors.", ... + digits_str, p, p); + endif + max_powers = arrayfun (@(c) str2double (c), digits_str); + terms = __poly_to_terms__ (max_powers, p, has_intercept); + else + ## Unknown — default to linear + if (has_intercept) + terms = [zeros(1, p); eye(p)]; + else + terms = eye (p); + endif + endif + endswitch +endfunction + + +## ── Formula RHS → terms_pred ───────────────────────────────────────────────── + +function terms = __parse_rhs_terms__ (rhs, pred_names, has_intercept) + ## Parse a Wilkinson RHS string with full operator support: + ## +, -, *, /, :, ^, (group)^N + ## Returns (n_terms x p) matrix + p = numel (pred_names); + + ## Split into positive and negative terms (respecting parentheses) + [add_toks, sub_toks, has_int_in_rhs, remove_int] = __split_additive__ (rhs); + + ## Expand each positive token into term rows + add_rows = {}; + for i = 1:numel (add_toks) + expanded = __expand_token__ (add_toks{i}, pred_names, p); + if (isempty (expanded)) + error ("The model formula contains names not recognized as predictor or response names."); + endif + add_rows = [add_rows, expanded]; + endfor + + ## Remove subtracted terms + for i = 1:numel (sub_toks) + expanded = __expand_token__ (sub_toks{i}, pred_names, p); + 
for j = 1:numel (expanded) + add_rows = __remove_row__ (add_rows, expanded{j}); + endfor + endfor + + ## Deduplicate + add_rows = __dedup_rows__ (add_rows); + + ## Sort by MATLAB canonical order: (total_degree, max_power, first_var_index) + if (numel (add_rows) > 1) + sort_keys = zeros (numel (add_rows), 3); + for i = 1:numel (add_rows) + row = add_rows{i}; + sort_keys(i, 1) = sum (row); ## total degree + sort_keys(i, 2) = max (row); ## max individual power + nz = find (row > 0); + if (! isempty (nz)) + sort_keys(i, 3) = nz(1); ## first nonzero variable index + endif + endfor + [~, ord] = sortrows (sort_keys); + add_rows = add_rows(ord); + endif + + ## Assemble terms matrix + n_terms = numel (add_rows); + terms_body = zeros (n_terms, p); + for i = 1:n_terms + terms_body(i, :) = add_rows{i}; + endfor + + ## Handle intercept + if (remove_int) + has_intercept = false; + endif + if (has_intercept || has_int_in_rhs) + terms = [zeros(1, p); terms_body]; + else + terms = terms_body; + endif +endfunction + + +## ── LinearPredictor string builder ─────────────────────────────────────────── + +function s = __build_lp_str__ (terms_pred, pred_names, has_intercept) + parts = {}; + for i = 1:rows (terms_pred) + row = terms_pred(i, :); + if (all (row == 0)) + parts{end+1} = "1"; + else + active = find (row); + tparts = {}; + for j = 1:numel (active) + v = active(j); + pwr = row(v); + if (pwr == 1) + tparts{end+1} = pred_names{v}; + else + tparts{end+1} = sprintf ("%s^%d", pred_names{v}, pwr); + endif + endfor + parts{end+1} = strjoin (tparts, ":"); + endif + endfor + s = strjoin (parts, " + "); +endfunction + + +## ── Split RHS on + and - (respecting parentheses) ─────────────────────────── + +function [add_toks, sub_toks, has_int, remove_int] = __split_additive__ (rhs) + add_toks = {}; + sub_toks = {}; + has_int = false; + remove_int = false; + rhs = strtrim (rhs); + if (isempty (rhs)) + return; + endif + depth = 0; + current = ''; + sign = 1; + for i = 1:numel (rhs) + ch = 
rhs(i); + if (ch == '(') + depth++; + current = [current, ch]; + elseif (ch == ')') + depth--; + current = [current, ch]; + elseif (depth == 0 && (ch == '+' || ch == '-')) + tok = strtrim (current); + if (! isempty (tok)) + if (strcmp (tok, '1')) + if (sign == -1), remove_int = true; else, has_int = true; endif + else + if (sign == -1), sub_toks{end+1} = tok; else, add_toks{end+1} = tok; endif + endif + endif + current = ''; + if (ch == '+'), sign = 1; else, sign = -1; endif + else + current = [current, ch]; + endif + endfor + tok = strtrim (current); + if (! isempty (tok)) + if (strcmp (tok, '1')) + if (sign == -1), remove_int = true; else, has_int = true; endif + else + if (sign == -1), sub_toks{end+1} = tok; else, add_toks{end+1} = tok; endif + endif + endif +endfunction + + +## ── Expand a single token into term rows ───────────────────────────────────── + +function rows = __expand_token__ (tok, pred_names, p) + tok = strtrim (tok); + rows = {}; + + ## Case 1: (group)^N + m = regexp (tok, '^\((.+)\)\^(\d+)$', 'tokens'); + if (! isempty (m)) + inner_str = m{1}{1}; + power = str2double (m{1}{2}); + inner_toks = strtrim (strsplit (inner_str, '+')); + inner_vars = []; + for i = 1:numel (inner_toks) + idx = find (strcmp (pred_names, strtrim (inner_toks{i}))); + if (! isempty (idx)) + inner_vars(end+1) = idx; + endif + endfor + if (! isempty (inner_vars)) + rows = __group_power_expand__ (inner_vars, power, p); + endif + return; + endif + + ## Case 2: bare (group) + m = regexp (tok, '^\((.+)\)$', 'tokens'); + if (! 
isempty (m)) + inner_str = m{1}{1}; + [add_t, sub_t, ~, ~] = __split_additive__ (inner_str); + for i = 1:numel (add_t) + tmp_exp = __expand_token__ (add_t{i}, pred_names, p); + rows = [rows, tmp_exp]; + endfor + for i = 1:numel (sub_t) + sub_r = __expand_token__ (sub_t{i}, pred_names, p); + for j = 1:numel (sub_r) + rows = __remove_row__ (rows, sub_r{j}); + endfor + endfor + rows = __dedup_rows__ (rows); + return; + endif + + ## Case 3: crossing (*) + if (__has_at_toplevel__ (tok, '*')) + parts = __split_toplevel__ (tok, '*'); + result = __expand_token__ (parts{1}, pred_names, p); + for i = 2:numel (parts) + right = __expand_token__ (parts{i}, pred_names, p); + result = __crossing_rows__ (result, right); + endfor + rows = result; + return; + endif + + ## Case 4: nesting (/) + if (__has_at_toplevel__ (tok, '/')) + parts = __split_toplevel__ (tok, '/'); + left = __expand_token__ (parts{1}, pred_names, p); + for i = 2:numel (parts) + right = __expand_token__ (parts{i}, pred_names, p); + cross = {}; + for li = 1:numel (left) + for ri = 1:numel (right) + cross{end+1} = left{li} + right{ri}; + endfor + endfor + left = [left, cross]; + endfor + rows = __dedup_rows__ (left); + return; + endif + + ## Case 5: power ladder (var^N) + m = regexp (tok, '^([a-zA-Z_][a-zA-Z0-9_]*)\^(\d+)$', 'tokens'); + if (! isempty (m)) + varname = m{1}{1}; + power = str2double (m{1}{2}); + idx = find (strcmp (pred_names, varname)); + if (! isempty (idx)) + for pwr = 1:power + row = zeros (1, p); + row(idx) = pwr; + rows{end+1} = row; + endfor + return; + endif + endif + + ## Case 6: interaction (A:B) + if (any (tok == ':')) + parts = strtrim (strsplit (tok, ':')); + row = zeros (1, p); + valid = true; + for i = 1:numel (parts) + part = parts{i}; + caret = strfind (part, '^'); + if (! 
isempty (caret)) + base = strtrim (part(1:caret(1)-1)); + pwr = str2double (part(caret(1)+1:end)); + else + base = part; + pwr = 1; + endif + idx = find (strcmp (pred_names, base)); + if (isempty (idx)) + valid = false; + break; + endif + row(idx(1)) = pwr; + endfor + if (valid) + rows = {row}; + endif + return; + endif + + ## Case 7: simple variable + idx = find (strcmp (pred_names, tok)); + if (! isempty (idx)) + row = zeros (1, p); + row(idx) = 1; + rows = {row}; + endif +endfunction + + +## ── Group power expansion ──────────────────────────────────────────────────── + +function rows = __group_power_expand__ (var_indices, max_power, p) + nv = numel (var_indices); + all_exps = __enum_exponents__ (nv, max_power); + rows = {}; + for i = 1:size (all_exps, 1) + row = zeros (1, p); + for j = 1:nv + row(var_indices(j)) = all_exps(i, j); + endfor + rows{end+1} = row; + endfor +endfunction + + +## ── Enumerate exponent vectors ─────────────────────────────────────────────── + +function E = __enum_exponents__ (nv, max_deg) + E = []; + total = (max_deg + 1) ^ nv; + for i = 0:total-1 + vec = zeros (1, nv); + val = i; + for j = 1:nv + vec(j) = mod (val, max_deg + 1); + val = floor (val / (max_deg + 1)); + endfor + s = sum (vec); + if (s >= 1 && s <= max_deg) + E = [E; vec]; + endif + endfor +endfunction + + +## ── Crossing rows (A*B expansion) ──────────────────────────────────────────── + +function rows = __crossing_rows__ (left, right) + cross = {}; + for li = 1:numel (left) + for ri = 1:numel (right) + cross{end+1} = left{li} + right{ri}; + endfor + endfor + rows = [left, right, cross]; + rows = __dedup_rows__ (rows); +endfunction + + +## ── Remove first matching row ──────────────────────────────────────────────── + +function rows = __remove_row__ (rows, target) + for i = 1:numel (rows) + if (isequal (rows{i}, target)) + rows(i) = []; + return; + endif + endfor +endfunction + + +## ── Deduplicate rows ───────────────────────────────────────────────────────── + +function 
rows = __dedup_rows__ (rows) + keep = true (numel (rows), 1); + for i = 1:numel (rows) + if (! keep(i)), continue; endif + for j = i+1:numel (rows) + if (keep(j) && isequal (rows{i}, rows{j})) + keep(j) = false; + endif + endfor + endfor + rows = rows(keep); +endfunction + + +## ── Check for char at top level (outside parens) ───────────────────────────── + +function result = __has_at_toplevel__ (tok, ch) + result = false; + depth = 0; + for i = 1:numel (tok) + if (tok(i) == '('), depth++; + elseif (tok(i) == ')'), depth--; + elseif (depth == 0 && tok(i) == ch) + result = true; + return; + endif + endfor +endfunction + + +## ── Split on char at top level ─────────────────────────────────────────────── + +function parts = __split_toplevel__ (tok, ch) + parts = {}; + depth = 0; + current = ''; + for i = 1:numel (tok) + if (tok(i) == '('), depth++; current = [current, tok(i)]; + elseif (tok(i) == ')'), depth--; current = [current, tok(i)]; + elseif (depth == 0 && tok(i) == ch) + parts{end+1} = strtrim (current); + current = ''; + else + current = [current, tok(i)]; + endif + endfor + if (! 
isempty (current)) + parts{end+1} = strtrim (current); + endif +endfunction + + +## ── Polynomial shorthand to terms ──────────────────────────────────────────── + +function terms = __poly_to_terms__ (max_powers, p, has_intercept) + max_deg = max (max_powers); + exps = cell (1, p); + for j = 1:p + exps{j} = 0:max_powers(j); + endfor + grids = cell (1, p); + [grids{:}] = ndgrid (exps{:}); + n_combos = numel (grids{1}); + all_rows = zeros (n_combos, p); + for j = 1:p + all_rows(:, j) = grids{j}(:); + endfor + total_deg = sum (all_rows, 2); + keep = (total_deg >= 1) & (total_deg <= max_deg); + terms_body = all_rows(keep, :); + [~, ord] = sortrows ([sum(terms_body, 2), terms_body]); + terms_body = terms_body(ord, :); + if (has_intercept) + terms = [zeros(1, p); terms_body]; + else + terms = terms_body; + endif +endfunction diff --git a/inst/private/__fitlm_build_diagnostics__.m b/inst/private/__fitlm_build_diagnostics__.m new file mode 100644 index 00000000..dae490ff --- /dev/null +++ b/inst/private/__fitlm_build_diagnostics__.m @@ -0,0 +1,182 @@ +## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com> +## +## This file is part of the statistics package for GNU Octave. +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . 
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {[@var{Fitted}, @var{Residuals}, @var{Diagnostics}, @
+## @var{MFVNM}] =} __fitlm_build_diagnostics__ (@var{s}, @var{X_full}, @
+## @var{y_full}, @var{incl_mask}, @var{excl_mask})
+##
+## Expand lm_fit_engine output from n_used rows to full n rows.
+##
+## Key behaviors confirmed from MATLAB Phase 4 cross-validation:
+##   - Excluded rows: Fitted computed (X*beta), Residuals = NaN
+##   - Missing rows (NaN in X or y): Fitted = NaN, Residuals = NaN
+##   - Diagnostics: Leverage=0, CooksD/Dffits/S2_i/CovRatio=NaN for
+##     non-included rows
+##
+## @var{s} is the struct returned by @code{lm_fit_engine}.
+## @var{X_full} is the full n x p_tot design matrix.
+## @var{y_full} is the full n x 1 response (NaN for missing rows).
+## @var{incl_mask} is n x 1 logical: rows used in fit.
+## @var{excl_mask} is n x 1 logical: rows explicitly excluded.
+## @end deftypefn
+
+function [Fitted, Residuals, Diagnostics, MFVNM] = ...
+         __fitlm_build_diagnostics__ (s, X_full, y_full, incl_mask, excl_mask)
+
+  n = rows (X_full);
+  p_est = s.p_est;
+  mse = s.mse;
+  dfe = s.dfe;
+
+  ## Rows whose data contain NaN: their Fitted value must stay NaN.
+  data_missing = any (isnan (X_full), 2) | isnan (y_full(:));
+
+  ## -- Fitted ---------------------------------------------------------------
+  Fitted = NaN (n, 1);
+
+  ## Included rows
+  Fitted(incl_mask) = s.yhat;
+
+  ## Excluded-but-not-missing rows: compute X * beta.
+  ## BUGFIX: the previous mask, excl_mask & !(!incl_mask & !excl_mask),
+  ## algebraically reduces to excl_mask itself, so rows that were both
+  ## excluded AND missing were multiplied through X*beta.  Test the data
+  ## for NaNs directly instead.
+  excl_not_missing = excl_mask & ! data_missing;
+  if (any (excl_not_missing))
+    Fitted(excl_not_missing) = X_full(excl_not_missing, :) * s.beta;
+  endif
+  ## Missing rows: Fitted stays NaN
+
+  ## -- Residuals (n x 4 struct) ---------------------------------------------
+  raw_r = NaN (n, 1);
+  pear_r = NaN (n, 1);
+  stud_r = NaN (n, 1);
+  std_r = NaN (n, 1);
+
+  ## Leverage for included rows (n_used x 1)
+  h_inc = s.h;
+
+  ## Pearson      = raw / sqrt(MSE)
+  ## Standardized = raw / sqrt(MSE * (1 - h))
+  ## Studentized (externally) = raw / sqrt(S2_i * (1 - h))
+  ## where S2_i = (dfe * MSE - raw^2 / (1-h)) / (dfe - 1)
+  raw_inc = s.resid;
+
+  raw_r(incl_mask) = raw_inc;
+  pear_r(incl_mask) = raw_inc / sqrt (mse);
+  std_denom = sqrt (mse * (1 - h_inc));
+  std_r(incl_mask) = raw_inc ./ std_denom;
+
+  ## Studentized (leave-one-out MSE)
+  s2_i_inc = max (0, (dfe * mse - raw_inc .^ 2 ./ (1 - h_inc)) / max (1, dfe - 1));
+  stud_denom = sqrt (s2_i_inc .* (1 - h_inc));
+  stud_r_inc = raw_inc ./ stud_denom;
+  stud_r_inc(stud_denom == 0) = 0;
+  stud_r(incl_mask) = stud_r_inc;
+
+  Residuals.Raw = raw_r;
+  Residuals.Pearson = pear_r;
+  Residuals.Studentized = stud_r;
+  Residuals.Standardized = std_r;
+
+  ## -- Diagnostics (n x 7 struct) -------------------------------------------
+  lev = zeros (n, 1);
+  cooks = NaN (n, 1);
+  dffits_v = NaN (n, 1);
+  s2_i_v = NaN (n, 1);
+  covratio_v = NaN (n, 1);
+  dfbetas_v = zeros (n, p_est);
+  hatmat = zeros (n, n);
+
+  ## Hat matrix block for included rows
+  Q_inc = s.Q;               ## n_used x p_est
+  H_inc = Q_inc * Q_inc';    ## n_used x n_used
+
+  lev(incl_mask) = h_inc;
+
+  ## Hat matrix: scatter the included-rows block in one vectorized
+  ## assignment (replaces the former O(n_used^2) double loop).
+  inc_idx = find (incl_mask);
+  hatmat(inc_idx, inc_idx) = H_inc;
+
+  ## Cook's distance: D_i = (raw_i^2 * h_i) / (p_est * MSE * (1-h_i)^2)
+  h_inc_safe = max (h_inc, eps);
+  cooks_inc = (raw_inc .^ 2 .* h_inc) ./ (p_est * mse * (1 - h_inc) .^ 2);
+  cooks(incl_mask) = cooks_inc;
+
+  ## DFFITS: raw / sqrt(S2_i * h)
+  dffits_inc = raw_inc ./ sqrt (s2_i_inc .* h_inc_safe);
+  dffits_inc(h_inc == 0) = 0;
+  dffits_v(incl_mask) = dffits_inc;
+
+  ## S2_i (leave-one-out variance)
+  s2_i_v(incl_mask) = s2_i_inc;
+
+  ## CovRatio: (S2_i / MSE)^p_est / (1-h)
+  s2_ratio = s2_i_inc / max (mse, eps);
+  covrat_inc = (s2_ratio .^ p_est) ./ (1 - h_inc);
+  covrat_inc(h_inc >= 1) = NaN;
+  covratio_v(incl_mask) = covrat_inc;
+
+  ## DFBETAS, scaled by sqrt(S2_i * diag(vcov)).  The engine vcov may be
+  ## p_tot x p_tot; only the first p_est diagonal entries are used.
+  vcov_diag = diag (s.vcov);
+  if (numel (vcov_diag) > p_est)
+    vcov_diag = vcov_diag(1:p_est);
+  endif
+
+  for ii = 1:numel (inc_idx)
+    ri = raw_inc(ii);
+    hi = h_inc(ii);
+    s2i = s2_i_inc(ii);
+    qi = Q_inc(ii, :)';    ## p_est x 1
+    dfb = (ri ./ (1 - hi)) * qi ./ sqrt (s2i .* max (vcov_diag, eps));
+    dfbetas_v(inc_idx(ii), 1:p_est) = dfb';
+  endfor
+
+  Diagnostics.Leverage = lev;
+  Diagnostics.CooksDistance = cooks;
+  Diagnostics.Dffits = dffits_v;
+  Diagnostics.S2_i = s2_i_v;
+  Diagnostics.CovRatio = covratio_v;
+  Diagnostics.Dfbetas = dfbetas_v;
+  Diagnostics.HatMatrix = hatmat;
+
+  ## -- ModelFitVsNullModel --------------------------------------------------
+  ## Whether the model has an intercept is not known inside this function,
+  ## so the F-test against the null model cannot be computed here; the
+  ## caller overrides MFVNM with the real values.
+  MFVNM.Fstat = NaN;
+  MFVNM.Pvalue = NaN;
+  MFVNM.NullModel = NaN;
+
+endfunction
diff --git a/inst/private/__fitlm_build_formula__.m b/inst/private/__fitlm_build_formula__.m
new file mode 100644
index 00000000..b45a6d70
--- /dev/null
+++ b/inst/private/__fitlm_build_formula__.m
@@ -0,0 +1,87 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {@var{Formula} =} __fitlm_build_formula__ (@
+## @var{terms_mat}, @var{var_names}, @var{pred_names}, @var{resp_name}, @
+## @var{has_intercept}, @var{lp_str}, @var{coef_names})
+##
+## Build the Formula struct for a LinearModel.
+##
+## @var{terms_mat} is the (n_terms x n_all_vars) binary matrix including
+## the response column (always 0).
+##
+## Returns a struct with the 14 confirmed fields from MATLAB Block 1-P4.
+## @end deftypefn
+
+function Formula = __fitlm_build_formula__ (terms_mat, var_names, pred_names, ...
+                                            resp_name, has_intercept, lp_str, coef_names)
+
+  n_terms = rows (terms_mat);
+  n_vars = columns (terms_mat);   ## includes response column
+  n_pred = numel (pred_names);
+
+  ## Build TermNames from terms_mat (using pred columns only)
+  terms_pred = terms_mat(:, 1:n_pred);
+  term_names = cell (n_terms, 1);
+  for i = 1:n_terms
+    row = terms_pred(i, :);
+    if (all (row == 0))
+      term_names{i} = "(Intercept)";
+    else
+      active = find (row);
+      parts = {};
+      for j = 1:numel (active)
+        v = active(j);
+        pwr = row(v);
+        if (pwr == 1)
+          parts{end+1} = pred_names{v};
+        else
+          parts{end+1} = sprintf ("%s^%d", pred_names{v}, pwr);
+        endif
+      endfor
+      term_names{i} = strjoin (parts, ":");
+    endif
+  endfor
+
+  ## InModel: logical vector over var_names (1 = predictor in model, 0 = response)
+  ## Length = n_vars.  Response column is always 0.  A predictor is InModel if
+  ## any non-intercept term has a non-zero entry in that predictor's column.
+  in_model = false (1, n_vars);
+  for j = 1:n_pred
+    if (any (terms_pred(:, j) != 0))
+      in_model(j) = true;
+    endif
+  endfor
+  ## Response column (last) = 0 (already false)
+
+  Formula.LinearPredictor = lp_str;
+  Formula.ResponseName = resp_name;
+  Formula.Terms = terms_mat;
+  Formula.VariableNames = var_names;   ## e.g. {'x1','x2','x3','y'}
+  Formula.HasIntercept = logical (has_intercept);
+  Formula.InModel = in_model;
+  Formula.TermNames = term_names;
+  Formula.PredictorNames = pred_names;
+  Formula.NTerms = n_terms;
+  Formula.NVars = n_vars;
+  Formula.NPredictors = n_pred;
+  Formula.Link = "identity";
+  Formula.ModelFun = @(b, X) X * b;
+  Formula.FunctionCalls = {};
+
+endfunction
diff --git a/inst/private/__fitlm_build_obsinfo__.m b/inst/private/__fitlm_build_obsinfo__.m
new file mode 100644
index 00000000..622350ed
--- /dev/null
+++ b/inst/private/__fitlm_build_obsinfo__.m
@@ -0,0 +1,46 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {@var{ObservationInfo} =} __fitlm_build_obsinfo__ (@
+## @var{weights}, @var{excl_mask}, @var{raw_X}, @var{raw_y}, @var{obs_names})
+##
+## Build the ObservationInfo table for a LinearModel.
+##
+## Confirmed column names from MATLAB Block 7-P4:
+##   {Weights, Excluded, Missing, Subset}
+## Subset = true means the row was used in the fit (not excluded, not missing).
+## @end deftypefn
+
+function ObservationInfo = __fitlm_build_obsinfo__ (weights, excl_mask, raw_X, raw_y, obs_names)
+
+  n = rows (raw_y);
+
+  ## Missing: any NaN in X rows or y
+  nan_in_X = any (isnan (raw_X), 2);
+  nan_in_y = isnan (raw_y);
+  missing = logical (nan_in_X | nan_in_y);
+
+  excluded = logical (excl_mask(:));
+  subset = (! excluded) & (! missing);
+
+  ObservationInfo.Weights = weights(:);
+  ObservationInfo.Excluded = excluded;
+  ObservationInfo.Missing = missing;
+  ObservationInfo.Subset = subset;
+
+endfunction
diff --git a/inst/private/__fitlm_build_varinfo__.m b/inst/private/__fitlm_build_varinfo__.m
new file mode 100644
index 00000000..d802a6f7
--- /dev/null
+++ b/inst/private/__fitlm_build_varinfo__.m
@@ -0,0 +1,101 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {@var{VariableInfo} =} __fitlm_build_varinfo__ (@
+## @var{var_names}, @var{cat_flags}, @var{raw_cols}, @var{in_model})
+##
+## Build the VariableInfo struct for a LinearModel.
+##
+## Confirmed from MATLAB Block 2-P4:
+## VariableInfo is a STRUCT (mirroring table layout) with fields:
+##   Class         - cell of char: 'double', 'categorical', etc.
+##   Range         - cell of cell: {[min max]} for numeric,
+##                   {[sorted_uniq]} for categorical
+##   InModel       - logical column vector
+##   IsCategorical - logical column vector
+##
+## Row names are stored separately (= var_names).
+##
+## @var{in_model} is a logical row vector of length numel(var_names): true for
+## each predictor that appears in the model (false for response).
+## @end deftypefn
+
+function VariableInfo = __fitlm_build_varinfo__ (var_names, cat_flags, raw_cols, in_model)
+
+  n_vars = numel (var_names);
+
+  Class = cell (n_vars, 1);
+  Range = cell (n_vars, 1);
+  InModel = false (n_vars, 1);
+  IsCategorical = false (n_vars, 1);
+
+  for j = 1:n_vars
+    col = raw_cols{j};
+
+    ## IsCategorical: from cat_flags (set by CategoricalVars or type detection)
+    IsCategorical(j) = logical (cat_flags(j));
+
+    ## Class
+    if (isa (col, "categorical"))
+      Class{j} = "categorical";
+    elseif (iscellstr (col) || isstring (col))
+      Class{j} = "cell";
+    elseif (isnumeric (col))
+      Class{j} = "double";
+    else
+      Class{j} = class (col);
+    endif
+
+    ## Range
+    if (IsCategorical(j))
+      ## Sorted unique levels
+      if (isa (col, "categorical"))
+        levs = cellstr (categories (col));
+      elseif (iscellstr (col))
+        levs = unique (col);
+      elseif (isstring (col))
+        levs = unique (cellstr (col));
+      else
+        ## Numeric forced categorical -- unique numeric values
+        uvals = unique (col(! isnan (col)));
+        levs = uvals(:)';
+      endif
+      Range{j} = {levs};
+    else
+      ## Numeric -- [min max]
+      if (isnumeric (col))
+        valid = col(! isnan (col));
+        if (isempty (valid))
+          Range{j} = {[NaN NaN]};
+        else
+          Range{j} = {[min(valid), max(valid)]};
+        endif
+      else
+        Range{j} = {[]};
+      endif
+    endif
+
+    ## InModel
+    InModel(j) = logical (in_model(j));
+  endfor
+
+  VariableInfo.Class = Class;
+  VariableInfo.Range = Range;
+  VariableInfo.InModel = InModel;
+  VariableInfo.IsCategorical = IsCategorical;
+
+endfunction
diff --git a/inst/private/__fitlm_parse_inputs__.m b/inst/private/__fitlm_parse_inputs__.m
new file mode 100644
index 00000000..b7974e9b
--- /dev/null
+++ b/inst/private/__fitlm_parse_inputs__.m
@@ -0,0 +1,347 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {[@var{raw_X}, @var{raw_y}, @var{var_names}, @
+## @var{cat_flags}, @var{weights}, @var{excl_mask}, @var{modelspec}, @
+## @var{obs_names}, @var{resp_name}, @var{pred_names}, @var{has_intercept}] =} @
+## __fitlm_parse_inputs__ (@var{varargin})
+##
+## Parse fitlm inputs into clean typed outputs.
+##
+## Returns:
+##   raw_X         - n x p numeric matrix (NaN in missing rows preserved)
+##   raw_y         - n x 1 numeric response vector
+##   var_names     - 1 x (p+1) cell of char: {pred_names..., resp_name}
+##   cat_flags     - 1 x (p+1) logical: IsCategorical for each variable
+##   weights       - n x 1 double (1.0 if unweighted)
+##   excl_mask     - n x 1 logical (true = explicitly excluded)
+##   modelspec     - char: Wilkinson formula string OR shorthand string
+##   obs_names     - cell of char (row names or {})
+##   resp_name     - char: response variable name
+##   pred_names    - 1 x p cell of char: predictor variable names
+##   has_intercept - logical: true unless explicitly suppressed
+##   raw_cols      - cell of original columns (for VariableInfo building)
+## @end deftypefn
+
+function [raw_X, raw_y, var_names, cat_flags, weights, excl_mask, ...
+          modelspec, obs_names, resp_name, pred_names, has_intercept, ...
+          raw_cols, dummy_coding] = ...
+         __fitlm_parse_inputs__ (varargin)
+
+  if (numel (varargin) < 1)
+    error ("fitlm: at least one input argument required.");
+  endif
+
+  ## --- Detect first argument: table or matrix ---
+  is_table_input = isa (varargin{1}, "table");
+
+  ## Initialize name-value defaults
+  weights_arg = [];
+  excl_arg = [];
+  intercept_arg = true;
+  cat_vars_arg = [];     ## integer indices or {}
+  var_names_arg = {};    ## VarNames override
+  resp_var_arg = "";     ## ResponseVar override
+  pred_vars_arg = {};    ## PredictorVars override
+
+  if (is_table_input)
+    ## -- TABLE INPUT ---------------------------------------------------------
+    tbl = varargin{1};
+    varargin(1) = [];
+
+    ## Extract table metadata
+    all_col_names = tbl.Properties.VariableNames;
+
+    ## ObservationNames from row names
+    obs_names = tbl.Properties.RowNames;   ## cell or {} if unset
+
+    ## Parse optional modelspec (2nd positional arg if char/not a NV key,
+    ## or a numeric terms matrix)
+    modelspec = "linear";   ## default shorthand
+    if (! isempty (varargin) && isnumeric (varargin{1}) && ...
+        ismatrix (varargin{1}) && ndims (varargin{1}) == 2)
+      ## Numeric terms matrix passed directly
+      modelspec = varargin{1};
+      varargin(1) = [];
+    elseif (! isempty (varargin) && ischar (varargin{1}) && ...
+            ! __is_nv_key__ (varargin{1}))
+      modelspec = varargin{1};
+      varargin(1) = [];
+    endif
+
+    ## Parse name-value pairs
+    [weights_arg, excl_arg, intercept_arg, cat_vars_arg, var_names_arg, ...
+     resp_var_arg, pred_vars_arg, dummy_coding_arg] = __parse_nv__ (varargin{:});
+
+    ## Determine response and predictor variables
+    if (! isempty (resp_var_arg))
+      resp_name = resp_var_arg;
+    elseif (! isempty (strfind (modelspec, "~")))
+      ## Formula string -- response is LHS
+      tilde_pos = strfind (modelspec, "~");
+      resp_name = strtrim (modelspec(1:tilde_pos(1)-1));
+    else
+      ## Default: last column of table
+      resp_name = all_col_names{end};
+    endif
+
+    ## Predictor names
+    if (! isempty (pred_vars_arg))
+      if (iscell (pred_vars_arg))
+        pred_names = pred_vars_arg;
+      else
+        pred_names = all_col_names(pred_vars_arg);
+      endif
+    else
+      ## All columns except response
+      pred_names = all_col_names(! strcmp (all_col_names, resp_name));
+    endif
+
+    ## var_names: predictor order then response (matches MATLAB Block 4-P4)
+    var_names = [pred_names, {resp_name}];
+
+    ## Extract raw columns
+    n = rows (tbl);
+    p = numel (pred_names);
+    raw_X = zeros (n, p);
+    raw_cols = cell (1, p + 1);
+
+    cat_flags = false (1, p + 1);
+
+    ## Handle CategoricalVars -- may be names or indices into pred_names
+    cat_idx_set = __resolve_cat_vars__ (cat_vars_arg, pred_names);
+
+    for k = 1:p
+      col = tbl.(pred_names{k});
+      raw_cols{k} = col;
+      if (isa (col, "categorical"))
+        ## Store numeric codes -- raw_X stays as codes for design builder
+        cat_flags(k) = true;
+        raw_X(:, k) = double (col);   ## integer codes (1-based after sort)
+      elseif (iscellstr (col) || isstring (col))
+        cat_flags(k) = true;
+        ## NOTE(review): NaN placeholder; design builder uses raw_cols.
+        ## Downstream missing-row detection scans raw_X for NaNs -- confirm
+        ## cellstr predictors are special-cased there so every row is not
+        ## flagged as missing.
+        raw_X(:, k) = NaN;
+      elseif (ismember (k, cat_idx_set))
+        cat_flags(k) = true;
+        raw_X(:, k) = col;
+      else
+        raw_X(:, k) = col;
+      endif
+    endfor
+
+    ## Response column
+    raw_y = tbl.(resp_name);
+    raw_y = raw_y(:);
+    raw_cols{p+1} = raw_y;
+
+    ## cat_flags for response (always false)
+    cat_flags(p+1) = false;
+
+    ## No VarNames override for table input
+
+  else
+    ## -- MATRIX INPUT --------------------------------------------------------
+    X_in = varargin{1};
+    ## (nargin of a varargin function equals numel (varargin) at entry, so a
+    ## separate nargin test here would be redundant.)
+    if (numel (varargin) < 2)
+      error ("fitlm: matrix input requires at least X and y arguments.");
+    endif
+    y_in = varargin{2}(:);
+    varargin(1:2) = [];
+
+    n = rows (X_in);
+    p = columns (X_in);
+    obs_names = {};
+
+    ## Parse optional modelspec (char string, shorthand, or numeric terms matrix)
+    modelspec = "linear";
+    if (! isempty (varargin) && isnumeric (varargin{1}) && ...
+        ismatrix (varargin{1}) && ndims (varargin{1}) == 2)
+      ## Numeric terms matrix passed directly
+      modelspec = varargin{1};
+      varargin(1) = [];
+    elseif (! isempty (varargin) && ischar (varargin{1}) && ...
+            ! __is_nv_key__ (varargin{1}))
+      modelspec = varargin{1};
+      varargin(1) = [];
+    endif
+
+    ## Parse name-value pairs
+    [weights_arg, excl_arg, intercept_arg, cat_vars_arg, var_names_arg, ...
+     resp_var_arg, pred_vars_arg, dummy_coding_arg] = __parse_nv__ (varargin{:});
+
+    ## Variable names
+    if (! isempty (var_names_arg))
+      ## VarNames override: last = response, rest = predictors
+      if (numel (var_names_arg) != p + 1)
+        error ("fitlm: VarNames must have %d elements (p predictors + 1 response).", p+1);
+      endif
+      pred_names = var_names_arg(1:p);
+      resp_name = var_names_arg{p+1};
+    else
+      ## Auto-generate: x1...xp and y
+      pred_names = arrayfun (@(j) sprintf ("x%d", j), 1:p, "UniformOutput", false);
+      resp_name = "y";
+    endif
+
+    var_names = [pred_names, {resp_name}];
+
+    raw_X = X_in;
+    raw_y = y_in;
+
+    ## cat_flags -- start all false, then mark CategoricalVars
+    cat_flags = false (1, p + 1);
+    cat_idx_set = __resolve_cat_vars__ (cat_vars_arg, pred_names);
+    if (! isempty (cat_idx_set))
+      cat_flags(cat_idx_set) = true;
+    endif
+
+    ## raw_cols for VariableInfo building
+    raw_cols = cell (1, p + 1);
+    for k = 1:p
+      raw_cols{k} = raw_X(:, k);
+    endfor
+    raw_cols{p+1} = raw_y;
+
+  endif   ## is_table_input
+
+  ## --- Weights ---
+  if (isempty (weights_arg))
+    weights = ones (n, 1);
+  else
+    weights = weights_arg(:);
+    if (numel (weights) != n)
+      error ("fitlm: Weights must have %d elements.", n);
+    endif
+  endif
+
+  ## --- Exclusion mask ---
+  excl_mask = false (n, 1);
+  if (! isempty (excl_arg))
+    if (islogical (excl_arg))
+      excl_mask = logical (excl_arg(:));
+    else
+      idx = round (excl_arg(:));
+      excl_mask(idx) = true;
+    endif
+  endif
+
+  ## --- Intercept ---
+  has_intercept = logical (intercept_arg);
+
+  ## If modelspec is a formula string, a trailing '-1' suppresses the
+  ## intercept; the '-1' token itself is handled by the formula parser later.
+  if (ischar (modelspec) && ! isempty (strfind (modelspec, "~")))
+    has_intercept = isempty (regexp (modelspec, '-\s*1'));
+  endif
+
+  ## DummyVarCoding
+  dummy_coding = dummy_coding_arg;
+
+  ## If Intercept=false was explicitly given, override
+  if (! intercept_arg)
+    has_intercept = false;
+  endif
+
+endfunction
+
+## -- Internal helpers ---------------------------------------------------------
+
+function result = __is_nv_key__ (s)
+  ## Return true if s is a known name-value argument name.
+  ## Everything that is NOT a known key and contains '~' or is a shorthand
+  ## is passed as modelspec.
+  known = {"weights", "exclude", "intercept", "categoricalvars", ...
+           "varnames", "robustopts", "responsevar", "predictorvars", ...
+           "dummyvarcoding"};
+  result = any (strcmpi (s, known));
+endfunction
+
+function [weights_arg, excl_arg, intercept_arg, cat_vars_arg, var_names_arg, ...
+          resp_var_arg, pred_vars_arg, dummy_coding_arg] = __parse_nv__ (varargin)
+  weights_arg = [];
+  excl_arg = [];
+  intercept_arg = true;
+  cat_vars_arg = [];
+  var_names_arg = {};
+  resp_var_arg = "";
+  pred_vars_arg = {};
+  dummy_coding_arg = "reference";
+
+  i = 1;
+  while (i <= numel (varargin))
+    if (! ischar (varargin{i}))
+      error ("fitlm: expected name-value argument name at position %d.", i);
+    endif
+    key = lower (varargin{i});
+    if (i + 1 > numel (varargin))
+      error ("fitlm: name-value argument '%s' has no value.", varargin{i});
+    endif
+    val = varargin{i+1};
+    switch (key)
+      case "weights"
+        weights_arg = val;
+      case "exclude"
+        excl_arg = val;
+      case "intercept"
+        intercept_arg = logical (val);
+      case "categoricalvars"
+        cat_vars_arg = val;
+      case "varnames"
+        var_names_arg = val;
+      case "robustopts"
+        ## Accepted but not implemented in Phase 4
+      case "dummyvarcoding"
+        dummy_coding_arg = lower (val);
+      case "responsevar"
+        resp_var_arg = val;
+      case "predictorvars"
+        pred_vars_arg = val;
+      otherwise
+        warning ("fitlm: ignoring unknown name-value argument '%s'.", varargin{i});
+    endswitch
+    i = i + 2;
+  endwhile
+endfunction
+
+function idx_set = __resolve_cat_vars__ (cat_vars_arg, pred_names)
+  ## Return integer indices (into pred_names) of categorical predictors.
+  idx_set = [];
+  if (isempty (cat_vars_arg))
+    return;
+  endif
+  p = numel (pred_names);
+  if (isnumeric (cat_vars_arg))
+    idx_set = round (cat_vars_arg(:))';
+  elseif (iscell (cat_vars_arg))
+    for k = 1:numel (cat_vars_arg)
+      m = find (strcmp (pred_names, cat_vars_arg{k}));
+      if (! isempty (m))
+        idx_set(end+1) = m;
+      endif
+    endfor
+  elseif (ischar (cat_vars_arg))
+    m = find (strcmp (pred_names, cat_vars_arg));
+    if (! isempty (m))
+      idx_set = m;
+    endif
+  endif
+endfunction
diff --git a/inst/private/__stepwiselm_candidates__.m b/inst/private/__stepwiselm_candidates__.m
new file mode 100644
index 00000000..de8d92ef
--- /dev/null
+++ b/inst/private/__stepwiselm_candidates__.m
@@ -0,0 +1,219 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Private Function} {@var{candidates} =} __stepwiselm_candidates__ @ +## (@var{current_terms}, @var{lower_terms}, @var{upper_terms}, @ +## @var{cat_flags}, @var{pred_names}) +## +## Enumerate candidate terms for adding or removing in stepwise regression. +## +## @strong{Inputs} +## +## @table @var +## @item current_terms +## Binary terms matrix of the current model (n_terms x n_vars). +## Includes the response column (last column, always 0). +## @item lower_terms +## Binary terms matrix of the lower (minimum) model. +## @item upper_terms +## Binary terms matrix of the upper (maximum) model. +## @item cat_flags +## Logical vector (1 x n_vars) indicating categorical variables. +## @item pred_names +## Cell array of predictor variable names. +## @end table +## +## @strong{Output} +## +## @var{candidates} is a struct array. Each element has fields: +## @table @code +## @item term_row +## Binary row vector (1 x n_vars) for this term. +## @item term_name +## Char string: e.g., @code{'x1'}, @code{'x1:x2'}, @code{'Species'}. +## @item action +## @code{'add'} or @code{'remove'}. +## @item df_term +## Number of design-matrix columns this term contributes: +## 1 for numeric, k-1 for k-level categorical, products for interactions. 
+## @end table +## +## @seealso{stepwiselm, __stepwiselm_ftest__} +## @end deftypefn + +function candidates = __stepwiselm_candidates__ (current_terms, lower_terms, ... + upper_terms, cat_flags, ... + pred_names) + + candidates = struct ("term_row", {}, "term_name", {}, ... + "action", {}, "df_term", {}); + + p = numel (pred_names); ## number of predictors (excl. response) + n_vars = columns (current_terms); ## p + 1 (includes response col) + + ## ── REMOVE candidates ───────────────────────────────────────────────────── + ## Terms in current_terms that are NOT in lower_terms and NOT the intercept. + for i = 1:rows (current_terms) + row = current_terms(i, :); + + ## Skip intercept (all-zeros row) + if (all (row == 0)) + continue; + endif + + ## Check if this term is in lower_terms (protected) + is_lower = false; + for j = 1:rows (lower_terms) + if (isequal (row, lower_terms(j, :))) + is_lower = true; + break; + endif + endfor + if (is_lower) + continue; + endif + + ## This is a valid removal candidate + c.term_row = row; + c.term_name = __term_row_to_name__ (row, pred_names, p); + c.action = "remove"; + c.df_term = __compute_df_term__ (row, cat_flags, pred_names, p); + candidates(end+1) = c; + endfor + + ## ── ADD candidates ──────────────────────────────────────────────────────── + ## Terms in upper_terms that are NOT in current_terms. 
+ for i = 1:rows (upper_terms) + row = upper_terms(i, :); + + ## Skip intercept (it's always in the model) + if (all (row == 0)) + continue; + endif + + ## Check if already in current_terms + already_in = false; + for j = 1:rows (current_terms) + if (isequal (row, current_terms(j, :))) + already_in = true; + break; + endif + endfor + if (already_in) + continue; + endif + + ## Valid add candidate + c.term_row = row; + c.term_name = __term_row_to_name__ (row, pred_names, p); + c.action = "add"; + c.df_term = __compute_df_term__ (row, cat_flags, pred_names, p); + candidates(end+1) = c; + endfor + +endfunction + + +## ── Helper: term row → human-readable name ────────────────────────────────── + +function name = __term_row_to_name__ (row, pred_names, p) + ## Convert a terms matrix row (1 x n_vars) to a human-readable term name + ## like 'x1', 'x1:x2', 'Species', 'x1^2'. + ## + ## The row may have width = p or p+1 (with response column). + ## We only look at columns 1..p. + + active = find (row(1:p)); + if (isempty (active)) + name = "1"; ## intercept + return; + endif + + parts = cell (1, numel (active)); + for k = 1:numel (active) + v = active(k); + pwr = row(v); + if (pwr == 1) + parts{k} = pred_names{v}; + else + parts{k} = sprintf ("%s^%d", pred_names{v}, pwr); + endif + endfor + name = strjoin (parts, ":"); +endfunction + + +## ── Helper: compute df_term ───────────────────────────────────────────────── + +function df = __compute_df_term__ (row, cat_flags, pred_names, p) + ## Compute number of design matrix columns for a term. 
+  ##
+  ## Rules (§8 of reference doc):
+  ##   Numeric predictor:            df = 1
+  ##   Categorical (k levels):       df = k - 1
+  ##   Interaction num × num:        df = 1
+  ##   Interaction num × cat(k):     df = k - 1
+  ##   Interaction cat(j) × cat(k):  df = (j-1) × (k-1)
+  ##   Squared numeric (x^2):        df = 1
+
+  active = find (row(1:p));
+  if (isempty (active))
+    df = 1; ## intercept
+    return;
+  endif
+
+  ## df is multiplicative over the factors participating in the term.
+  df = 1;
+  for k = 1:numel (active)
+    v = active(k);
+    if (cat_flags(v))
+      ## Categorical factor.  cat_flags(v) carries one of two encodings:
+      ##   - numeric > 1: the number of levels k for this factor; it then
+      ##     contributes a multiplicative factor of k - 1 columns.
+      ##   - logical true: the level count is not available here (cat_flags
+      ##     is a plain logical vector), so we contribute 1 as a placeholder.
+      ##     The authoritative df is recomputed in __stepwiselm_ftest__ from
+      ##     the actual difference in design-matrix column counts, which is
+      ##     always correct regardless of the placeholder used here.
+      if (isnumeric (cat_flags) && cat_flags(v) > 1)
+        df = df * (cat_flags(v) - 1);
+      else
+        ## Default: will be resolved by design matrix column counting
+        ## in __stepwiselm_ftest__
+        df = df * 1;
+      endif
+    else
+      ## Numeric: 1 column regardless of power
+      df = df * 1;
+    endif
+  endfor
+endfunction
diff --git a/inst/private/__stepwiselm_ftest__.m b/inst/private/__stepwiselm_ftest__.m
new file mode 100644
index 00000000..00b110c2
--- /dev/null
+++ b/inst/private/__stepwiselm_ftest__.m
@@ -0,0 +1,202 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see .
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {[@var{fstat}, @var{pval}, @var{sse_new}, @
+## @var{mse_new}, @var{df_term}, @var{crit_new}] =} @
+## __stepwiselm_ftest__ (@var{raw_X}, @var{raw_y}, @var{weights}, @
+## @var{incl_mask}, @var{current_terms}, @var{candidate}, @
+## @var{cat_flags}, @var{var_names}, @var{pred_names}, @
+## @var{raw_cols}, @var{has_intercept}, @var{criterion})
+##
+## Compute the partial F-test statistic for a candidate term action.
+##
+## This function builds the design matrix for the candidate model using
+## @code{__fitlm_build_design__} and calls @code{lm_fit_engine} directly
+## (not @code{fitlm}) for maximum performance inside the stepwise loop.
+##
+## @strong{F-test formula (confirmed §7)}
+##
+## For @strong{adding} a term:
+## @example
+## F = (SSE_current - SSE_augmented) / df_term / MSE_augmented
+## p = 1 - fcdf(F, df_term, DFE_augmented)
+## @end example
+##
+## For @strong{removing} a term:
+## @example
+## F = (SSE_reduced - SSE_current) / abs(df_term) / MSE_current
+## p = 1 - fcdf(F, abs(df_term), DFE_current)
+## @end example
+##
+## Key rule: the denominator always uses MSE of the @strong{larger}
+## (more complex) model.
+##
+## @seealso{stepwiselm, __stepwiselm_candidates__, lm_fit_engine}
+## @end deftypefn
+
+function [fstat, pval, sse_new, mse_new, df_term, crit_new] = ...
+         __stepwiselm_ftest__ (raw_X, raw_y, weights, incl_mask, ...
+                               current_terms, candidate, cat_flags, ...
+                               var_names, pred_names, raw_cols, ...
+                               has_intercept, criterion)
+
+  n_pred = numel (pred_names);
+  excl_mask = ! incl_mask; ## __fitlm_build_design__ expects excl_mask
+
+  ## ── Build design matrices for both current and candidate models ──────────
+
+  if (strcmp (candidate.action, "add"))
+    ## Adding term: current model is smaller, candidate model is larger
+    new_terms = [current_terms; candidate.term_row];
+
+    ## Sort by term order (number of active predictors) for clean design
+    term_order = sum (new_terms(:, 1:n_pred), 2);
+    [~, sort_idx] = sort (term_order);
+    new_terms = new_terms(sort_idx, :);
+
+    ## Build design for current model (smaller)
+    [~, ~, X_curr, ~, ~, ~, ~, ~] = ...
+      __fitlm_build_design__ (raw_X, raw_y, current_terms, var_names, ...
+                              cat_flags, weights, excl_mask, pred_names, ...
+                              raw_cols, has_intercept);
+
+    ## Build design for augmented model (larger)
+    [~, ~, X_new, ~, ~, ~, ~, ~] = ...
+      __fitlm_build_design__ (raw_X, raw_y, new_terms, var_names, ...
+                              cat_flags, weights, excl_mask, pred_names, ...
+                              raw_cols, has_intercept);
+
+    ## Extract included rows
+    X_curr_incl = X_curr(incl_mask, :);
+    X_new_incl = X_new(incl_mask, :);
+    y_incl = raw_y(incl_mask);
+    w_incl = weights(incl_mask);
+
+    ## Fit both models.  Save and restore the caller's warning state instead
+    ## of unconditionally turning the warning back on, so a user who disabled
+    ## "CompactLinearModel:rankDeficient" keeps it disabled.  unwind_protect
+    ## guarantees restoration even if a fit errors out.
+    ws = warning ("off", "CompactLinearModel:rankDeficient");
+    unwind_protect
+      s_curr = lm_fit_engine (X_curr_incl, y_incl, w_incl);
+      s_new = lm_fit_engine (X_new_incl, y_incl, w_incl);
+    unwind_protect_cleanup
+      warning (ws);
+    end_unwind_protect
+
+    ## df_term = difference in design matrix columns (correct for categoricals)
+    df_term = columns (X_new_incl) - columns (X_curr_incl);
+    if (df_term <= 0)
+      df_term = 1; ## safety fallback
+    endif
+
+    ## F = (SSE_curr - SSE_new) / df / MSE_new
+    ## Denominator is MSE of the LARGER (augmented) model
+    if (s_new.mse > 0 && df_term > 0)
+      fstat = (s_curr.sse - s_new.sse) / df_term / s_new.mse;
+      pval = 1 - fcdf (fstat, df_term, s_new.dfe);
+    else
+      ## Degenerate fit (zero MSE): treat the candidate as non-significant.
+      fstat = 0;
+      pval = 1;
+    endif
+
+    sse_new = s_new.sse;
+    mse_new = s_new.mse;
+    crit_new = __compute_criterion__ (s_new, criterion);
+
+  else
+    ## Removing term: current model is larger, candidate model is smaller
+    ## Build reduced model (without the term)
+    red_terms = __remove_term_row__ (current_terms, candidate.term_row);
+
+    ## Build design for current model (larger)
+    [~, ~, X_curr, ~, ~, ~, ~, ~] = ...
+      __fitlm_build_design__ (raw_X, raw_y, current_terms, var_names, ...
+                              cat_flags, weights, excl_mask, pred_names, ...
+                              raw_cols, has_intercept);
+
+    ## Build design for reduced model (smaller)
+    [~, ~, X_red, ~, ~, ~, ~, ~] = ...
+      __fitlm_build_design__ (raw_X, raw_y, red_terms, var_names, ...
+                              cat_flags, weights, excl_mask, pred_names, ...
+                              raw_cols, has_intercept);
+
+    ## Extract included rows
+    X_curr_incl = X_curr(incl_mask, :);
+    X_red_incl = X_red(incl_mask, :);
+    y_incl = raw_y(incl_mask);
+    w_incl = weights(incl_mask);
+
+    ## Fit both models (warning state saved/restored as in the add branch)
+    ws = warning ("off", "CompactLinearModel:rankDeficient");
+    unwind_protect
+      s_curr = lm_fit_engine (X_curr_incl, y_incl, w_incl);
+      s_red = lm_fit_engine (X_red_incl, y_incl, w_incl);
+    unwind_protect_cleanup
+      warning (ws);
+    end_unwind_protect
+
+    ## df_term = difference in design matrix columns
+    df_term = columns (X_curr_incl) - columns (X_red_incl);
+    if (df_term <= 0)
+      df_term = 1;
+    endif
+
+    ## F = (SSE_red - SSE_curr) / df / MSE_curr
+    ## Denominator is MSE of the LARGER (current) model
+    if (s_curr.mse > 0 && df_term > 0)
+      fstat = (s_red.sse - s_curr.sse) / df_term / s_curr.mse;
+      pval = 1 - fcdf (fstat, df_term, s_curr.dfe);
+    else
+      fstat = 0;
+      pval = 1;
+    endif
+
+    sse_new = s_red.sse;
+    mse_new = s_red.mse;
+    crit_new = __compute_criterion__ (s_red, criterion);
+  endif
+
+endfunction
+
+
+## ── Helper: remove a term row from a terms matrix ───────────────────────────
+
+function new_terms = __remove_term_row__ (terms, term_row)
+  ## Remove the FIRST row matching term_row from the terms matrix.
+  ## Only one row is removed because a stepwise action operates on a single
+  ## term at a time (duplicate rows, if any, are left untouched).
+  mask = true (rows (terms), 1);
+  for j = 1:rows (terms)
+    if (isequal (terms(j, :), term_row))
+      mask(j) = false;
+      break; ## remove only the first match
+    endif
+  endfor
+  new_terms = terms(mask, :);
+endfunction
+
+
+## ── Helper: compute criterion value from lm_fit_engine output ───────────────
+
+function val = __compute_criterion__ (s, criterion)
+  ## Compute model selection criterion from lm_fit_engine struct.
+  switch (criterion)
+    case "sse"
+      val = s.sse;
+    case "aic"
+      val = s.crit.AIC;
+    case "bic"
+      val = s.crit.BIC;
+    case "rsquared"
+      val = -s.rsq.Ordinary; ## negate so lower = better
+    case "adjrsquared"
+      val = -s.rsq.Adjusted; ## negate so lower = better
+    otherwise
+      ## Criterion strings are validated in __stepwiselm_parse_inputs__, so
+      ## this branch is unreachable in normal use; fall back to SSE defensively.
+      val = s.sse;
+  endswitch
+endfunction
diff --git a/inst/private/__stepwiselm_parse_inputs__.m b/inst/private/__stepwiselm_parse_inputs__.m
new file mode 100644
index 00000000..f1c30220
--- /dev/null
+++ b/inst/private/__stepwiselm_parse_inputs__.m
@@ -0,0 +1,138 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see .
+
+## -*- texinfo -*-
+## @deftypefn {Private Function} {[@var{raw_X}, @var{raw_y}, @var{var_names}, @
+## @var{cat_flags}, @var{weights}, @var{excl_mask}, @var{modelspec}, @
+## @var{obs_names}, @var{resp_name}, @var{pred_names}, @var{has_intercept}, @
+## @var{raw_cols}, @var{lower_spec}, @var{upper_spec}, @var{p_enter}, @
+## @var{p_remove}, @var{criterion}, @var{n_steps}, @var{verbose}] =} @
+## __stepwiselm_parse_inputs__ (@var{varargin})
+##
+## Parse stepwiselm inputs. Extracts the seven stepwise-specific name-value
+## arguments (Lower, Upper, PEnter, PRemove, Criterion, NSteps, Verbose),
+## validates them, then delegates the remaining arguments to
+## __fitlm_parse_inputs__ for standard fitlm parsing.
+##
+## Validation rules:
+## @itemize
+## @item @code{PEnter} must be strictly less than @code{PRemove}, regardless
+## of criterion. This matches MATLAB's behavior (even for AIC/BIC).
+## @item @code{Criterion} must be one of @code{'sse'}, @code{'aic'},
+## @code{'bic'}, @code{'rsquared'}, @code{'adjrsquared'}.
+## @item @code{NSteps} must be a positive integer or @code{Inf}.
+## @item @code{Verbose} must be 0, 1, or 2.
+## @end itemize
+##
+## @seealso{stepwiselm, __fitlm_parse_inputs__}
+## @end deftypefn
+
+function [raw_X, raw_y, var_names, cat_flags, weights, excl_mask, ...
+          modelspec, obs_names, resp_name, pred_names, has_intercept, ...
+          raw_cols, lower_spec, upper_spec, p_enter, p_remove, ...
+          criterion, n_steps, verbose] = ...
+          __stepwiselm_parse_inputs__ (varargin)
+
+  ## ── Initialize stepwise-specific defaults ─────────────────────────────────
+  lower_spec = "constant";
+  upper_spec = "interactions";
+  p_enter = 0.05;
+  p_remove = 0.10;
+  criterion = "sse";
+  n_steps = Inf;
+  verbose = 1;
+
+  ## ── Extract stepwise NV args from varargin before passing to fitlm parser ─
+  ## We walk through varargin and pull out stepwise-specific keys,
+  ## leaving everything else for __fitlm_parse_inputs__.
+  ##
+  ## NOTE(review): this scan classifies ANY char argument that matches a
+  ## stepwise key as a stepwise option.  A fitlm name-value pair whose VALUE
+  ## is such a char (e.g. a variable literally named 'verbose' passed as a
+  ## ResponseVar value) would be misclassified here and would steal the next
+  ## argument as its value — confirm whether fitlm accepts such values and
+  ## add pairwise consumption if so.
+  remaining = {};
+  stepwise_keys = {"lower", "upper", "penter", "premove", ...
+                   "criterion", "nsteps", "verbose"};
+
+  i = 1;
+  while (i <= numel (varargin))
+    ## Non-char args (data matrices, table, numeric modelspec) pass through
+    if (! ischar (varargin{i}))
+      remaining{end+1} = varargin{i};
+      i = i + 1;
+      continue;
+    endif
+
+    key_lower = lower (varargin{i});
+
+    ## Check if this is a stepwise-specific key
+    if (any (strcmp (key_lower, stepwise_keys)))
+      if (i + 1 > numel (varargin))
+        error ("stepwiselm: name-value argument '%s' has no value.", ...
+               varargin{i});
+      endif
+      val = varargin{i+1};
+      switch (key_lower)
+        case "lower"
+          lower_spec = val;
+        case "upper"
+          upper_spec = val;
+        case "penter"
+          p_enter = double (val);
+        case "premove"
+          p_remove = double (val);
+        case "criterion"
+          criterion = lower (val);
+        case "nsteps"
+          n_steps = double (val);
+        case "verbose"
+          verbose = double (val);
+      endswitch
+      i = i + 2; ## consume key and value
+    else
+      ## Not a stepwise key — pass through to fitlm parser
+      remaining{end+1} = varargin{i};
+      i = i + 1;
+    endif
+  endwhile
+
+  ## ── Validate stepwise arguments ───────────────────────────────────────────
+
+  ## PEnter < PRemove always required (matches MATLAB, even for AIC/BIC)
+  if (p_enter >= p_remove)
+    error ("stepwiselm: Cannot have PEnter (%g) >= PRemove (%g) for the %s criterion.", ...
+           p_enter, p_remove, criterion);
+  endif
+
+  ## Criterion must be valid
+  valid_criteria = {"sse", "aic", "bic", "rsquared", "adjrsquared"};
+  if (! any (strcmp (criterion, valid_criteria)))
+    error ("stepwiselm: Criterion must be one of: %s.", ...
+           strjoin (valid_criteria, ", "));
+  endif
+
+  ## NSteps must be positive
+  if (! (isinf (n_steps) || (n_steps > 0 && n_steps == round (n_steps))))
+    error ("stepwiselm: NSteps must be a positive integer or Inf.");
+  endif
+
+  ## Verbose must be 0, 1, or 2
+  if (! any (verbose == [0, 1, 2]))
+    error ("stepwiselm: Verbose must be 0, 1, or 2.");
+  endif
+
+  ## ── Delegate to fitlm parser ──────────────────────────────────────────────
+  [raw_X, raw_y, var_names, cat_flags, weights, excl_mask, ...
+   modelspec, obs_names, resp_name, pred_names, has_intercept, raw_cols] = ...
+    __fitlm_parse_inputs__ (remaining{:});
+
+endfunction
diff --git a/inst/private/lm_fit_engine.m b/inst/private/lm_fit_engine.m
new file mode 100644
index 00000000..1ca03d43
--- /dev/null
+++ b/inst/private/lm_fit_engine.m
@@ -0,0 +1,263 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Private Function} {@var{s} =} lm_fit_engine (@var{X}, @var{y}) +## @deftypefnx {Private Function} {@var{s} =} lm_fit_engine (@var{X}, @var{y}, @var{weights}) +## +## QR-based ordinary least squares regression engine. +## +## This is the single source of truth for all linear regression math in the +## statistics package. It is called by @code{fitlm} (Phase 4) and by +## @code{LinearModel} methods such as @code{addTerms} and @code{removeTerms} +## (Phase 3). +## +## @var{X} is the @math{n x p} design matrix (including intercept column if +## desired). @var{y} is the @math{n x 1} response vector. @var{weights} is +## an optional @math{n x 1} vector of observation weights; pass @code{[]} for +## unweighted OLS. +## +## Returns a struct @var{s} with the following fields: +## +## @table @code +## @item beta +## Coefficient estimates (p x 1). +## @item se +## Standard errors (p x 1). +## @item tstat +## t-statistics (p x 1). +## @item pval +## p-values for two-sided t-test (p x 1). +## @item vcov +## Coefficient covariance matrix (p x p). +## @item yhat +## Fitted values (n x 1). +## @item resid +## Residuals (n x 1). +## @item dfe +## Error degrees of freedom. +## @item sse +## Sum of squared errors (residual sum of squares). +## @item ssr +## Regression sum of squares. 
+## @item sst
+## Total sum of squares (= SSE + SSR).
+## @item mse
+## Mean squared error (SSE / DFE).
+## @item rmse
+## Root mean squared error.
+## @item logL
+## Log-likelihood under normality assumption.
+## @item p_est
+## Number of estimated (non-rank-deficient) coefficients.
+## @item p_tot
+## Total number of coefficients in the model.
+## @item rank_def
+## Logical vector indicating rank-deficient columns.
+## @item rsq
+## Struct with fields @code{Ordinary} and @code{Adjusted}.
+## @item crit
+## Struct with fields @code{AIC}, @code{AICc}, @code{BIC}, @code{CAIC}.
+## @item n
+## Number of observations.
+## @item Q
+## Economy QR factor for non-deficient columns (n x p_est). Used by
+## @code{LinearModel} to compute the hat matrix without a second QR.
+## @item h
+## Leverage vector (n x 1), equal to @code{diag(Q*Q')} = row sums of
+## squares of @var{Q}.
+## @end table
+##
+## @seealso{CompactLinearModel}
+## @end deftypefn
+
+function s = lm_fit_engine (X, y, weights)
+
+  if (nargin < 2)
+    print_usage ();
+  endif
+  if (nargin < 3)
+    weights = []; ## empty => unweighted OLS
+  endif
+
+  n = rows (X);
+  p = columns (X);
+
+  ## Weighted or unweighted QR
+  if (isempty (weights))
+    [Q, R] = qr (X, 0);
+    Qty = Q' * y;
+  else
+    ## WLS via sqrt-weight row scaling of X and y
+    sqw = sqrt (weights(:));
+    [Q, R] = qr (bsxfun (@times, sqw, X), 0);
+    Qty = Q' * (sqw .* y);
+  endif
+
+  ## Rank deficiency detection
+  ## Detect before solving to avoid spurious singular-matrix warnings
+  tol = max ([n, p]) * eps (norm (R, "inf"));
+  rank_def = abs (diag (R)) < tol;
+
+  ## Solve for beta
+  if (any (rank_def))
+    warning ("CompactLinearModel:rankDeficient", ...
+             "CompactLinearModel: rank deficient, rank = %d.", ...
+             sum (! rank_def));
+    ## Solve only the non-deficient subsystem
+    idx_good = find (! rank_def);
+    beta = zeros (p, 1);
+    R_good = R(idx_good, idx_good);
+    beta(idx_good) = R_good \ Qty(idx_good);
+  else
+    beta = R \ Qty;
+  endif
+
+  ## Residuals and sums of squares
+  ## NOTE(review): with weights supplied, sse/ssr below are computed from
+  ## UNWEIGHTED residuals while beta is the WLS solution — confirm this
+  ## matches the package convention for weighted SSE and R-squared.
+  yhat = X * beta;
+  resid = y - yhat;
+  p_est = sum (! rank_def); ## NumEstimatedCoefficients
+  dfe = n - p_est;
+  sse = sum (resid .^ 2);
+  ybar = mean (y);
+  ssr = sum ((yhat - ybar) .^ 2);
+  sst = sse + ssr; ## Always SSE + SSR (not sum((y-ybar)^2))
+  mse = sse / dfe;
+  rmse = sqrt (mse);
+
+  ## Coefficient covariance
+  ## Use pinv-style via R to handle rank deficiency
+  Rinv = zeros (p, p);
+  idx = find (! rank_def);
+  R_est = R(idx, idx);
+  Rinv(idx, idx) = R_est \ eye (numel (idx));
+  vcov = (Rinv * Rinv') * mse;
+
+  se = sqrt (diag (vcov));
+  tstat = beta ./ se;
+  tstat(rank_def) = NaN;
+  se(rank_def) = 0;
+  ## Two-sided p-value.  2*tcdf(-|t|) is mathematically identical to
+  ## 2*(1 - tcdf(|t|)) but avoids catastrophic cancellation for large |t|,
+  ## so very small p-values do not round to 0 prematurely.
+  pval = 2 * tcdf (-abs (tstat), dfe);
+  pval(rank_def) = NaN;
+
+  ## Log-likelihood and information criteria
+  ## Under normality, MLE variance estimate is sigma2 = SSE/n (not MSE = SSE/DFE)
+  ## This matches MATLAB's computation of LogLikelihood.
+  sigma2_mle = sse / n;
+  logL = -n/2 * log (2 * pi * sigma2_mle) - sse / (2 * sigma2_mle);
+
+  ## m = NumEstimatedCoefficients (no +1 for sigma^2)
+  ## Verified: for n=150, logL=-34.787, p_est=5 -> AIC = -2*(-34.787)+2*5 = 79.574
+  m = p_est;
+  aic = -2 * logL + 2 * m;
+  aicc = aic + (2 * m * (m + 1)) / (n - m - 1);
+  bic = -2 * logL + m * log (n);
+  caic = -2 * logL + m * (log (n) + 1);
+
+  ## Pack output struct
+  s.beta = beta;
+  s.se = se;
+  s.tstat = tstat;
+  s.pval = pval;
+  s.vcov = vcov;
+  s.yhat = yhat;
+  s.resid = resid;
+  s.n = n;
+  s.dfe = dfe;
+  s.sse = sse;
+  s.ssr = ssr;
+  s.sst = sst;
+  s.mse = mse;
+  s.rmse = rmse;
+  s.logL = logL;
+  s.p_est = p_est;
+  s.p_tot = p;
+  s.rank_def = rank_def;
+  s.rsq = struct ("Ordinary", ssr / sst, ...
+                  "Adjusted", 1 - (sse / dfe) / (sst / (n - 1)));
+  s.crit = struct ("AIC", aic, "AICc", aicc, "BIC", bic, "CAIC", caic);
+
+  ## QR factor and leverage for LinearModel diagnostics
+  Q_est = Q(:, ! rank_def);
+  s.Q = Q_est;
+  s.h = sum (Q_est .^ 2, 2);
+
+endfunction
+
+## Simple 2-predictor OLS — linearly independent columns
+%!test
+%! X = [ones(5,1), [1;2;3;4;5], [2;1;4;3;6]];
+%! y = X * [1; 2; -0.5];
+%! s = lm_fit_engine (X, y);
+%! ## Perfect fit (noise-free)
+%! assert (s.beta, [1; 2; -0.5], 1e-10);
+%! assert (s.resid, zeros (5, 1), 1e-10);
+%! assert (s.p_tot, 3);
+%! assert (s.n, 5);
+
+## Known regression — y = 2 + 3*x with noise-free data
+%!test
+%! x = (1:10)';
+%! y = 2 + 3 * x;
+%! X = [ones(10,1), x];
+%! s = lm_fit_engine (X, y);
+%! assert (s.beta, [2; 3], 1e-10);
+%! assert (s.sse, 0, 1e-10);
+%! assert (s.rsq.Ordinary, 1, 1e-10);
+
+## Rank-deficient design matrix
+%!test
+%! warning ("off", "CompactLinearModel:rankDeficient");
+%! X = [1, 2, 4; 1, 3, 6; 1, 4, 8; 1, 5, 10]; ## col3 = 2*col2
+%! y = [1; 2; 3; 4];
+%! s = lm_fit_engine (X, y);
+%! assert (s.p_est, 2);
+%! assert (s.p_tot, 3);
+%! assert (any (s.rank_def), true);
+%! ## The rank-deficient coefficient should be zeroed
+%! assert (s.beta(s.rank_def), 0);
+%! assert (s.se(s.rank_def), 0);
+%! assert (isnan (s.tstat(s.rank_def)), true);
+%! assert (isnan (s.pval(s.rank_def)), true);
+%! warning ("on", "CompactLinearModel:rankDeficient");
+
+## SST = SSE + SSR identity
+%!test
+%! X = [ones(20,1), randn(20,2)];
+%! y = X * [1; 2; -1] + 0.5 * randn (20, 1);
+%! s = lm_fit_engine (X, y);
+%! assert (s.sst, s.sse + s.ssr, 1e-10);
+%! assert (s.dfe, 20 - 3);
+
+## ModelCriterion — m = p_est (no +1)
+## Verify AIC = -2*logL + 2*p_est
+%!test
+%! X = [ones(20,1), (1:20)'];
+%! y = 3 + 2 * (1:20)' + randn (20, 1) * 0.1;
+%! s = lm_fit_engine (X, y);
+%! assert (s.crit.AIC, -2 * s.logL + 2 * s.p_est, 1e-10);
+%! assert (s.crit.BIC, -2 * s.logL + s.p_est * log (s.n), 1e-10);
+
+## Weighted regression
+%!test
+%! X = [ones(5,1), [1;2;3;4;5]];
+%! y = [2; 4; 5; 4; 5];
+%! w = [1; 1; 1; 1; 1];
+%! s1 = lm_fit_engine (X, y, []);
+%! s2 = lm_fit_engine (X, y, w);
+%! ## Equal weights should give same result as unweighted
+%! assert (s1.beta, s2.beta, 1e-10);
+%! assert (s1.sse, s2.sse, 1e-10);
diff --git a/inst/stepwiselm.m b/inst/stepwiselm.m
new file mode 100644
index 00000000..37bb28ef
--- /dev/null
+++ b/inst/stepwiselm.m
@@ -0,0 +1,488 @@
+## Copyright (C) 2026 Jayant Chauhan <0001jayant@gmail.com>
+##
+## This file is part of the statistics package for GNU Octave.
+##
+## This program is free software; you can redistribute it and/or modify it under
+## the terms of the GNU General Public License as published by the Free Software
+## Foundation; either version 3 of the License, or (at your option) any later
+## version.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License along with
+## this program; if not, see .
+
+## -*- texinfo -*-
+## @deftypefn {statistics} {@var{mdl} =} stepwiselm (@var{X}, @var{y})
+## @deftypefnx {statistics} {@var{mdl} =} stepwiselm (@var{X}, @var{y}, @var{modelspec})
+## @deftypefnx {statistics} {@var{mdl} =} stepwiselm (@var{tbl})
+## @deftypefnx {statistics} {@var{mdl} =} stepwiselm (@var{tbl}, @var{modelspec})
+## @deftypefnx {statistics} {@var{mdl} =} stepwiselm (@dots{}, @var{Name}, @var{Value})
+##
+## Fit a linear regression model using stepwise regression.
+##
+## @code{stepwiselm} performs stepwise regression starting from an initial
+## model and iteratively adding or removing terms based on their statistical
+## significance, returning a @code{LinearModel} object.
+##
+## @var{modelspec} specifies the @strong{starting} model (default:
+## @qcode{'constant'}).
+##
+## @strong{Stepwise-specific Name-Value Arguments}
+##
+## @table @asis
+## @item @qcode{'Lower'} — minimum model (default: @qcode{'constant'})
+## @item @qcode{'Upper'} — maximum model (default: @qcode{'interactions'})
+## @item @qcode{'PEnter'} — p-value threshold to add a term (default: 0.05)
+## @item @qcode{'PRemove'} — p-value threshold to remove a term (default: 0.10)
+## @item @qcode{'Criterion'} — @qcode{'sse'} (default), @qcode{'aic'},
+## @qcode{'bic'}, @qcode{'rsquared'}, or @qcode{'adjrsquared'}
+## @item @qcode{'NSteps'} — maximum number of steps (default: Inf)
+## @item @qcode{'Verbose'} — 0 (silent), 1 (one line/step), 2 (per-candidate)
+## @end table
+##
+## All @code{fitlm} name-value arguments are also accepted.
+##
+## @seealso{fitlm, LinearModel}
+## @end deftypefn
+
+function mdl = stepwiselm (varargin)
+
+  ## Algorithm: fit the starting model, then alternate a backward scan
+  ## (repeatedly drop the least significant removable term) with a forward
+  ## scan (add the single most significant eligible term) until no action
+  ## qualifies or NSteps actions have been taken.  With Criterion = 'sse'
+  ## decisions use partial F-test p-values against PEnter/PRemove; any other
+  ## criterion accepts only actions that strictly improve the criterion.
+
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  ## ── [1] Parse inputs ──────────────────────────────────────────────────────
+  [raw_X, raw_y, var_names, cat_flags, weights, excl_mask, ...
+   modelspec, obs_names, resp_name, pred_names, has_intercept, ...
+   raw_cols, lower_spec, upper_spec, p_enter, p_remove, ...
+   criterion, n_steps, verbose] = ...
+    __stepwiselm_parse_inputs__ (varargin{:});
+
+  n_pred = numel (pred_names);
+
+  ## ── [2] Build lower/upper terms matrices ──────────────────────────────────
+  lower_terms_pred = __spec_to_terms__ (lower_spec, n_pred, true);
+  upper_terms_pred = __spec_to_terms__ (upper_spec, n_pred, true);
+
+  ## Add response column (always 0)
+  lower_terms = [lower_terms_pred, zeros(rows (lower_terms_pred), 1)];
+  upper_terms = [upper_terms_pred, zeros(rows (upper_terms_pred), 1)];
+
+  ## ── [3] Fit starting model ────────────────────────────────────────────────
+  ## Only forward non-default fitlm options so fitlm's own defaults apply.
+  cat_idx = find (cat_flags(1:n_pred));
+  fitlm_args = {};
+  if (! isempty (cat_idx))
+    fitlm_args = [fitlm_args, {'CategoricalVars', cat_idx}];
+  endif
+  if (any (weights != 1))
+    fitlm_args = [fitlm_args, {'Weights', weights}];
+  endif
+  if (any (excl_mask))
+    fitlm_args = [fitlm_args, {'Exclude', excl_mask}];
+  endif
+  if (! isempty (var_names))
+    fitlm_args = [fitlm_args, {'VarNames', var_names}];
+  endif
+
+  if (isa (raw_X, "table"))
+    mdl0 = fitlm (raw_X, modelspec, fitlm_args{:});
+  else
+    mdl0 = fitlm (raw_X, raw_y, modelspec, fitlm_args{:});
+  endif
+
+  current_terms = mdl0.Formula.Terms;
+  incl_mask = mdl0.ObservationInfo.Subset;
+
+  ## ── [4] Initialize step log ───────────────────────────────────────────────
+  ## Parallel arrays indexed by n_log; entry 1 records the starting model.
+  start_lp = mdl0.Formula.LinearPredictor;
+  crit_current = __get_model_crit__ (mdl0, criterion);
+
+  n_log = 1;
+  log_action = {'Start'};
+  log_term = {start_lp};
+  log_terms = {current_terms};
+  log_df = mdl0.NumCoefficients;
+  log_del_df = NaN;
+  log_fstat = NaN;
+  log_pval = NaN;
+  log_critval = crit_current;
+
+  ## ── [5] Check for empty candidate set ─────────────────────────────────────
+  all_candidates = __stepwiselm_candidates__ (current_terms, lower_terms, ...
+                                              upper_terms, cat_flags, ...
+                                              pred_names);
+  if (isempty (all_candidates) && verbose >= 1)
+    fprintf ("No terms to add to or remove from initial model.\n");
+  endif
+
+  ## ── [6] Search loop ───────────────────────────────────────────────────────
+  n_taken = 0;
+  use_ftest = strcmp (criterion, "sse"); ## F-test mode vs criterion mode
+
+  while (n_taken < n_steps)
+    made_change = false;
+
+    ## ── Phase A: BACKWARD SCAN (keep removing) ────────────────────────────
+    ## Repeatedly remove the candidate with the LARGEST p-value above
+    ## PRemove (or best criterion improvement), until none qualifies.
+    while (n_taken < n_steps)
+      candidates = __stepwiselm_candidates__ (current_terms, lower_terms, ...
+                                              upper_terms, cat_flags, ...
+                                              pred_names);
+      ## Filter to remove only
+      rem_idx = [];
+      for ci = 1:numel (candidates)
+        if (strcmp (candidates(ci).action, "remove"))
+          rem_idx(end+1) = ci;
+        endif
+      endfor
+
+      if (isempty (rem_idx))
+        break;
+      endif
+
+      ## Score each removal candidate
+      best_ri = 0;
+      best_pval = -Inf;
+      best_fstat = 0;
+      best_crit = Inf;
+      best_df = 0;
+
+      for ri = 1:numel (rem_idx)
+        c = candidates(rem_idx(ri));
+        [f, p, ~, ~, df_t, cr] = __stepwiselm_ftest__ ( ...
+          raw_X, raw_y, weights, incl_mask, current_terms, c, ...
+          cat_flags, var_names, pred_names, raw_cols, has_intercept, ...
+          criterion);
+
+        if (verbose >= 2)
+          if (use_ftest)
+            fprintf (" pValue for removing %s is %g\n", c.term_name, p);
+          else
+            fprintf (" %s for removing %s is %g\n", ...
+                     upper (criterion), c.term_name, cr);
+          endif
+        endif
+
+        should_take = false;
+        if (use_ftest)
+          ## Remove the least significant term (largest p above threshold)
+          if (p > p_remove && p > best_pval)
+            should_take = true;
+          endif
+        else
+          ## Criterion mode: only strict improvements over current model
+          if (cr < crit_current && cr < best_crit)
+            should_take = true;
+          endif
+        endif
+
+        if (should_take)
+          best_ri = rem_idx(ri);
+          best_pval = p;
+          best_fstat = f;
+          best_crit = cr;
+          best_df = df_t;
+        endif
+      endfor
+
+      if (best_ri == 0)
+        break; ## no qualifying removal
+      endif
+
+      ## Apply removal (drop the first matching term row)
+      best_c = candidates(best_ri);
+      mask = true (rows (current_terms), 1);
+      for j = 1:rows (current_terms)
+        if (isequal (current_terms(j, :), best_c.term_row))
+          mask(j) = false;
+          break;
+        endif
+      endfor
+      current_terms = current_terms(mask, :);
+      n_taken++;
+      made_change = true;
+
+      ## Update criterion
+      if (! use_ftest)
+        crit_current = best_crit;
+      endif
+
+      ## Log
+      n_log++;
+      log_action{n_log} = "Remove";
+      log_term{n_log} = best_c.term_name;
+      log_terms{n_log} = current_terms;
+      log_df(n_log) = log_df(n_log - 1) - best_df;
+      log_del_df(n_log) = -best_df;
+      log_fstat(n_log) = best_fstat;
+      log_pval(n_log) = best_pval;
+      log_critval(n_log) = best_crit;
+
+      ## Verbose
+      if (verbose >= 1)
+        __print_step__ (n_taken, best_c, best_fstat, best_pval, ...
+                        best_crit, criterion);
+      endif
+    endwhile ## backward
+
+    if (n_taken >= n_steps)
+      break;
+    endif
+
+    ## ── Phase B: FORWARD SCAN (add one) ───────────────────────────────────
+    ## Add the single most significant eligible term (smallest p below
+    ## PEnter, or best criterion improvement), then loop back to Phase A.
+    candidates = __stepwiselm_candidates__ (current_terms, lower_terms, ...
+                                            upper_terms, cat_flags, ...
+                                            pred_names);
+    ## Filter to add only
+    add_idx = [];
+    for ci = 1:numel (candidates)
+      if (strcmp (candidates(ci).action, "add"))
+        add_idx(end+1) = ci;
+      endif
+    endfor
+
+    if (isempty (add_idx))
+      break; ## converged
+    endif
+
+    ## Score each add candidate
+    best_ai = 0;
+    best_pval_a = Inf;
+    best_fstat_a = 0;
+    best_crit_a = Inf;
+    best_df_a = 0;
+
+    for ai = 1:numel (add_idx)
+      c = candidates(add_idx(ai));
+      [f, p, ~, ~, df_t, cr] = __stepwiselm_ftest__ ( ...
+        raw_X, raw_y, weights, incl_mask, current_terms, c, ...
+        cat_flags, var_names, pred_names, raw_cols, has_intercept, ...
+        criterion);
+
+      if (verbose >= 2)
+        if (use_ftest)
+          fprintf (" pValue for adding %s is %g\n", c.term_name, p);
+        else
+          fprintf (" %s for adding %s is %g\n", ...
+                   upper (criterion), c.term_name, cr);
+        endif
+      endif
+
+      should_take = false;
+      if (use_ftest)
+        if (p < p_enter && p < best_pval_a)
+          should_take = true;
+        endif
+      else
+        if (cr < crit_current && cr < best_crit_a)
+          should_take = true;
+        endif
+      endif
+
+      if (should_take)
+        best_ai = add_idx(ai);
+        best_pval_a = p;
+        best_fstat_a = f;
+        best_crit_a = cr;
+        best_df_a = df_t;
+      endif
+    endfor
+
+    if (best_ai == 0)
+      if (! made_change)
+        break; ## converged — no add and no remove happened
+      endif
+      ## NOTE(review): both paths break here, so made_change does not affect
+      ## control flow at this point — confirm whether the intent was to
+      ## continue the outer loop after a removal-only iteration.
+      break;
+    endif
+
+    ## Apply addition
+    best_c = candidates(best_ai);
+    current_terms = [current_terms; best_c.term_row];
+    ## Sort by term order
+    term_order = sum (current_terms(:, 1:n_pred), 2);
+    [~, si] = sort (term_order);
+    current_terms = current_terms(si, :);
+    n_taken++;
+    made_change = true;
+
+    ## Update criterion
+    if (! use_ftest)
+      crit_current = best_crit_a;
+    endif
+
+    ## Log
+    n_log++;
+    log_action{n_log} = "Add";
+    log_term{n_log} = best_c.term_name;
+    log_terms{n_log} = current_terms;
+    log_df(n_log) = log_df(n_log - 1) + best_df_a;
+    log_del_df(n_log) = best_df_a;
+    log_fstat(n_log) = best_fstat_a;
+    log_pval(n_log) = best_pval_a;
+    log_critval(n_log) = best_crit_a;
+
+    ## Verbose
+    if (verbose >= 1)
+      __print_step__ (n_taken, best_c, best_fstat_a, best_pval_a, ...
+                      best_crit_a, criterion);
+    endif
+
+    ## Continue outer loop → back to backward scan
+  endwhile
+
+  ## ── [7] Fit final model ───────────────────────────────────────────────────
+  if (isa (raw_X, "table"))
+    mdl = fitlm (raw_X, current_terms, fitlm_args{:});
+  else
+    mdl = fitlm (raw_X, raw_y, current_terms, fitlm_args{:});
+  endif
+
+  ## ── [8] Build Steps struct ────────────────────────────────────────────────
+  Steps.Start = [resp_name, " ~ ", log_term{1}];
+  Steps.Lower = lower_spec;
+  Steps.Upper = upper_spec;
+  Steps.Criterion = upper (criterion);
+  Steps.PEnter = p_enter;
+  Steps.PRemove = p_remove;
+
+  ## Build History struct (mimicking a table)
+  History.Action = log_action(:);
+  History.TermName = cell (n_log, 1);
+  for k = 1:n_log
+    History.TermName{k} = {log_term{k}};
+  endfor
+  History.Terms = log_terms(:);
+  History.DF = log_df(:);
+  History.delDF = log_del_df(:);
+
+  if (use_ftest)
+    History.FStat = log_fstat(:);
+    History.pValue = log_pval(:);
+  else
+    ## For AIC/BIC, the column is the criterion name, not FStat/pValue
+    History.(upper (criterion)) = log_critval(:);
+  endif
+
+  Steps.History = History;
+
+  ## ── [9] Inject Steps into LinearModel ─────────────────────────────────────
+  ## NOTE(review): assumes LinearModel exposes a settable Steps property —
+  ## confirm against the LinearModel class definition.
+  mdl.Steps = Steps;
+
+endfunction
+
+
+## ═══════════════════════════════════════════════════════════════════════════════
+## Local helper functions
+## ═══════════════════════════════════════════════════════════════════════════════
+
+function terms = __spec_to_terms__ (spec, p, has_intercept)
+  ## Convert a
shorthand modelspec string to a (n_terms x p) terms matrix. + ## Also handles formula strings 'y ~ x1 + x2' and numeric matrices. + + if (isnumeric (spec) && ismatrix (spec)) + terms = spec; + ## Strip response column if present + if (columns (terms) == p + 1) + terms = terms(:, 1:p); + endif + return; + endif + + if (! ischar (spec)) + spec = "interactions"; + endif + + ## Check for formula string + if (! isempty (strfind (spec, "~"))) + ## Formula — extract terms. For upper/lower bounds this is unusual + ## but handle gracefully: just use 'interactions' as fallback + spec = "interactions"; + endif + + switch (lower (spec)) + case "constant" + terms = zeros (1, p); + + case "linear" + terms = [zeros(1, p); eye(p)]; + + case "interactions" + lin = eye (p); + inter = []; + for i = 1:p-1 + for j = i+1:p + r = zeros (1, p); + r(i) = 1; r(j) = 1; + inter = [inter; r]; + endfor + endfor + terms = [zeros(1, p); lin; inter]; + + case "purequadratic" + terms = [zeros(1, p); eye(p); 2*eye(p)]; + + case {"quadratic", "full"} + lin = eye (p); + inter = []; + for i = 1:p-1 + for j = i+1:p + r = zeros (1, p); + r(i) = 1; r(j) = 1; + inter = [inter; r]; + endfor + endfor + terms = [zeros(1, p); lin; inter; 2*eye(p)]; + + otherwise + terms = [zeros(1, p); eye(p)]; + endswitch +endfunction + + +function val = __get_model_crit__ (mdl, criterion) + switch (criterion) + case "sse" + val = mdl.SSE; + case "aic" + val = mdl.ModelCriterion.AIC; + case "bic" + val = mdl.ModelCriterion.BIC; + case "rsquared" + val = -mdl.Rsquared.Ordinary; + case "adjrsquared" + val = -mdl.Rsquared.Adjusted; + otherwise + val = mdl.SSE; + endswitch +endfunction + + +function __print_step__ (step_num, candidate, fstat, pval, critval, criterion) + action_str = [toupper(candidate.action(1)), candidate.action(2:end)]; + action_verb = "Adding"; + if (strcmp (candidate.action, "remove")) + action_verb = "Removing"; + endif + + if (strcmp (criterion, "sse")) + fprintf ("%d. %s %s, FStat = %g, pValue = %g\n", ... 
+ step_num, action_verb, candidate.term_name, fstat, pval); + else + fprintf ("%d. %s %s, %s = %g\n", ... + step_num, action_verb, candidate.term_name, ... + upper (criterion), critval); + endif +endfunction + + +## ═══════════════════════════════════════════════════════════════════════════════ +## Unit tests +## ═══════════════════════════════════════════════════════════════════════════════ + +## Test 1: PEnter >= PRemove error +%!error +%! rng (1); +%! X = randn (20, 3); +%! y = randn (20, 1); +%! stepwiselm (X, y, 'constant', 'PEnter', 0.1, 'PRemove', 0.05); +