AnotherSamWilson · AnotherSamWilson · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025 · Oct 26, 2025
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,4 @@ pyproject.toml
 .devcontainer
 Dockerfile
 dev_guide.md
+.pypirc
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ with lightgbm. The R version of this package may be found
       - Has efficient mean matching solutions.
       - Can utilize GPU training
   - **Flexible**
-      - Can impute pandas dataframes
+      - Can impute pandas DataFrames and NumPy arrays
       - Handles categorical data automatically
       - Fits into a sklearn pipeline
       - User can customize every aspect of the imputation process
@@ -39,6 +39,7 @@ with lightgbm. The R version of this package may be found
       - Data can be imputed in place to save memory
       - Can build models on non-missing data
 
+
 This document contains a thorough walkthrough of the package,
 benchmarks, and an introduction to multiple imputation. More information
 on MICE can be found in Stef van Buuren’s excellent online book, which
@@ -338,7 +339,7 @@ new_data_imputed = cust_kernel.impute_new_data(new_data=new_data)
 print(f"New Data imputed in {(datetime.now() - start_t).total_seconds()} seconds")
 ```
 
-    New Data imputed in 0.040396 seconds
+    New Data imputed in 0.035129 seconds
 
 
 ## Saving and Loading Kernels
@@ -506,6 +507,19 @@ pd.DataFrame(optimal_params)
 
 
 <div>
+<style scoped>
+    .dataframe tbody tr th:only-of-type {
+        vertical-align: middle;
+    }
+
+    .dataframe tbody tr th {
+        vertical-align: top;
+    }
+
+    .dataframe thead th {
+        text-align: right;
+    }
+</style>
 <table border="1" class="dataframe">
   <thead>
     <tr style="text-align: right;">
@@ -561,10 +575,10 @@ pd.DataFrame(optimal_params)
     </tr>
     <tr>
       <th>min_sum_hessian_in_leaf</th>
-      <td>0.1</td>
-      <td>0.1</td>
-      <td>0.1</td>
-      <td>0.1</td>
+      <td>0.01</td>
+      <td>0.01</td>
+      <td>0.01</td>
+      <td>0.01</td>
     </tr>
     <tr>
       <th>min_gain_to_split</th>
@@ -811,7 +825,7 @@ kernel.plot_feature_importance(dataset=0)
 
 
 
-![png](README_files/README_48_0.png)
+![png](README_files/README_49_0.png)
 
 
 
@@ -824,7 +838,7 @@ kernel.plot_imputed_distributions()
 
 
 
-![png](README_files/README_50_0.png)
+![png](README_files/README_51_0.png)
 
 
 
@@ -871,7 +885,7 @@ acclist
     0    0.35
     1    0.81
     2    0.81
-    3    0.78
+    3    0.84
     Name: Species Imputation Accuracy, dtype: float64
 
 
@@ -1021,7 +1035,7 @@ plot_matrix(dat, dat.columns)
 
 
 
-![png](README_files/README_60_0.png)
+![png](README_files/README_61_0.png)
 
 
 
@@ -1054,7 +1068,7 @@ kernel_mean_match.plot_imputed_distributions()
 
 
 
-![png](README_files/README_63_0.png)
+![png](README_files/README_64_0.png)
 
 
 
@@ -1065,7 +1079,7 @@ kernel_no_mean_match.plot_imputed_distributions()
 
 
 
-![png](README_files/README_64_0.png)
+![png](README_files/README_65_0.png)
 
 
 

diff --git a/README_files/README_49_0.png b/README_files/README_49_0.png
diff --git a/README_files/README_51_0.png b/README_files/README_51_0.png
diff --git a/README_files/README_61_0.png b/README_files/README_61_0.png
diff --git a/README_files/README_64_0.png b/README_files/README_64_0.png
diff --git a/README_files/README_65_0.png b/README_files/README_65_0.png
diff --git a/README_gen.ipynb b/README_gen.ipynb
diff --git a/miceforest/utils.py b/miceforest/utils.py
@@ -50,11 +50,11 @@ def ampute_data(
     amputed_data = data.copy()
     num_rows = amputed_data.shape[0]
     amp_rows = int(perc * num_rows)
-    random_state = ensure_rng(random_state)
+    rs = ensure_rng(random_state)
     variables = list(data.columns) if variables is None else variables
 
     for col in variables:
-        ind = random_state.choice(amputed_data.index, size=amp_rows, replace=False)
+        ind = rs.choice(amputed_data.index, size=amp_rows, replace=False)
         amputed_data.loc[ind, col] = np.nan
 
     return amputed_data
@@ -91,7 +91,7 @@ def stratified_subset(
 
     """
 
-    random_state = ensure_rng(random_state=random_state)
+    rs = ensure_rng(random_state=random_state)
 
     cat = False
     if y.dtype.name == "category":
@@ -112,9 +112,7 @@ def stratified_subset(
     digits_s = (digits_p * size).round(0).astype("int32")
     diff = size - digits_s.sum()
     if diff != 0:
-        digits_fix = random_state.choice(
-            digits_i, size=abs(diff), p=digits_p, replace=False
-        )
+        digits_fix = rs.choice(digits_i, size=abs(diff), p=digits_p, replace=False)
         if diff < 0:
             for d in digits_fix:
                 digits_s[d] -= 1
@@ -128,7 +126,7 @@ def stratified_subset(
         d_v = digits_v[d_i]
         n = digits_s[d_i]
         ind = np.where(digits == d_v)[0]
-        choice = random_state.choice(ind, size=n, replace=False)
+        choice = rs.choice(ind, size=n, replace=False)
         sub[added : (added + n)] = choice
         added += n