Coverage for src/colorspace/statshelper.py: 100%
102 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-29 15:11 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-10-29 15:11 +0000
3def nprange(x):
4 """Range of Values
6 Mimiking R's `range()` function, takes a numeric numpy array
7 and returns an array of length `2` with the minimum and maximum.
9 Args:
10 x (numpy.ndarray): Numpy array, must be numeric.
12 Returns:
13 numpy.array: Retuns a numpy array with two elements
14 containing the minimum of `x` and the maximum of `x`.
15 """
16 import numpy as np
17 if not isinstance(x, np.ndarray):
18 raise TypeError("argument `x` must be a numpy.array")
19 elif not np.issubdtype(x.dtype, np.floating) and not np.issubdtype(x.dtype, np.integer):
20 raise TypeError("argument `x` must be float or int")
21 elif not len(x) > 0 or len(x.shape) != 1:
22 raise ValueError("argument `x` must be of length > 0 and 1D")
24 return np.asarray([np.min(x), np.max(x)])
27def natural_cubic_spline(x, y, xout):
28 """Natural Cubic Spline Interpolation
30 Natural cubic spline interpolation. Takes two arrays `x` and `y`
31 trough which a spline is fitted and evaluated at `xout`. Performs
32 second-order (linear) extrapolation for values in `xout` outside
33 of `x`.
35 Args:
36 x (numpy.ndarray): original x data points. Must be float or int
37 and length > 0.
38 y (numpy.ndarray): original y data points, same requirements
39 as `x`. Must also be of the same length as `y`.
40 xout (numpy.ndarray): numeric vectotr (float or int; length > 0)
41 at which the spline should be evaluated.
43 Returns:
44 dict: Dictionary with two elements, `x` (same as input `xout`)
45 and `y` with the interpolated values evaluated at `x` (`xout`).
47 Examples:
48 >>> from colorspace.statshelper import natural_cubic_spline
49 >>> import numpy as np
50 >>> x = np.arange(10, 20.1, 0.5)
51 >>> y = np.sin((x - 3) / 2)
52 >>> xout = np.arange(0, 40, 0.2)
53 >>>
54 >>> res = natural_cubic_spline(x, y, xout)
55 >>>
56 >>> from matplotlib import pyplot as plt
57 >>> plt.figure()
58 >>> plt.plot(x, y, "o", label = "data points")
59 >>> plt.plot(res["x"], res["y"], label = "cubic spline", color = "orange")
60 >>> plt.legend()
61 >>> plt.show()
62 >>>
63 >>> #: Example used for tests
64 >>> x = np.asarray([1, 2, 3, 5.5, 6.5])
65 >>> y = np.asarray([6.5, 5.5, 5., 8.5, 9.5])
66 >>> xout = np.arange(-3, 9, 0.01)
67 >>>
68 >>> from colorspace.statshelper import natural_cubic_spline as ncs
69 >>> res = ncs(x = x, y = y, xout = xout)
70 >>> f = plt.figure()
71 >>> plt.plot(res["x"], res["y"])
72 >>> plt.show()
74 """
75 import numpy as np
77 if not isinstance(x, np.ndarray):
78 raise TypeError("argument `x` must be numpy.ndarray")
79 if not isinstance(y, np.ndarray):
80 raise TypeError("argument `y` must be numpy.ndarray")
81 if not isinstance(xout, np.ndarray):
82 raise TypeError("argument `xout` must be numpy.ndarray")
83 if len(x) == 0:
84 raise ValueError("array on `x` must be of length > 0")
85 if not len(x) == len(y):
86 raise ValueError("length of `x` and `y` must be identical")
88 def check(x):
89 return np.issubdtype(x.dtype, np.floating) or np.issubdtype(x.dtype, np.integer)
90 if not check(x):
91 raise TypeError("argument `x` must be np.float or np.integer")
92 if not check(y):
93 raise TypeError("argument `y` must be np.float or np.integer")
94 if not check(xout):
95 raise TypeError("argument `xout` must be np.float or np.integer")
96 if len(xout) == 0:
97 raise ValueError("array on `xout` must be of length > 0")
99 # Enforce float
100 y = y.astype(np.float32)
101 x = x.astype(np.float32)
102 xout = xout.astype(np.float32)
104 # If length of y is only 1 we can't fit a spline and the
105 # return is simply a constant y[0] for each xout.
106 if len(x) == 1:
107 return {"x": xout, "y": np.repeat(y[0], len(xout))}
109 # Index length
110 n = len(x) - 1
112 # Step 1: Calculate h_i
113 h = np.diff(x)
115 # Step 2: Calculate the coefficients a, b, c, and d
116 a = y[:-1]
118 # Step 3: Solve the tridiagonal system for c
119 A = np.zeros((n + 1, n + 1))
120 b = np.zeros(n + 1)
122 # Natural spline boundary conditions
123 A[0, 0] = 1
124 A[n, n] = 1
126 for i in range(1, n):
127 A[i, i - 1] = h[i-1]
128 A[i, i] = 2 * (h[i - 1] + h[i])
129 A[i, i + 1] = h[i]
130 b[i] = 3 * ((y[i + 1] - y[i]) / h[i] - (y[i] - y[i - 1]) / h[i - 1])
132 c = np.linalg.solve(A, b)
134 # Step 4: Calculate b and d
135 b = (y[1:] - y[:-1]) / h - h * (2 * c[:-1] + c[1:]) / 3
136 d = (c[1:] - c[:-1]) / (3 * h)
138 # Organize coefficients
139 coef = np.transpose([a, b, c[:-1], d])
141 # Prediction/evaluation
142 yout = np.zeros_like(xout)
144 for j in range(len(xout)):
145 # Extrapolation left hand side (linear)
146 if xout[j] < np.min(x):
147 dx = np.min(x) - xout[j]
148 yout[j] = y[0] - coef[0, 1] * dx - coef[0, 2] * dx
149 # Extrapolation right hand side (linear)
150 elif xout[j] > np.max(x):
151 # Coef should be 0.8259146
152 k = coef.shape[0] - 1
153 dx = xout[j] - np.max(x)
154 yout[j] = y[-1] + coef[k, 1] * dx + coef[k, 2] * dx
155 # Else interpolate btw. two neighboring points
156 else:
157 for i in range(n):
158 if x[i] <= xout[j] <= x[i + 1]:
159 dx = xout[j] - x[i]
160 yout[j] = coef[i, 0] + coef[i, 1] * dx + coef[i, 2] * dx**2 + coef[i, 3] * dx**3
162 return {"x": xout, "y": yout}
165# Simple linear regression solver (OLS solver)
166def lm(y, X, Xout):
167 """Linear Regression
169 OLS solver for (simple) linear regression models.
171 Args:
172 y (np.ndarray): response, one-dimensional array (float).
173 X (np.ndarray): model matrix, two dimensional with observations in
174 rows (number of rows equal to length of y; float).
175 Xout (np.ndarray): Same format as `X`, the model matrix for which
176 the predictions will be calculated and returned.
178 Returns:
179 list: Returns a list containing the estimated regression coefficients
180 (`coef`), the standard error of the residuals (`sigma`), and the
181 predictions for `Xout` (on `Yout`; 1-d).
183 Example:
184 >>> # Example from Rs stats::lm man page + additional linear effect.
185 >>> # Annette Dobson (1990) "An Introduction to Generalized Linear Models".
186 >>> # Page 9: Plant Weight Data.
187 >>> weight = np.asarray([4.17, 5.58, 5.18, 6.11, 4.50, 4.61, 5.17, 4.53, 5.33, 5.14,
188 >>> 4.81, 4.17, 4.41, 3.59, 5.87, 3.83, 6.03, 4.89, 4.32, 4.69])
189 >>>
190 >>> # Dummy variable for 'Treatment group'
191 >>> trt = np.repeat([0., 1.], 10)
192 >>>
193 >>> # Alternating +/-0.1 'noise'
194 >>> rand = np.repeat(-0.1, 20)
195 >>> rand[::2] = +0.1
196 >>> rand = weight / 2 + rand
197 >>>
198 >>> # Create model matrix
199 >>> from colorspace.statshelper import lm
200 >>> X = np.transpose([np.repeat(1., 20), rand, trt])
201 >>> mod = lm(y = weight, X = X, Xout = X)
202 >>> print(mod)
204 Raises:
205 TypeError: If `y`, `X`, `Xout` are not numpy.ndarrays.
206 ValueError: If `X` or `Xout` are not two-dimensional.
207 ValueError: If length `y` does not match first dimension of `X`.
208 ValueError: If second dimension of `X` and `Xout` mismatch.
209 """
210 import numpy as np
211 if not isinstance(y, np.ndarray):
212 raise TypeError("argument `y` must be numpy.ndarray")
213 if not isinstance(X, np.ndarray):
214 raise TypeError("argument `X` must be numpy.ndarray")
215 if not isinstance(Xout, np.ndarray):
216 raise TypeError("argument `Xout` must be numpy.ndarray")
218 # Shape
219 if not len(X.shape) == 2 or not len(Xout.shape) == 2:
220 raise ValueError("both `X` and `Xout` must be two-dimensional")
221 if not len(y) == X.shape[0]:
222 raise ValueError("length of `y` does not match first dimension of `X`")
223 if not X.shape[1] == Xout.shape[1]:
224 raise ValueError("second dimension of `X` and `Xout` do not match")
226 # Solving
227 coef = np.linalg.lstsq(X, y, rcond=None)[0]
229 # Fitted values
230 yfit = np.dot(X, coef)
232 # Residual standard error
233 sigma = np.std(y - yfit, ddof = len(coef))
235 # Prediction on Xout
236 Yout = np.dot(Xout, coef)
238 return({"coef": coef, "sigma": sigma, "Yout": Yout})
242def split(x, y):
243 """Divide into Groups and Reassemble
245 Helper function mimiking Rs split() function.
246 Takes two numpy arrays (x, y) of same length.
247 Splits x in segments according to the values of y (whenever
248 the value in y changes). E.g.,
250 Args:
251 x (numpy.ndarray): Array containing the data to be divided into groups.
252 y (numpy.ndarray): Array of the same length as `x` containing the
253 grouping information. The original array `x` will be cut at each
254 element in `y` where the value changes.
256 Return:
257 list of lists: Returns a list of lists, where each element in the
258 object corresponds to one group, containing the original but now divided
259 elements from `x`.
261 Examples:
262 >>> tmp = np.asarray([1, 2, 3, 4, 5])
263 >>> split(tmp, tmp == 3)
264 >>> [[1, 2], [3], [4, 5]]
266 >>> tmp = np.asarray([1, 2, 3, 4, 5])
267 >>> split(tmp, np.asarray([1, 1, 2, 2, 1]))
268 >>> [[1, 2], [3, 4], [5]]
269 """
270 import numpy as np
271 if not isinstance(x, np.ndarray):
272 raise TypeError("argument `x` must be numpy array")
273 if not isinstance(y, np.ndarray):
274 raise TypeError("argument `y` must be numpy array")
275 if not len(x) > 0:
276 raise ValueError("array x must be length >= 1")
277 if not len(x) == len(y):
278 raise ValueError("arrays x/y must be of same length")
280 # If length of x is one: Return as is
281 if len(x) == 1:
282 return [x]
284 # Start with list-of-lists containing first element
285 res = [[x[0]]]
286 for i in range(1, len(x)):
287 if y[i] == y[i - 1]: res[len(res) - 1].append(x[i]) # Append
288 else: res.append([x[i]]) # Add new list
290 return res