Descriptive Statistics
Basic Setup
local
stats: STATISTICS
data: ARRAY [REAL_64]
do
create stats.make
data := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>
Central Tendency
mean_val := stats.mean (data) -- Arithmetic mean
median_val := stats.median (data) -- 50th percentile
mode_val := stats.mode (data) -- Most frequent value
Dispersion
variance := stats.variance (data) -- Population variance
std_dev := stats.std_dev (data) -- Standard deviation
range := stats.range (data) -- Max - Min
min := stats.min_value (data) -- Minimum
max := stats.max_value (data) -- Maximum
Quantiles
p50 := stats.percentile (data, 50.0) -- 50th percentile
p95 := stats.percentile (data, 95.0) -- 95th percentile
q := stats.quartiles (data) -- [Q1, Q2, Q3]
-- q[1] = 25th percentile, q[2] = median, q[3] = 75th percentile
Summation
total := stats.sum (data) -- Sum using Kahan summation
Correlation & Covariance
local
x, y: ARRAY [REAL_64]
do
x := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>
y := {ARRAY [REAL_64]} << 2.0, 4.0, 6.0, 8.0, 10.0 >>
cov := stats.covariance (x, y) -- Covariance
corr := stats.correlation (x, y) -- Pearson r in [-1, 1]
Note: Correlation is symmetric: corr(x,y) = corr(y,x).
Perfect positive correlation = 1.0, perfect negative = -1.0.
Linear Regression
local
result: REGRESSION_RESULT
do
result := stats.linear_regression (x, y)
-- Access results
slope := result.slope
intercept := result.intercept
r_squared := result.r_squared -- R² in [0, 1]
-- Make prediction
predicted_y := result.predict (new_x)
Note: R² indicates fit quality. R² = 1.0 is a perfect fit; R² = 0.0 means there is no linear relationship.
Hypothesis Testing
One-Sample t-test
result := stats.t_test_one_sample (data, 3.0) -- Test if mean = 3.0
t_stat := result.statistic
p_value := result.p_value
dof := result.degrees_of_freedom
Two-Sample t-test
result := stats.t_test_two_sample (group1, group2)
if result.is_significant (0.05) then
print ("Groups significantly different at 0.05 level%N")
end
Paired t-test
result := stats.t_test_paired (before, after) -- Test if mean(before - after) = 0
Chi-Square Test
observed := {ARRAY [REAL_64]} << 10.0, 15.0, 20.0 >>
expected := {ARRAY [REAL_64]} << 12.0, 12.0, 21.0 >>
result := stats.chi_square_test (observed, expected)
ANOVA
local
groups: ARRAY [ARRAY [REAL_64]]
do
create groups.make_filled (create {ARRAY [REAL_64]}.make_empty, 1, 3)
groups[1] := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0 >>
groups[2] := {ARRAY [REAL_64]} << 4.0, 5.0, 6.0 >>
groups[3] := {ARRAY [REAL_64]} << 7.0, 8.0, 9.0 >>
result := stats.anova (groups)
if result.conclusion (0.05) then
print ("Significant difference among groups%N")
end
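Note: P-values are currently placeholders (always 0.5), so a check such as result.conclusion (0.05), which tests p < alpha, cannot yet report significance for any alpha ≤ 0.5.
Correct p-values will be computed once the distribution CDFs are implemented.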
Data Cleaning
local
clean: CLEANED_STATISTICS
dirty_data, clean_data: ARRAY [REAL_64]
do
create clean.make
-- Remove NaN and infinite values
clean_data := clean.clean (dirty_data)
-- Or individually
clean_data := clean.remove_nan (dirty_data)
clean_data := clean.remove_infinite (dirty_data)
Test Result Objects
TEST_RESULT
-- Returned by: t_test_*, chi_square_test, anova
result.statistic -- Test statistic
result.p_value -- P-value (0 to 1)
result.degrees_of_freedom -- Degrees of freedom of the test's reference distribution
result.is_significant (alpha) -- True if the result is significant at level alpha
result.conclusion (alpha) -- True if p_value < alpha
REGRESSION_RESULT
-- Returned by: linear_regression
result.slope -- Regression slope (y = slope*x + intercept)
result.intercept -- Y-intercept
result.r_squared -- R² in [0, 1]
result.predict (x) -- Predict y for new x
Next Steps
- User Guide - In-depth tutorials with more examples
- API Reference - Complete documentation for all classes
- Cookbook - Real-world usage patterns and recipes