simple_statistics

Quick API Reference

Descriptive Statistics

Basic Setup

local
    stats: STATISTICS
    data: ARRAY [REAL_64]
do
    create stats.make
    data := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>

Central Tendency

mean_val := stats.mean (data)        -- Arithmetic mean
median_val := stats.median (data)    -- 50th percentile
mode_val := stats.mode (data)        -- Most frequent value

Dispersion

variance := stats.variance (data)    -- Population variance
std_dev := stats.std_dev (data)      -- Standard deviation
range := stats.range (data)          -- Max - Min
min := stats.min_value (data)        -- Minimum
max := stats.max_value (data)        -- Maximum

Quantiles

p50 := stats.percentile (data, 50.0)    -- 50th percentile
p95 := stats.percentile (data, 95.0)    -- 95th percentile
q := stats.quartiles (data)             -- [Q1, Q2, Q3]
-- q[1] = 25th percentile, q[2] = median, q[3] = 75th percentile

Summation

total := stats.sum (data)       -- Sum using Kahan summation

Correlation & Covariance

local
    x, y: ARRAY [REAL_64]
do
    x := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>
    y := {ARRAY [REAL_64]} << 2.0, 4.0, 6.0, 8.0, 10.0 >>

    cov := stats.covariance (x, y)      -- Covariance
    corr := stats.correlation (x, y)    -- Pearson r in [-1, 1]
Note: Correlation is symmetric: corr(x,y) = corr(y,x). A perfect positive linear relationship gives 1.0 (as in the example above, where y = 2x); a perfect negative linear relationship gives -1.0.

Linear Regression

local
    result: REGRESSION_RESULT
do
    result := stats.linear_regression (x, y)

    -- Access results
    slope := result.slope
    intercept := result.intercept
    r_squared := result.r_squared        -- R² in [0, 1]

    -- Make prediction
    predicted_y := result.predict (new_x)
Note: R² indicates fit quality. R² = 1.0 means a perfect linear fit; R² = 0.0 means no linear relationship.

Hypothesis Testing

One-Sample t-test

result := stats.t_test_one_sample (data, 3.0)  -- Test if mean = 3.0
t_stat := result.statistic
p_value := result.p_value
dof := result.degrees_of_freedom

Two-Sample t-test

result := stats.t_test_two_sample (group1, group2)
if result.is_significant (0.05) then
    print ("Groups significantly different at 0.05 level%N")
end

Paired t-test

result := stats.t_test_paired (before, after)  -- Test if mean(before - after) = 0

Chi-Square Test

observed := {ARRAY [REAL_64]} << 10.0, 15.0, 20.0 >>
expected := {ARRAY [REAL_64]} << 12.0, 12.0, 21.0 >>
result := stats.chi_square_test (observed, expected)

ANOVA

local
    groups: ARRAY [ARRAY [REAL_64]]
do
    create groups.make_filled (create {ARRAY [REAL_64]}.make_empty, 1, 3)
    groups[1] := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0 >>
    groups[2] := {ARRAY [REAL_64]} << 4.0, 5.0, 6.0 >>
    groups[3] := {ARRAY [REAL_64]} << 7.0, 8.0, 9.0 >>

    result := stats.anova (groups)
    if result.conclusion (0.05) then
        print ("Significant difference among groups%N")
    end
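Note: P-values are currently placeholders (0.5). They will be computed correctly when distribution CDFs are implemented. Until then, checks that compare the p-value against alpha (for example, result.conclusion (0.05) above, documented as p < alpha) do not reflect the data: with p fixed at 0.5 they cannot report significance at any alpha of 0.5 or below.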

Data Cleaning

local
    clean: CLEANED_STATISTICS
    dirty_data, clean_data: ARRAY [REAL_64]
do
    create clean.make

    -- Remove NaN and infinite values
    clean_data := clean.clean (dirty_data)

    -- Or individually
    clean_data := clean.remove_nan (dirty_data)
    clean_data := clean.remove_infinite (dirty_data)

Test Result Objects

TEST_RESULT

-- Returned by: t_test_*, chi_square_test, anova
result.statistic                    -- Test statistic
result.p_value                      -- P-value (0 to 1)
result.degrees_of_freedom           -- Degrees of freedom of the test's reference distribution
result.is_significant (alpha)       -- Boolean test at alpha level
result.conclusion (alpha)           -- Boolean for p < alpha

REGRESSION_RESULT

-- Returned by: linear_regression
result.slope                        -- Regression slope (y = slope*x + intercept)
result.intercept                    -- Y-intercept
result.r_squared                    -- R² in [0, 1]
result.predict (x)                  -- Predict y for new x

Next Steps