simple_statistics

User Guide

Getting Started

Installation

Add simple_statistics to your ECF file:

<library name="simple_statistics" location="$SIMPLE_EIFFEL/simple_statistics/simple_statistics.ecf"/>

Basic Setup

To use the library, create an instance of STATISTICS:

local
    stats: STATISTICS
do
    create stats.make
    -- stats is now ready to use

The STATISTICS class is stateless - you can share a single instance across your application or create new instances as needed.

Comprehensive Examples

Example 1: Analyzing Test Scores

local
    stats: STATISTICS
    scores: ARRAY [REAL_64]
    mean, median, std: REAL_64
do
    create stats.make
    scores := {ARRAY [REAL_64]} << 85.0, 92.0, 78.0, 88.0, 95.0, 82.0 >>

    -- Calculate descriptive statistics
    mean := stats.mean (scores)              -- Average: 86.67
    median := stats.median (scores)          -- Middle value: 86.5
    std := stats.std_dev (scores)            -- Variability: 6.31 (sample, n-1) or 5.76 (population, n)

    print ("Class Performance Report%N")
    print ("Mean Score: " + mean.out + "%N")
    print ("Median Score: " + median.out + "%N")
    print ("Std Dev: " + std.out + "%N")

Example 2: Correlation Analysis

local
    stats: STATISTICS
    hours_studied, test_scores: ARRAY [REAL_64]
    correlation: REAL_64
do
    create stats.make
    hours_studied := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>
    test_scores := {ARRAY [REAL_64]} << 65.0, 72.0, 81.0, 88.0, 95.0 >>

    correlation := stats.correlation (hours_studied, test_scores)

    if correlation > 0.9 then
        print ("Strong positive correlation: more study time = higher scores%N")
    elseif correlation > 0.7 then
        print ("Moderate positive correlation%N")
    else
        print ("Weak or no correlation%N")
    end

Example 3: Linear Regression

local
    stats: STATISTICS
    x, y: ARRAY [REAL_64]
    model: REGRESSION_RESULT
do
    create stats.make
    x := {ARRAY [REAL_64]} << 1.0, 2.0, 3.0, 4.0, 5.0 >>
    y := {ARRAY [REAL_64]} << 2.0, 4.0, 6.0, 8.0, 10.0 >>

    model := stats.linear_regression (x, y)

    print ("Regression Equation: y = " + model.slope.out + "*x + " + model.intercept.out + "%N")
    print ("R-squared: " + model.r_squared.out + "%N")
    print ("Prediction at x=6: " + model.predict (6.0).out + "%N")

Example 4: Hypothesis Testing

local
    stats: STATISTICS
    group1, group2: ARRAY [REAL_64]
    outcome: TEST_RESULT
do
    create stats.make
    group1 := {ARRAY [REAL_64]} << 100.0, 105.0, 110.0, 95.0, 108.0 >>
    group2 := {ARRAY [REAL_64]} << 98.0, 102.0, 104.0, 96.0, 101.0 >>

    -- Test if groups have significantly different means
    outcome := stats.t_test_two_sample (group1, group2)

    if outcome.is_significant (0.05) then
        print ("Groups are significantly different at 0.05 level%N")
    else
        print ("No significant difference between groups%N")
    end

    print ("t-statistic: " + outcome.statistic.out + "%N")
    print ("p-value: " + outcome.p_value.out + "%N")

Best Practices

Data Validation

Always ensure your data is clean before analysis:

local
    stats: STATISTICS
    clean: CLEANED_STATISTICS
    raw_data, clean_data: ARRAY [REAL_64]
do
    create stats.make
    create clean.make

    -- Remove invalid values
    clean_data := clean.clean (raw_data)

    -- Now safe to analyze
    if clean_data.count > 0 then
        print ("Mean: " + stats.mean (clean_data).out + "%N")
    end

Precondition Handling

All features have preconditions. Check them before calling:

-- Check that data is not empty before calling mean
if not data.is_empty then
    avg := stats.mean (data)
else
    print ("Error: cannot compute mean of empty array%N")
end

-- Check that arrays have same length
if x.count = y.count then
    corr := stats.correlation (x, y)
else
    print ("Error: arrays must have same length%N")
end

Numerical Stability

The library uses numerically stable algorithms.

These ensure accurate results even with extreme magnitudes or large datasets.

Contract Verification

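All features in simple_statistics are specified with Design by Contract: each feature declares preconditions (`require`) stating what it expects from the caller and postconditions (`ensure`) stating what it guarantees in return.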

Example from `mean`:

mean (data: ARRAY [REAL_64]): REAL_64
    require
        data_not_empty: not data.is_empty
    do
        -- Implementation
    ensure
        result_is_average: True  -- result is the arithmetic mean
    end

These contracts are verified by the implementation and checked during testing. You can rely on them.

Next Steps