Tcl Library Source Code

Check-in [991cdf14aa]
Login
Bounty program for improvements to Tcl and certain Tcl packages.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Corrected use of bandwidth, adjusted a test to take care of pecularities of the kernels
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 991cdf14aaf67dff84df63b03494827a619b3082
User & Date: markus 2014-01-20 05:17:53
Context
2014-01-21
17:28
Ticket [861f53ff24]. Added proper initialization to CreateNonce when falling back to md5 (no dev/urandom). Bumped version to 1.3.3. check-in: 8175173735 user: andreask tags: trunk
2014-01-20
05:17
Corrected use of bandwidth, adjusted a test to take care of pecularities of the kernels check-in: 991cdf14aa user: markus tags: trunk
2014-01-18
14:20
Added three test cases for the kernel density estimation. Resulted in a few small corrections (deal with missing values) check-in: 2456e0d413 user: markus tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to modules/math/ChangeLog.





1
2
3
4
5
6
7



2014-01-18  Arjen Markus <[email protected]>
	* statistics.tcl: Added stat_kernel.tcl
	* stat_kernel.tcl: Implements a straightforward kernel density estimation procedure
	* statistics.man: Describe the kernel denstity estimation procedure, moved the description of several
	                  tests to the general section
	* statistics.test: Added three tests for the kernel density estimation (note: one result is a bit troublesome)
	* pkgIndex.tcl: Bumped version of statistics package to 0.9
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
2014-01-19  Arjen Markus <[email protected]>
	* stat_kernel.tcl: Corrected use of bandwidth
	* statistics.test: Added margin per kernel - not quite satisfactory in the case of the uniform kernel

2014-01-18  Arjen Markus <[email protected]>
	* statistics.tcl: Added stat_kernel.tcl
	* stat_kernel.tcl: Implements a straightforward kernel density estimation procedure
	* statistics.man: Describe the kernel denstity estimation procedure, moved the description of several
	                  tests to the general section
	* statistics.test: Added three tests for the kernel density estimation (note: one result is a bit troublesome)
	* pkgIndex.tcl: Bumped version of statistics package to 0.9

Changes to modules/math/stat_kernel.tcl.

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
...
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
        lappend xvalue [expr {$xb + $i * $dx}]
    }

    #
    # Construct the density function
    #
    set density {}
    set scale   [expr {$opt(-bandwidth)}]
    foreach x $xvalue {
        set sum 0.0
        foreach d $data w $weight {
            if { $d != {} } {
                set kvalue [$opt(-kernel) [expr {$scale * ($x-$d)}]]
                set sum [expr {$sum + $w * $kvalue}]
            }
................................................................................
        }
        lappend density [expr {$sum * $scale}]
    }

    #
    # Return the result
    #
    return [list $xvalue $density [list $xbegin $xend $mean $stdev $scale]]
}

# gaussian, uniform, triangular, epanechnikov, biweight, cosine, logistic --
#    The Gaussian kernel
#
# Arguments:
#    x            (Scaled) argument






|







 







|







140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
...
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
        lappend xvalue [expr {$xb + $i * $dx}]
    }

    #
    # Construct the density function
    #
    set density {}
    set scale   [expr {1.0/$opt(-bandwidth)}]
    foreach x $xvalue {
        set sum 0.0
        foreach d $data w $weight {
            if { $d != {} } {
                set kvalue [$opt(-kernel) [expr {$scale * ($x-$d)}]]
                set sum [expr {$sum + $w * $kvalue}]
            }
................................................................................
        }
        lappend density [expr {$sum * $scale}]
    }

    #
    # Return the result
    #
    return [list $xvalue $density [list $xbegin $xend $mean $stdev $opt(-bandwidth)]]
}

# gaussian, uniform, triangular, epanechnikov, biweight, cosine, logistic --
#    The Gaussian kernel
#
# Arguments:
#    x            (Scaled) argument

Changes to modules/math/statistics.test.

596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612

613
614
615
616
617
618
619
...
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
test "spearman-rank-extended-1.0" "Test extended Spearman rank correlation procedure" -match tolerant -body {
    ::math::statistics::spearman-rank-extended {106  86 100 101  99 103  97 113 112 110} \
                                               {  7   0  27  50  28  29  20  12   6  17}
} -result {-0.175757575758 10 -0.456397284}

#
# Note: for the uniform kernel the sum is roughly 0.92. It would appear from a visual
# investigation that the sides may influence this, as well as the default bandwidth.
# Leave it for now.
#
test "kernel-density-1.0" "Test various kernel functions" -body {
    set data {1 2 3 4 5 6 7 8 9 10}

    set roughlyOne {}

    foreach kernel {gaussian uniform triangular epanechnikov biweight cosine logistic} {

        set result [::math::statistics::kernel-density $data -kernel $kernel]

        set sum 0.0
        set xbegin [lindex $result 2 0]
        set xend   [lindex $result 2 1]
        set number [llength [lindex $result 0]]
        set dx     [expr {($xend-$xbegin) / $number}]
................................................................................
        # Integral should be roughly one
        #
        set sum 0.0
        foreach v [lindex $result 1] {
            set sum [expr {$sum + $dx * $v}]
        }

        lappend roughlyOne [expr {abs($sum-1.0) < 0.01}]
    }

    return $roughlyOne
} -result {1 0 1 1 1 1 1}

test "kernel-density-1.1" "Test various options - just that they have effect" -body {
    set subResults {}

    set data {1 2 3 4 5 6 7 8 9 10}

    set result [::math::statistics::kernel-density $data -number 20]






|
|
|






|
>







 







|



|







596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
...
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
test "spearman-rank-extended-1.0" "Test extended Spearman rank correlation procedure" -match tolerant -body {
    ::math::statistics::spearman-rank-extended {106  86 100 101  99 103  97 113 112 110} \
                                               {  7   0  27  50  28  29  20  12   6  17}
} -result {-0.175757575758 10 -0.456397284}

#
# Note: for the uniform and the logistic kernel the sum deviates more from 1 than for the others.
# For the logistic kernel this is because the density function is very widespread. For the
# uniform kernel the reason is not quite clear. Hence the margin per kernel.
#
test "kernel-density-1.0" "Test various kernel functions" -body {
    set data {1 2 3 4 5 6 7 8 9 10}

    set roughlyOne {}

    foreach kernel {gaussian uniform triangular epanechnikov biweight cosine logistic} \
            margin {0.01     0.02    0.01       0.01         0.01     0.01   0.05    } {
        set result [::math::statistics::kernel-density $data -kernel $kernel]

        set sum 0.0
        set xbegin [lindex $result 2 0]
        set xend   [lindex $result 2 1]
        set number [llength [lindex $result 0]]
        set dx     [expr {($xend-$xbegin) / $number}]
................................................................................
        # Integral should be roughly one
        #
        set sum 0.0
        foreach v [lindex $result 1] {
            set sum [expr {$sum + $dx * $v}]
        }

        lappend roughlyOne [expr {abs($sum-1.0) < $margin}]
    }

    return $roughlyOne
} -result {1 1 1 1 1 1 1}

test "kernel-density-1.1" "Test various options - just that they have effect" -body {
    set subResults {}

    set data {1 2 3 4 5 6 7 8 9 10}

    set result [::math::statistics::kernel-density $data -number 20]