Tcl Source Code

Changes On Branch dkf-experimental-fast-number-hash
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch dkf-experimental-fast-number-hash Excluding Merge-Ins

This is equivalent to a diff from 0dd1983bb0 to 0b67af9667

2019-05-12
07:39
Better comments, and a better fix for UB avoidance. Leaf check-in: 0b67af9667 user: dkf tags: dkf-experimental-fast-number-hash
2019-05-10
21:28
optimized variant (especially on x86) for better performance on small integers; more test cases (cov... check-in: 139414267f user: sebres tags: dkf-experimental-fast-number-hash
18:32
testlinkarray: fixed cast to pointer from integer (and eliminate warning) check-in: 510d7e17de user: sebres tags: core-8-branch
16:35
Merge 8.7 check-in: ed6a549a7a user: jan.nijtmans tags: initsubsystems
08:00
Merge 8.7 check-in: 95a9ccf863 user: jan.nijtmans tags: dkf-experimental-fast-number-hash
07:46
merge 8.7 check-in: f3302db091 user: jan.nijtmans tags: utf-max
2019-05-09
20:42
Merge 8.7. Define Tcl_GetStringResult() as macro. check-in: 02588757a0 user: jan.nijtmans tags: trunk
20:06
If compiling with -DTCL_NO_DEPRECATED, make Tcl_GetStringResult() a macro. This opens up one more si... check-in: 0dd1983bb0 user: jan.nijtmans tags: core-8-branch
15:52
Merge 8.6. Fix compilation warning. No longer return linked adress as interp result in Tcl_LinkArra(... check-in: 676d95cac7 user: jan.nijtmans tags: core-8-branch

Changes to generic/tclObj.c.

4187
4188
4189
4190
4191
4192
4193














4194
4195
4196
4197
4198
4199


4200
4201
4202
4203
4204
4205
4206
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    Tcl_Obj *objPtr1 = keyPtr;
    Tcl_Obj *objPtr2 = (Tcl_Obj *) hPtr->key.oneWordValue;
    register const char *p1, *p2;
    register size_t l1, l2;















    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller

       if (objPtr1 == objPtr2) return 1;


    */

    /*
     * Don't use Tcl_GetStringFromObj as it would prevent l1 and l2 being
     * in a register.
     */








>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
>
>







4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    Tcl_Obj *objPtr1 = keyPtr;
    Tcl_Obj *objPtr2 = (Tcl_Obj *) hPtr->key.oneWordValue;
    register const char *p1, *p2;
    register size_t l1, l2;

    /*
     * Optimisation for comparing small integers, where "small" means
     * non-bigint. Note that if there's a string rep for either value (or
     * we've got one non-integer), we can't optimize. For example, "0x7E" is
     * not the same as "126" when hashing (because the primary hash algorithm
     * is defined on strings) despite potentially having the same integer
     * representation.
     */

    if (objPtr1->typePtr == &tclIntType && objPtr1->bytes == NULL
            && objPtr2->typePtr == &tclIntType && objPtr2->bytes == NULL) {
        return objPtr1->internalRep.wideValue == objPtr2->internalRep.wideValue;
    }

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller

       if (objPtr1 == objPtr2) {
           return 1;
       }
    */

    /*
     * Don't use Tcl_GetStringFromObj as it would prevent l1 and l2 being
     * in a register.
     */

4273
4274
4275
4276
4277
4278
4279

4280
4281
4282
4283
4284
4285
4286
4287
4288
4289

TCL_HASH_TYPE
TclHashObjKey(
    Tcl_HashTable *tablePtr,	/* Hash table. */
    void *keyPtr)		/* Key from which to compute hash value. */
{
    Tcl_Obj *objPtr = keyPtr;

    int length;
    const char *string = TclGetStringFromObj(objPtr, &length);
    unsigned int result = 0;

    /*
     * I tried a zillion different hash functions and asked many other people
     * for advice. Many people had their own favorite functions, all
     * different, but no-one had much idea why they were good ones. I chose
     * the one below (multiply by 9 and add new character) because of the
     * following reasons:







>

|
<







4289
4290
4291
4292
4293
4294
4295
4296
4297
4298

4299
4300
4301
4302
4303
4304
4305

TCL_HASH_TYPE
TclHashObjKey(
    Tcl_HashTable *tablePtr,	/* Hash table. */
    void *keyPtr)		/* Key from which to compute hash value. */
{
    Tcl_Obj *objPtr = keyPtr;
    TCL_HASH_TYPE result = 0;
    int length;
    const char *string;


    /*
     * I tried a zillion different hash functions and asked many other people
     * for advice. Many people had their own favorite functions, all
     * different, but no-one had much idea why they were good ones. I chose
     * the one below (multiply by 9 and add new character) because of the
     * following reasons:
4311
4312
4313
4314
4315
4316
4317




















4318


4319


























4320


4321
4322

4323

4324





























4325
4326
4327
4328
4329
4330
4331
     *
     * See also HashStringKey in tclHash.c.
     * See also HashString in tclLiteral.c.
     *
     * See [tcl-Feature Request #2958832]
     */





















    if (length > 0) {


	result = UCHAR(*string);


























	while (--length) {


	    result += (result << 3) + UCHAR(*++string);
	}

    }

    return (TCL_HASH_TYPE) result;





























}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetCommandFromObj --
 *







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
|
>
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
     *
     * See also HashStringKey in tclHash.c.
     * See also HashString in tclLiteral.c.
     *
     * See [tcl-Feature Request #2958832]
     */

    /*
     * Special case: we can compute the hash of integers numerically. This
     * allows us to avoid allocating a string representation to them.
     */

    if (objPtr->typePtr == &tclIntType && objPtr->bytes == NULL) {
        const Tcl_WideInt objValue = objPtr->internalRep.wideValue;
        register Tcl_WideUInt value = (Tcl_WideUInt) objValue;

        /*
         * VERY TRICKY POINT: if the value is the minimum WideInt, use the
         * string path with constant values as it is unsafe to negate (this is
         * otherwise Undefined Behaviour). We use the string path here because
         * we are not assuming the type of TCL_HASH_TYPE; if we knew it was
         * 'unsigned', which is the default, we'd be able to just return
         * 843933654 (which I computed directly).
         */

        if (value == ((Tcl_WideUInt) 1) << 63) {
            string = "-9223372036854775808";
            length = 20;
            goto hashString;
        }

        if (objValue < 0) {     /* wrap to positive (remove sign) */
            value = (Tcl_WideUInt) -objValue;
        }

#ifndef TCL_WIDE_INT_IS_LONG
        /* 
         * For the performance reasons we should try convert small integers
         * as unsigned long if it is safe to cast in.
         */

        if (value <= (Tcl_WideUInt) ULONG_MAX) {
            register unsigned long lvalue = (unsigned long) value;

            /*
             * Important: use do-cycle, because value could be 0
             */

            do {
                /*
                 * Theoretically, divmod() would be perfect for this.
                 * Practically, it's usually miserably optimised so we avoid
                 * it.
                 */

                result += (result << 3) + (lvalue % 10 + '0');
                lvalue /= 10;
            } while (lvalue);

            if (objValue < 0) {         /* negative, sign as char */
                result += (result << 3) + '-';
            }
            return result;
        }
#endif

        /*
         * Important: use do-cycle, because value could be 0
         */

        do {
            /*
             * Theoretically, divmod() would be perfect for this. Practically,
             * it's usually miserably optimised so we avoid it.
             */

            result += (result << 3) + (value % 10 + '0');
            value /= 10;
        } while (value);

        if (objValue < 0) {             /* negative, sign as char */
            result += (result << 3) + '-';
        }
        return result;
    }

    string = TclGetString(objPtr);
    length = objPtr->length;

  hashString:
    string += length;
    while (length--) {
        result += (result << 3) + (unsigned char)(*--string);
    }
    return result;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetCommandFromObj --
 *

Changes to generic/tclVar.c.

6598
6599
6600
6601
6602
6603
6604





6605
6606
6607
6608
6609
6610
6611
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    Tcl_Obj *objPtr1 = keyPtr;
    Tcl_Obj *objPtr2 = hPtr->key.objPtr;
    register const char *p1, *p2;
    register int l1, l2;






    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller
     *
     * if (objPtr1 == objPtr2) return 1;
     */







>
>
>
>
>







6598
6599
6600
6601
6602
6603
6604
6605
6606
6607
6608
6609
6610
6611
6612
6613
6614
6615
6616
    void *keyPtr,		/* New key to compare. */
    Tcl_HashEntry *hPtr)	/* Existing key to compare. */
{
    Tcl_Obj *objPtr1 = keyPtr;
    Tcl_Obj *objPtr2 = hPtr->key.objPtr;
    register const char *p1, *p2;
    register int l1, l2;

    /* Optimisation for comparing small integers */
    if (objPtr1->typePtr == &tclIntType && objPtr1->bytes == NULL && objPtr2->typePtr == &tclIntType && objPtr2->bytes == NULL) {
        return objPtr1->internalRep.wideValue == objPtr2->internalRep.wideValue;
    }

    /*
     * If the object pointers are the same then they match.
     * OPT: this comparison was moved to the caller
     *
     * if (objPtr1 == objPtr2) return 1;
     */

Changes to tests/var.test.

1475
1476
1477
1478
1479
1480
1481





















1482
1483
1484
1485
1486
1487
1488
test var-24.24 {array default unset: errors} -setup {
    unset -nocomplain ary
} -body {
    array default unset ary x
} -returnCodes error -cleanup {
    unset -nocomplain ary
} -result * -match glob






















catch {namespace delete ns}
catch {unset arr}
catch {unset v}

catch {rename getbytes ""}
catch {rename p ""}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
test var-24.24 {array default unset: errors} -setup {
    unset -nocomplain ary
} -body {
    array default unset ary x
} -returnCodes error -cleanup {
    unset -nocomplain ary
} -result * -match glob

test var-25.1 {corner cases of int-hash optimizations (TIP 534} -setup {
    unset -nocomplain v
} -body {
    foreach i {
	0
	0x7fffffff -0x7fffffff
	0xffffffff -0xffffffff
	0x7fffffffffffffff -0x7fffffffffffffff
	0xffffffffffffffff -0xffffffffffffffff
	0x7fffffffffffffffffffffffffffffff -0x7fffffffffffffffffffffffffffffff
	0xffffffffffffffffffffffffffffffff -0xffffffffffffffffffffffffffffffff
    } {
	# check integer hash is equal string hash for values from [$i-5 .. $i+5]:
	set i [expr {$i - 5}]
	time { set v([incr i]) 1; set v([string trim "$i "]); unset v($i) } 10
    }
    array size v
} -cleanup {
    unset -nocomplain v
} -result 0

catch {namespace delete ns}
catch {unset arr}
catch {unset v}

catch {rename getbytes ""}
catch {rename p ""}