Index: generic/tclObj.c ================================================================== --- generic/tclObj.c +++ generic/tclObj.c @@ -4189,16 +4189,32 @@ { Tcl_Obj *objPtr1 = keyPtr; Tcl_Obj *objPtr2 = (Tcl_Obj *) hPtr->key.oneWordValue; register const char *p1, *p2; register size_t l1, l2; + + /* + * Optimisation for comparing small integers, where "small" means + * non-bigint. Note that if there's a string rep for either value (or + * we've got one non-integer), we can't optimize. For example, "0x7E" is + * not the same as "126" when hashing (because the primary hash algorithm + * is defined on strings) despite potentially having the same integer + * representation. + */ + + if (objPtr1->typePtr == &tclIntType && objPtr1->bytes == NULL + && objPtr2->typePtr == &tclIntType && objPtr2->bytes == NULL) { + return objPtr1->internalRep.wideValue == objPtr2->internalRep.wideValue; + } /* * If the object pointers are the same then they match. * OPT: this comparison was moved to the caller - if (objPtr1 == objPtr2) return 1; + if (objPtr1 == objPtr2) { + return 1; + } */ /* * Don't use Tcl_GetStringFromObj as it would prevent l1 and l2 being * in a register. @@ -4275,13 +4291,13 @@ TclHashObjKey( Tcl_HashTable *tablePtr, /* Hash table. */ void *keyPtr) /* Key from which to compute hash value. */ { Tcl_Obj *objPtr = keyPtr; + TCL_HASH_TYPE result = 0; int length; - const char *string = TclGetStringFromObj(objPtr, &length); - unsigned int result = 0; + const char *string; /* * I tried a zillion different hash functions and asked many other people * for advice. Many people had their own favorite functions, all * different, but no-one had much idea why they were good ones. I chose @@ -4313,17 +4329,98 @@ * See also HashString in tclLiteral.c. * * See [tcl-Feature Request #2958832] */ - if (length > 0) { - result = UCHAR(*string); - while (--length) { - result += (result << 3) + UCHAR(*++string); - } + /* + * Special case: we can compute the hash of integers numerically. This + * allows us to avoid allocating a string representation to them. + */ + + if (objPtr->typePtr == &tclIntType && objPtr->bytes == NULL) { + const Tcl_WideInt objValue = objPtr->internalRep.wideValue; + register Tcl_WideUInt value = (Tcl_WideUInt) objValue; + + /* + * VERY TRICKY POINT: if the value is the minimum WideInt, use the + * string path with constant values as it is unsafe to negate (this is + * otherwise Undefined Behaviour). We use the string path here because + * we are not assuming the type of TCL_HASH_TYPE; if we knew it was + * 'unsigned', which is the default, we'd be able to just return + * 843933654 (which I computed directly). + */ + + if (value == ((Tcl_WideUInt) 1) << 63) { + string = "-9223372036854775808"; + length = 20; + goto hashString; + } + + if (objValue < 0) { /* wrap to positive (remove sign) */ + value = (Tcl_WideUInt) -objValue; + } + +#ifndef TCL_WIDE_INT_IS_LONG + /* + * For the performance reasons we should try convert small integers + * as unsigned long if it is safe to cast in. + */ + + if (value <= (Tcl_WideUInt) ULONG_MAX) { + register unsigned long lvalue = (unsigned long) value; + + /* + * Important: use do-cycle, because value could be 0 + */ + + do { + /* + * Theoretically, divmod() would be perfect for this. + * Practically, it's usually miserably optimised so we avoid + * it. + */ + + result += (result << 3) + (lvalue % 10 + '0'); + lvalue /= 10; + } while (lvalue); + + if (objValue < 0) { /* negative, sign as char */ + result += (result << 3) + '-'; + } + return result; + } +#endif + + /* + * Important: use do-cycle, because value could be 0 + */ + + do { + /* + * Theoretically, divmod() would be perfect for this. Practically, + * it's usually miserably optimised so we avoid it. + */ + + result += (result << 3) + (value % 10 + '0'); + value /= 10; + } while (value); + + if (objValue < 0) { /* negative, sign as char */ + result += (result << 3) + '-'; + } + return result; + } + + string = TclGetString(objPtr); + length = objPtr->length; + + hashString: + string += length; + while (length--) { + result += (result << 3) + (unsigned char)(*--string); } - return (TCL_HASH_TYPE) result; + return result; } /* *---------------------------------------------------------------------- * Index: generic/tclVar.c ================================================================== --- generic/tclVar.c +++ generic/tclVar.c @@ -6600,10 +6600,15 @@ { Tcl_Obj *objPtr1 = keyPtr; Tcl_Obj *objPtr2 = hPtr->key.objPtr; register const char *p1, *p2; register int l1, l2; + + /* Optimisation for comparing small integers */ + if (objPtr1->typePtr == &tclIntType && objPtr1->bytes == NULL && objPtr2->typePtr == &tclIntType && objPtr2->bytes == NULL) { + return objPtr1->internalRep.wideValue == objPtr2->internalRep.wideValue; + } /* * If the object pointers are the same then they match. * OPT: this comparison was moved to the caller * Index: tests/var.test ================================================================== --- tests/var.test +++ tests/var.test @@ -1477,10 +1477,31 @@ } -body { array default unset ary x } -returnCodes error -cleanup { unset -nocomplain ary } -result * -match glob + +test var-25.1 {corner cases of int-hash optimizations (TIP 534} -setup { + unset -nocomplain v +} -body { + foreach i { + 0 + 0x7fffffff -0x7fffffff + 0xffffffff -0xffffffff + 0x7fffffffffffffff -0x7fffffffffffffff + 0xffffffffffffffff -0xffffffffffffffff + 0x7fffffffffffffffffffffffffffffff -0x7fffffffffffffffffffffffffffffff + 0xffffffffffffffffffffffffffffffff -0xffffffffffffffffffffffffffffffff + } { + # check integer hash is equal string hash for values from [$i-5 .. $i+5]: + set i [expr {$i - 5}] + time { set v([incr i]) 1; set v([string trim "$i "]); unset v($i) } 10 + } + array size v +} -cleanup { + unset -nocomplain v +} -result 0 catch {namespace delete ns} catch {unset arr} catch {unset v}