[prev in list] [next in list] [prev in thread] [next in thread]
List: sbcl-commits
Subject: Re: [Sbcl-commits] master: Improve EQL-HASH
From: Stas Boukarev <stassats () gmail ! com>
Date: 2020-05-21 1:46:18
Message-ID: CAF63=10+6TXpp-KGerGHiAKX71Dg72bsFuAQGeU4_WJiGzdBaA () mail ! gmail ! com
[Download RAW message or body]
Can't build clisp, abcl, ccl. Probably due to the macrolet+inline.
On Thu, May 21, 2020 at 3:24 AM Douglas Katzman via Sbcl-commits
<sbcl-commits@lists.sourceforge.net> wrote:
>
> The branch "master" has been updated in SBCL:
> via e78f1947957cb5613ebc813d1ae55afcdc59620f (commit)
> from 923b6af21c4f4370b3e320e0427248827674f4f5 (commit)
>
> - Log -----------------------------------------------------------------
> commit e78f1947957cb5613ebc813d1ae55afcdc59620f
> Author: Douglas Katzman <dougk@google.com>
> Date: Wed May 20 17:18:45 2020 -0400
>
> Improve EQL-HASH
>
> * Put SYMBOL-WIDETAG contiguous with the numeric widetags
>
> * Add a type proclamation for NUMBER-SXHASH
>
> * Change ENSURE-SYMBOL-HASH to SYMBOL-HASH within GETHASH because
> a symbol that was not yet hashed can't be in the table anyway.
> This avoids a conditional branch and reduces code size a little.
> ---
> src/code/target-hash-table.lisp | 84 ++++++++++++++++++++++------------
> src/code/target-sxhash.lisp | 1 +
> src/cold/shared.lisp | 16 +++++--
> src/compiler/generic/early-objdef.lisp | 29 ++++++------
> src/compiler/generic/utils.lisp | 5 ++
> src/compiler/generic/vm-fndb.lisp | 1 +
> src/compiler/meta-vmdef.lisp | 6 ++-
> src/compiler/ppc/cell.lisp | 4 +-
> src/compiler/ppc64/cell.lisp | 4 ++
> src/compiler/sparc/cell.lisp | 4 +-
> src/compiler/x86-64/cell.lisp | 6 ++-
> src/compiler/x86/cell.lisp | 4 +-
> 12 files changed, 109 insertions(+), 55 deletions(-)
>
> diff --git a/src/code/target-hash-table.lisp b/src/code/target-hash-table.lisp
> index 5677c1b99..f90d7fce9 100644
> --- a/src/code/target-hash-table.lisp
> +++ b/src/code/target-hash-table.lisp
> @@ -132,26 +132,42 @@
> (values (pointer-hash key)
> (sb-vm:is-lisp-pointer (get-lisp-obj-address key))))
>
> -#-sb-fluid (declaim (inline eql-hash))
> -(defun eql-hash (key)
> - (declare (values fixnum (member t nil)))
> - (if (%other-pointer-subtype-p
> - key
> - ;; SYMBOL is listed here so that we can hash symbols address-insensitively.
> - ;; Given that we're already picking off a bunch of OTHER-POINTER objects
> - ;; and already calling SXHASH, the overhead is minimal. In fact, with \
> suitably
> - ;; and rearranged widetags, this would be included in the numeric range.
> - '#.(list sb-vm:bignum-widetag sb-vm:ratio-widetag \
> sb-vm:double-float-widetag
> - sb-vm:single-float-widetag
> - sb-vm:complex-widetag sb-vm:complex-single-float-widetag \
> sb-vm:complex-double-float-widetag
> - sb-vm:symbol-widetag))
> - (values (if (= (%other-pointer-widetag key) sb-vm:symbol-widetag)
> - (sxhash (truly-the symbol key))
> - (number-sxhash key))
> - nil)
> - ;; I don't want to add a case for INSTANCE-WITH-HASH-P here,
> - ;; but in the EQUAL and EQUAL hash functions, we do that.
> - (eq-hash key)))
> +(declaim (inline eql-hash eql-hash-no-memoize))
> +(macrolet
> + ((define-eql-hash (name symbol-hash-fun)
> + `(defun ,name (key)
> + (declare (values fixnum (member t nil)))
> + (if (%other-pointer-subtype-p
> + key
> + ;; SYMBOL is listed here so that we can hash symbols \
> address-insensitively. + ;; We have to pick off a bunch of \
> OTHER-POINTER objects anyway, so there + ;; no overhead to extending \
> the widetag range by 1 widetag. + '#.(list sb-vm:bignum-widetag \
> sb-vm:ratio-widetag sb-vm:double-float-widetag + \
> sb-vm:single-float-widetag + sb-vm:complex-widetag \
> sb-vm:complex-single-float-widetag + \
> sb-vm:complex-double-float-widetag + sb-vm:symbol-widetag))
> + ;; NON-NULL-SYMBOL-P skips a test for NIL which is sensible, and \
> we're + ;; excluding NIL anyway because it's not an OTHER-POINTER.
> + ;; To produce the best code for NON-NULL-SYMBOL-P (omitting a lowtag \
> test) + ;; we need to force the compiler to see that KEY is definitely \
> an + ;; OTHER-POINTER (cf OTHER-POINTER-TN-REF-P) because \
> %OTHER-POINTER-SUBTYPE-P + ;; doesn't suffice, though it would be nice \
> if it did. + (values (if (non-null-symbol-p
> + (truly-the (or (and number (not fixnum) #+64-bit (not \
> single-float)) + (and symbol (not null)))
> + key))
> + (,symbol-hash-fun (truly-the symbol key))
> + (number-sxhash (truly-the number key)))
> + nil)
> + ;; Consider picking off %INSTANCEP too before using EQ-HASH ?
> + (eq-hash key)))))
> + (define-eql-hash eql-hash sxhash) ; via transform
> + ;; For GETHASH we should never compute a symbol-hash. If it hasn't been
> + ;; computed, KEY won't be found, and it doesn't matter what the hash is.
> + ;; This could theoretically avoid clearing the fixnum tag since the symbol
> + ;; is not n.
> + (define-eql-hash eql-hash-no-memoize symbol-hash))
>
> #-sb-fluid (declaim (inline equal-hash))
> (defun equal-hash (key)
> @@ -912,16 +928,26 @@ if there is no such entry. Entries can be added using SETF."
> ;; to keep things simple so that we don't have to pass in the names
> ;; of local variables to bind. (Being unhygienic on purpose)
>
> - (defun ht-hash-setup (std-fn)
> + (defun ht-hash-setup (std-fn caller)
> (if std-fn
> `(((hash0 address-based-p)
> ;; so many warnings about generic SXHASH - who cares
> (locally (declare (muffle-conditions compiler-note))
> - ,(if (eq std-fn 'equal)
> - `(if (eq (hash-table-hash-fun table) #'equal-hash)
> - (equal-hash key) ; inlined
> - (funcall (hash-table-hash-fun table) key))
> - `(,(symbolicate std-fn "-HASH") key))))
> + ,(case std-fn
> + (eql
> + ;; GETHASH in an EQL table doesn't need to compute and writeback
> + ;; a hash into a symbol that didn't already have a hash.
> + ;; So the hash computation is a touch shorter by avoiding that.
> + `(,(if (eq caller 'gethash) 'eql-hash-no-memoize 'eql-hash) key))
> + (equal
> + ;; EQUAL tables can opt out of using the stable instance hash
> + ;; to avoid increasing the length of all structures.
> + ;; There is no exposed interface to this; it's for system use.
> + `(if (eq (hash-table-hash-fun table) #'equal-hash)
> + (equal-hash key) ; inlined
> + (funcall (hash-table-hash-fun table) key)))
> + (t
> + `(,(symbolicate std-fn "-HASH") key)))))
> (hash (prefuzz-hash hash0)))
> '((hash0 (funcall (hash-table-hash-fun hash-table) key))
> (address-based-p nil)
> @@ -1119,7 +1145,7 @@ nnnn 1_ any linear scan
> (/= cache 0)) ; don't falsely match the metadata cell
> (return-from ,name (values (aref kv-vector (1+ cache)) t))))
> (with-pinned-objects (key)
> - (binding* (,@(ht-hash-setup std-fn)
> + (binding* (,@(ht-hash-setup std-fn 'gethash)
> (eq-test ,(ht-probing-should-use-eq std-fn)))
> (declare (fixnum hash0))
> (flet ((hash-search (&aux ,@(ht-probe-setup std-fn))
> @@ -1467,7 +1493,7 @@ nnnn 1_ any linear scan
> ;; Granted that the bit might have been 1 at timestamp 't1',
> ;; but it's best to read it at t1 and not later.
> (binding* ((initial-epoch (kv-vector-rehash-epoch kv-vector))
> - ,@(ht-hash-setup std-fn)
> + ,@(ht-hash-setup std-fn 'puthash)
> ,@(ht-probe-setup std-fn)
> (eq-test ,(ht-probing-should-use-eq std-fn)))
> (declare (fixnum hash0) (index/2 index))
> @@ -1606,7 +1632,7 @@ nnnn 1_ any linear scan
> ;; See comment in DEFINE-HT-SETTER about why to read initial-epoch
> ;; as soon as possible after pinning KEY.
> (binding* ((initial-epoch (kv-vector-rehash-epoch kv-vector))
> - ,@(ht-hash-setup std-fn)
> + ,@(ht-hash-setup std-fn 'remhash)
> ,@(ht-probe-setup std-fn)
> (eq-test ,(ht-probing-should-use-eq std-fn)))
> (declare (fixnum hash0) (index/2 index) (ignore probe-limit))
> diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp
> index 3eefbc171..af8039886 100644
> --- a/src/code/target-sxhash.lisp
> +++ b/src/code/target-sxhash.lisp
> @@ -156,6 +156,7 @@
>
> (defun number-sxhash (x)
> (declare (optimize (sb-c:verify-arg-count 0) speed))
> + (declare (explicit-check))
> (labels ((hash-ratio (x)
> (let ((result 127810327))
> (declare (type fixnum result))
> diff --git a/src/cold/shared.lisp b/src/cold/shared.lisp
> index ce0493d58..9ad48b705 100644
> --- a/src/cold/shared.lisp
> +++ b/src/cold/shared.lisp
> @@ -243,6 +243,17 @@
> (gc (find-if (lambda (x) (member x '(:cheneygc :gencgc)))
> target-feature-list))
> (arch (target-platform-keyword target-feature-list)))
> + (when (and (member :x86 target-feature-list)
> + (member :int4-breakpoints target-feature-list))
> + ;; 0xCE is a perfectly good 32-bit instruction,
> + ;; unlike on x86-64 where it is illegal. It's therefore
> + ;; confusing to allow this feature in a 32-bit build.
> + ;; But it's annoying to have a build script that otherwise works
> + ;; for a native x86/x86-64 build except for needing one change.
> + ;; Just print something and go on with life.
> + (setq target-feature-list
> + (remove :int4-breakpoints target-feature-list))
> + (warn "Removed :INT4-BREAKPOINTS from target features"))
> ;; Putting arch and gc choice first is visually convenient, versus
> ;; having to parse a random place in the line to figure out the value
> ;; of a binary choice {cheney vs gencgc} and architecture.
> @@ -309,11 +320,6 @@
> ":IMMOBILE-CODE requires :IMMOBILE-SPACE feature")
> ("(and immobile-symbols (not immobile-space))"
> ":IMMOBILE-SYMBOLS requires :IMMOBILE-SPACE feature")
> - ("(and int4-breakpoints x86)"
> - ;; 0xCE is a perfectly good 32-bit instruction,
> - ;; unlike on x86-64 where it is illegal. It's therefore
> - ;; confusing to allow this feature in a 32-bit build.
> - ":INT4-BREAKPOINTS are incompatible with x86")
> ;; There is still hope to make multithreading on DragonFly x86-64
> ("(and sb-thread x86 dragonfly)"
> ":SB-THREAD not supported on selected architecture")))
> diff --git a/src/compiler/generic/early-objdef.lisp \
> b/src/compiler/generic/early-objdef.lisp index 292f8767d..4ae311ab0 100644
> --- a/src/compiler/generic/early-objdef.lisp
> +++ b/src/compiler/generic/early-objdef.lisp
> @@ -201,27 +201,28 @@
> ;; Word bits ; 32 | 64 32 | 64
> ;------------------
> ; [ all numbers are hex ]
> - bignum-widetag ; 0A 11 0A 11
> - ratio-widetag ; 0E 15 0E 15
> - single-float-widetag ; 12 19 12 19
> - double-float-widetag ; 16 1D 16 1D
> - complex-widetag ; 1A 21 1A 21
> - complex-single-float-widetag ; 1E 25 1E 25
> - complex-double-float-widetag ; 22 29 22 29
> + bignum-widetag ; 0A 11 0A 11 \
> + ratio-widetag ; 0E 15 0E 15 |
> + single-float-widetag ; 12 19 12 19 |
> + double-float-widetag ; 16 1D 16 1D | EQL-hash \
> picks off this + complex-widetag ; 1A 21 1A 21 \
> | range of widetags + complex-single-float-widetag ; 1E 25 1E \
> 25 | + complex-double-float-widetag ; 22 29 22 29 \
> | + ; |
> + symbol-widetag ; 26 2D 26 2D /
>
> - code-header-widetag ; 26 2D 26 2D
> + code-header-widetag ; 2A 31 2A 31
>
> - simple-fun-widetag ; 2A 31 2A 31
> - closure-widetag ; 2E 35 2E 35
> - funcallable-instance-widetag ; 32 39 32 39
> + simple-fun-widetag ; 2E 35 2E 35
> + closure-widetag ; 32 39 32 39
> + funcallable-instance-widetag ; 36 3D 36 3D
>
> ;; x86[-64] does not have objects with this widetag,
> #+(or x86 x86-64) unused00-widetag
> #-(or x86 x86-64)
> - return-pc-widetag ; 36 3D 36 3D
> + return-pc-widetag ; 3A 41 3A 41
>
> - value-cell-widetag ; 3A 41 3A 41
> - symbol-widetag ; 3E 45 3E 45
> + value-cell-widetag ; 3E 45 3E 45
> character-widetag ; 42 49 42 49
> sap-widetag ; 46 4D 46 4D
> unbound-marker-widetag ; 4A 51 4A 51
> diff --git a/src/compiler/generic/utils.lisp b/src/compiler/generic/utils.lisp
> index eabf8e6b0..6b44d944c 100644
> --- a/src/compiler/generic/utils.lisp
> +++ b/src/compiler/generic/utils.lisp
> @@ -207,6 +207,11 @@
> instance
> character))))))
>
> +(defun not-nil-tn-ref-p (tn-ref)
> + (and (tn-ref-type tn-ref)
> + (not (types-equal-or-intersect (tn-ref-type tn-ref)
> + (specifier-type '(eql nil))))))
> +
> (defun length-field-shift (widetag)
> (if (= widetag instance-widetag)
> instance-length-shift
> diff --git a/src/compiler/generic/vm-fndb.lisp b/src/compiler/generic/vm-fndb.lisp
> index 4215d269d..39c929a09 100644
> --- a/src/compiler/generic/vm-fndb.lisp
> +++ b/src/compiler/generic/vm-fndb.lisp
> @@ -104,6 +104,7 @@
> (values index fixnum)
> (foldable flushable))
>
> +(defknown sb-impl::number-sxhash (number) hash-code (foldable flushable))
> (defknown %sxhash-string (string) hash-code (foldable flushable))
> (defknown %sxhash-simple-string (simple-string) hash-code (foldable flushable))
>
> diff --git a/src/compiler/meta-vmdef.lisp b/src/compiler/meta-vmdef.lisp
> index b8540f263..a8c705ed0 100644
> --- a/src/compiler/meta-vmdef.lisp
> +++ b/src/compiler/meta-vmdef.lisp
> @@ -780,8 +780,10 @@
> `(,dummy)))
> (ignorable ,n-args ,n-results))
> ,@(loads)
> - (assemble ()
> - ,@(vop-parse-body parse))
> + ;; RETURN-FROM can exit the ASSEMBLE while continuing on with saves.
> + (block ,(vop-parse-name parse)
> + (assemble ()
> + ,@(vop-parse-body parse)))
> ,@(saves))))))
>
> (defvar *parse-vop-operand-count*)
> diff --git a/src/compiler/ppc/cell.lisp b/src/compiler/ppc/cell.lisp
> index b0404c627..54ca3e82d 100644
> --- a/src/compiler/ppc/cell.lisp
> +++ b/src/compiler/ppc/cell.lisp
> @@ -222,13 +222,15 @@
> (:args (symbol :scs (descriptor-reg)))
> (:results (res :scs (any-reg)))
> (:result-types positive-fixnum)
> + (:args-var args)
> (:generator 2
> ;; The symbol-hash slot of NIL holds NIL because it is also the
> ;; car slot, so we have to strip off the two low bits to make sure
> ;; it is a fixnum. The lowtag selection magic that is required to
> ;; ensure this is explained in the comment in objdef.lisp
> (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> - (inst clrrwi res res n-fixnum-tag-bits)))
> + (unless (not-nil-tn-ref-p args)
> + (inst clrrwi res res n-fixnum-tag-bits))))
>
> ;;;; Fdefinition (fdefn) objects.
>
> diff --git a/src/compiler/ppc64/cell.lisp b/src/compiler/ppc64/cell.lisp
> index 51e4d1823..afdaedaf1 100644
> --- a/src/compiler/ppc64/cell.lisp
> +++ b/src/compiler/ppc64/cell.lisp
> @@ -268,7 +268,11 @@
> (:args (symbol :scs (descriptor-reg)))
> (:results (res :scs (any-reg)))
> (:result-types positive-fixnum)
> + (:args-var args)
> (:generator 4
> + (when (not-nil-tn-ref-p args)
> + (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> + (return-from symbol-hash))
> (inst cmpld symbol null-tn)
> (inst beq NULL)
> (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> diff --git a/src/compiler/sparc/cell.lisp b/src/compiler/sparc/cell.lisp
> index 4dc349511..e1a6870c9 100644
> --- a/src/compiler/sparc/cell.lisp
> +++ b/src/compiler/sparc/cell.lisp
> @@ -89,13 +89,15 @@
> (:args (symbol :scs (descriptor-reg)))
> (:results (res :scs (any-reg)))
> (:result-types positive-fixnum)
> + (:args-var args)
> (:generator 2
> ;; The symbol-hash slot of NIL holds NIL because it is also the
> ;; cdr slot, so we have to strip off the two low bits to make sure
> ;; it is a fixnum. The lowtag selection magic that is required to
> ;; ensure this is explained in the comment in objdef.lisp
> (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> - (inst andn res res fixnum-tag-mask)))
> + (unless (not-nil-tn-ref-p args)
> + (inst andn res res fixnum-tag-mask))))
>
> ;;; On unithreaded builds these are just copies of the non-global versions.
> (define-vop (%set-symbol-global-value set))
> diff --git a/src/compiler/x86-64/cell.lisp b/src/compiler/x86-64/cell.lisp
> index 19369c7e8..86a90c774 100644
> --- a/src/compiler/x86-64/cell.lisp
> +++ b/src/compiler/x86-64/cell.lisp
> @@ -358,13 +358,15 @@
> (:args (symbol :scs (descriptor-reg)))
> (:results (res :scs (any-reg)))
> (:result-types positive-fixnum)
> + (:args-var args)
> (:generator 2
> + (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> ;; The symbol-hash slot of NIL holds NIL because it is also the
> ;; car slot, so we have to zero the fixnum tag bit(s) to make sure
> ;; it is a fixnum. The lowtag selection magic that is required to
> ;; ensure this is explained in the comment in objdef.lisp
> - (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> - (inst and res (lognot fixnum-tag-mask))))
> + (unless (not-nil-tn-ref-p args)
> + (inst and res (lognot fixnum-tag-mask)))))
>
> ;;; Combine SYMBOL-HASH and the lisp fallback code into one vop.
> (define-vop ()
> diff --git a/src/compiler/x86/cell.lisp b/src/compiler/x86/cell.lisp
> index 86a56dd5c..9bd0e5c98 100644
> --- a/src/compiler/x86/cell.lisp
> +++ b/src/compiler/x86/cell.lisp
> @@ -234,13 +234,15 @@
> (:args (symbol :scs (descriptor-reg)))
> (:results (res :scs (any-reg)))
> (:result-types positive-fixnum)
> + (:args-var args)
> (:generator 2
> ;; The symbol-hash slot of NIL holds NIL because it is also the
> ;; car slot, so we have to strip off the two low bits to make sure
> ;; it is a fixnum. The lowtag selection magic that is required to
> ;; ensure this is explained in the comment in objdef.lisp
> (loadw res symbol symbol-hash-slot other-pointer-lowtag)
> - (inst and res (lognot #b11))))
> + (unless (not-nil-tn-ref-p args)
> + (inst and res (lognot #b11)))))
>
> ;;;; fdefinition (FDEFN) objects
>
>
> -----------------------------------------------------------------------
>
>
> hooks/post-receive
> --
> SBCL
>
>
> _______________________________________________
> Sbcl-commits mailing list
> Sbcl-commits@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/sbcl-commits
_______________________________________________
Sbcl-commits mailing list
Sbcl-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sbcl-commits
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic