[prev in list] [next in list] [prev in thread] [next in thread]
List: sbcl-commits
Subject: Re: [Sbcl-commits] master: Add another allocator metric
From: Stas Boukarev <stassats () gmail ! com>
Date: 2021-08-21 11:05:08
Message-ID: CAF63=13CJii-Mhvr=Xqu=WX=2bMczh_2vjwL8wt0qBwSm2yf_A () mail ! gmail ! com
[Download RAW message or body]
/tests/aprof.impure.lisp
; in: SB-C:DEFINE-VOP (ALLOC-TO-R8)
; (SB-VM::INSTRUMENT-ALLOC SB-VM::BYTES SB-VM::NODE)
;
; caught STYLE-WARNING:
; The function SB-VM::INSTRUMENT-ALLOC is called with two arguments,
but wants exactly three.
;
; compilation unit finished
; caught 1 STYLE-WARNING condition
::: Running :APROF-SMOKETEST-LARGE-VECTOR-TO-UPPER-REGISTER
::: UNEXPECTED-FAILURE :APROF-SMOKETEST-LARGE-VECTOR-TO-UPPER-REGISTER
due to SB-INT:SIMPLE-PROGRAM-ERROR: "invalid number of arguments: 2"
On Sat, Aug 21, 2021 at 5:35 AM Douglas Katzman via Sbcl-commits
<sbcl-commits@lists.sourceforge.net> wrote:
>
> The branch "master" has been updated in SBCL:
> via 7cdbc7e69b80996fddba7d8bbda339764dc6da9d (commit)
> from fafa3fb60e5797ee73f220b00ca8ec65ebb6dbc6 (commit)
>
> - Log -----------------------------------------------------------------
> commit 7cdbc7e69b80996fddba7d8bbda339764dc6da9d
> Author: Douglas Katzman <dougk@google.com>
> Date: Fri Aug 20 22:32:16 2021 -0400
>
> Add another allocator metric
>
> Compute number of unboxed bytes versus boxed bytes per thread
> ---
> src/assembly/x86-64/arith.lisp | 4 +--
> src/code/target-thread.lisp | 24 +++++++-------
> src/compiler/generic/objdef.lisp | 3 +-
> src/compiler/x86-64/alloc.lisp | 67 ++++++++++++++++++++++++++++++----------
> src/compiler/x86-64/array.lisp | 2 +-
> src/compiler/x86-64/call.lisp | 2 +-
> 6 files changed, 67 insertions(+), 35 deletions(-)
>
> diff --git a/src/assembly/x86-64/arith.lisp b/src/assembly/x86-64/arith.lisp
> index 1d6dc8fc3..5cdac0b2c 100644
> --- a/src/assembly/x86-64/arith.lisp
> +++ b/src/assembly/x86-64/arith.lisp
> @@ -52,9 +52,9 @@
> (defun return-single-word-bignum (dest alloc-tn source)
> (let ((header (logior (ash 1 n-widetag-bits) bignum-widetag))
> (nbytes #+bignum-assertions 32 #-bignum-assertions 16))
> - (instrument-alloc nbytes nil)
> + (instrument-alloc bignum-widetag nbytes nil)
> (pseudo-atomic ()
> - (allocation nil nbytes 0 nil nil alloc-tn)
> + (allocation bignum-widetag nbytes 0 nil nil alloc-tn)
> (storew* header alloc-tn 0 0 t)
> (storew source alloc-tn bignum-digits-offset 0)
> (if (eq dest alloc-tn)
> diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp
> index 766186b67..bed5af477 100644
> --- a/src/code/target-thread.lisp
> +++ b/src/code/target-thread.lisp
> @@ -2636,12 +2636,8 @@ mechanism for inter-thread communication."
> (sum (a b)
> (cond ((null a) b)
> ((null b) a)
> - (t (list (+ (first a) (first b))
> - (vector-sum (second a) (second b))
> - (+ (third a) (third b))
> - (+ (fourth a) (fourth b))
> - (+ (fifth a) (fifth b))
> - (+ (sixth a) (sixth b)))))))
> + (t (cons (vector-sum (car a) (car b))
> + (mapcar #'+ (cdr a) (cdr b)))))))
> (reduce #'sum
> ;; what about the finalizer thread?
> (mapcar 'allocator-histogram (list-all-threads))))
> @@ -2651,10 +2647,10 @@ mechanism for inter-thread communication."
> (declare (notinline position)) ; style-warning for some reason
> (dotimes (i sb-vm:n-word-bits)
> (setf (aref a i) (histogram-value c-thread i)))
> - (list (metric c-thread sb-vm::thread-total-bytes-allocated-slot)
> - ;; discard uninteresting entries
> - (subseq a sb-vm:n-lowtag-bits
> + (list (subseq a sb-vm:n-lowtag-bits ; discard uninteresting entries
> (1+ (position 0 a :from-end t :test #'/=)))
> + (metric c-thread sb-vm::thread-tot-bytes-alloc-boxed-slot)
> + (metric c-thread sb-vm::thread-tot-bytes-alloc-unboxed-slot)
> (metric c-thread sb-vm::thread-slow-path-allocs-slot)
> (metric c-thread sb-vm::thread-et-allocator-mutex-acq-slot)
> (metric c-thread sb-vm::thread-et-find-freeish-page-slot)
> @@ -2663,11 +2659,14 @@ mechanism for inter-thread communication."
> (defun reset-allocator-histogram (&optional (thread *current-thread*))
> (with-deathlok (thread c-thread)
> (unless (= c-thread 0)
> + (setf (metric c-thread sb-vm::thread-tot-bytes-alloc-boxed-slot) 0
> + (metric c-thread sb-vm::thread-tot-bytes-alloc-unboxed-slot) 0
> + (metric c-thread sb-vm::thread-slow-path-allocs-slot) 0)
> (dotimes (i sb-vm:n-word-bits)
> (setf (histogram-value c-thread i) 0)))))
>
> (defun print-allocator-histogram (&optional (thread *current-thread*))
> - (destructuring-bind (total-bytes bins n-slow-path lock find clear)
> + (destructuring-bind (bins tot-bytes-boxed tot-bytes-unboxed n-slow-path lock find clear)
> (allocator-histogram thread)
> (let ((total-objects (reduce #'+ bins))
> (size (* 4 sb-vm:n-word-bytes)) ; "<=" this size is the smallest bin
> @@ -2681,9 +2680,8 @@ mechanism for inter-thread communication."
> (format nil "< 2^~d" (1- (integer-length size))))
> count (/ cumulative total-objects))
> (setq size (* size 2)))
> - (format t "Total: ~D bytes, ~D objects, ~,2,2f% fast path~%"
> - total-bytes
> - total-objects
> + (format t "Total: ~D+~D bytes, ~D objects, ~,2,2f% fast path~%"
> + tot-bytes-boxed tot-bytes-unboxed total-objects
> (/ (- total-objects n-slow-path) total-objects))
> (format t "Times (sec): lock=~,,-9f find=~,,-9f clear=~,,-9f~%"
> lock find clear)))))
> diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp
> index 0e2b5b9a3..e34376df4 100644
> --- a/src/compiler/generic/objdef.lisp
> +++ b/src/compiler/generic/objdef.lisp
> @@ -569,7 +569,8 @@ during backtrace.
> (mach-port-name :c-type "mach_port_name_t")
>
> ;; allocation instrumenting
> - (total-bytes-allocated)
> + (tot-bytes-alloc-boxed)
> + (tot-bytes-alloc-unboxed)
> (slow-path-allocs)
> (et-allocator-mutex-acq) ; elapsed times
> (et-find-freeish-page)
> diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp
> index 2d7b988d1..153557e17 100644
> --- a/src/compiler/x86-64/alloc.lisp
> +++ b/src/compiler/x86-64/alloc.lisp
> @@ -53,6 +53,16 @@
> (used-p (sb-c::ir2-component-normal-tns comp))
> (used-p (sb-c::ir2-component-wired-tns comp))))))))
>
> +(defun alloc-unboxed-p (type)
> + (case type
> + ((unboxed-array
> + #.bignum-widetag
> + #.sap-widetag
> + #.double-float-widetag
> + #.complex-single-float-widetag
> + #.complex-double-float-widetag)
> + t)))
> +
> ;;; Call an allocator trampoline and get the result in the proper register.
> ;;; There are 2x2 choices of trampoline:
> ;;; - invoke alloc() or alloc_list() in C
> @@ -90,19 +100,37 @@
> (inst or :byte result-tn lowtag)))
>
> ;;; Insert allocation profiler instrumentation
> -(defun instrument-alloc (size node)
> +(eval-when (:compile-toplevel)
> + (aver (= thread-tot-bytes-alloc-unboxed-slot
> + (1+ thread-tot-bytes-alloc-boxed-slot))))
> +
> +(defun instrument-alloc (type size node)
> + (declare (ignorable type))
> #+allocator-metrics
> - (progn
> - (inst add :qword (thread-slot-ea thread-total-bytes-allocated-slot)
> - (cond ((typep size '(or (signed-byte 32))) size)
> - (t (inst mov temp-reg-tn size) temp-reg-tn)))
> + (let ((size-temp (not (typep size '(or (signed-byte 32) tn)))))
> + (cond ((tn-p type) ; from ALLOCATE-VECTOR-ON-HEAP
> + ;; Constant huge size + unknown type can't occur.
> + (aver (not size-temp))
> + (inst cmp :byte type simple-vector-widetag)
> + (inst set :ne temp-reg-tn)
> + (inst and :dword temp-reg-tn 1)
> + (inst add :qword
> + (ea (ash thread-tot-bytes-alloc-boxed-slot word-shift)
> + thread-base-tn temp-reg-tn 8)
> + size))
> + (t
> + (inst add :qword
> + (thread-slot-ea (if (alloc-unboxed-p type)
> + thread-tot-bytes-alloc-unboxed-slot
> + thread-tot-bytes-alloc-boxed-slot))
> + (cond (size-temp (inst mov temp-reg-tn size) temp-reg-tn)
> + (t size)))))
> (cond ((tn-p size)
> - ;; the low 4 bins of the histogram can't be used,
> + ;; the first 4 bins of the histogram can't be used,
> ;; but I don't care. Math is hard.
> (inst bsr temp-reg-tn size)
> - (inst inc :qword
> - (ea (ash thread-obj-size-histo-slot word-shift)
> - thread-base-tn temp-reg-tn 8)))
> + (inst inc :qword (ea (ash thread-obj-size-histo-slot word-shift)
> + thread-base-tn temp-reg-tn 8)))
> (t
> (inst inc :qword
> (thread-slot-ea (+ thread-obj-size-histo-slot
> @@ -149,13 +177,13 @@
> ;;; 2. how to allocate it: policy and how to invoke the trampoline
> ;;; 3. where to put the result
> (defun allocation (type size lowtag node dynamic-extent alloc-tn)
> + (aver (not (sb-assem::assembling-to-elsewhere-p)))
> (when dynamic-extent
> (stack-allocation alloc-tn size lowtag)
> (return-from allocation (values)))
> (aver (and (not (location= alloc-tn temp-reg-tn))
> (or (integerp size) (not (location= size temp-reg-tn)))))
>
> - (aver (not (sb-assem::assembling-to-elsewhere-p)))
> ;; Otherwise do the normal inline allocation thing
> (let* ((NOT-INLINE (gen-label))
> (DONE (gen-label))
> @@ -222,7 +250,7 @@
> (allocation nil bytes other-pointer-lowtag node t result-tn)
> (storew header result-tn 0 other-pointer-lowtag))
> (t
> - (instrument-alloc bytes node)
> + (instrument-alloc widetag bytes node)
> (pseudo-atomic ()
> (allocation nil bytes 0 node nil result-tn)
> (storew* header result-tn 0 0 t)
> @@ -262,7 +290,7 @@
> (stack-allocate-p (node-stack-allocate-p node))
> (size (* (pad-data-block cons-size) cons-cells)))
> (unless stack-allocate-p
> - (instrument-alloc size node))
> + (instrument-alloc 'list size node))
> (pseudo-atomic (:elide-if stack-allocate-p)
> (allocation 'list size (if (= cons-cells 2) 0 list-pointer-lowtag)
> node stack-allocate-p res)
> @@ -455,7 +483,12 @@
> ;; The LET generates instructions that needn't be pseudoatomic
> ;; so don't move it inside.
> (let ((size (calc-size-in-bytes words result)))
> - (instrument-alloc size node)
> + (instrument-alloc (if (sc-is type immediate)
> + (case (tn-value type)
> + (#.simple-vector-widetag 'simple-vector)
> + (t 'unboxed-array))
> + type)
> + size node)
> (pseudo-atomic ()
> (allocation nil size 0 node nil result)
> (put-header result 0 type length t)
> @@ -615,7 +648,7 @@
> (loop (gen-label))
> (no-init
> (and (sc-is element immediate) (eql (tn-value element) 0))))
> - (instrument-alloc size node)
> + (instrument-alloc 'list size node)
> (pseudo-atomic ()
> (allocation 'list size list-pointer-lowtag node nil result)
> (compute-end)
> @@ -658,7 +691,7 @@
> (bytes (pad-data-block words))
> (header (logior (ash (1- words) n-widetag-bits) closure-widetag)))
> (unless stack-allocate-p
> - (instrument-alloc bytes node))
> + (instrument-alloc closure-widetag bytes node))
> (pseudo-atomic (:elide-if stack-allocate-p)
> (allocation nil bytes fun-pointer-lowtag node stack-allocate-p result)
> (storew* #-immobile-space header ; write the widetag and size
> @@ -712,7 +745,7 @@
> (when (eq type bignum-widetag) (setq bytes (* bytes 2))) ; use 2x the space
> (progn name) ; possibly not used
> (unless stack-allocate-p
> - (instrument-alloc bytes node))
> + (instrument-alloc type bytes node))
> (pseudo-atomic (:elide-if stack-allocate-p)
> ;; If storing a header word, defer ORing in the lowtag until after
> ;; the header is written so that displacement can be 0.
> @@ -768,7 +801,7 @@
> (stack-allocation result bytes lowtag)
> (storew header result 0 lowtag))
> (t
> - (instrument-alloc bytes node)
> + (instrument-alloc type bytes node)
> (pseudo-atomic ()
> (allocation nil bytes lowtag node nil result)
> (storew header result 0 lowtag))))))
> diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp
> index 748b1e6aa..a0ba5910b 100644
> --- a/src/compiler/x86-64/array.lisp
> +++ b/src/compiler/x86-64/array.lisp
> @@ -82,7 +82,7 @@
> (inst shl :dword header array-rank-byte-pos)
> (inst or :dword header type)
> (inst shr :dword header n-fixnum-tag-bits)
> - (instrument-alloc bytes node)
> + (instrument-alloc nil bytes node)
> (pseudo-atomic ()
> (allocation nil bytes 0 node nil result)
> (storew header result 0 0)
> diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp
> index d7ab6ab17..9a1f4a945 100644
> --- a/src/compiler/x86-64/call.lisp
> +++ b/src/compiler/x86-64/call.lisp
> @@ -1297,7 +1297,7 @@
> (inst jmp :z done)
> (inst lea dst (ea nil rcx (ash 2 (- word-shift n-fixnum-tag-bits))))
> (unless stack-allocate-p
> - (instrument-alloc dst node))
> + (instrument-alloc 'list dst node))
> (pseudo-atomic (:elide-if stack-allocate-p)
> (allocation 'list dst list-pointer-lowtag node stack-allocate-p dst)
> ;; Set up the result.
>
> -----------------------------------------------------------------------
>
>
> hooks/post-receive
> --
> SBCL
>
>
> _______________________________________________
> Sbcl-commits mailing list
> Sbcl-commits@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/sbcl-commits
_______________________________________________
Sbcl-commits mailing list
Sbcl-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sbcl-commits
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic