[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sbcl-commits
Subject:    Re: [Sbcl-commits] master: Add another allocator metric
From:       Stas Boukarev <stassats () gmail ! com>
Date:       2021-08-21 11:05:08
Message-ID: CAF63=13CJii-Mhvr=Xqu=WX=2bMczh_2vjwL8wt0qBwSm2yf_A () mail ! gmail ! com
[Download RAW message or body]

/tests/aprof.impure.lisp
; in: SB-C:DEFINE-VOP (ALLOC-TO-R8)
;     (SB-VM::INSTRUMENT-ALLOC SB-VM::BYTES SB-VM::NODE)
;
; caught STYLE-WARNING:
;   The function SB-VM::INSTRUMENT-ALLOC is called with two arguments,
but wants exactly three.
;
; compilation unit finished
;   caught 1 STYLE-WARNING condition
::: Running :APROF-SMOKETEST-LARGE-VECTOR-TO-UPPER-REGISTER
::: UNEXPECTED-FAILURE :APROF-SMOKETEST-LARGE-VECTOR-TO-UPPER-REGISTER
    due to SB-INT:SIMPLE-PROGRAM-ERROR: "invalid number of arguments: 2"

On Sat, Aug 21, 2021 at 5:35 AM Douglas Katzman via Sbcl-commits
<sbcl-commits@lists.sourceforge.net> wrote:
>
> The branch "master" has been updated in SBCL:
>        via  7cdbc7e69b80996fddba7d8bbda339764dc6da9d (commit)
>       from  fafa3fb60e5797ee73f220b00ca8ec65ebb6dbc6 (commit)
>
> - Log -----------------------------------------------------------------
> commit 7cdbc7e69b80996fddba7d8bbda339764dc6da9d
> Author: Douglas Katzman <dougk@google.com>
> Date:   Fri Aug 20 22:32:16 2021 -0400
>
>     Add another allocator metric
>
>     Compute number of unboxed bytes versus boxed bytes per thread
> ---
>  src/assembly/x86-64/arith.lisp   |  4 +--
>  src/code/target-thread.lisp      | 24 +++++++-------
>  src/compiler/generic/objdef.lisp |  3 +-
>  src/compiler/x86-64/alloc.lisp   | 67 ++++++++++++++++++++++++++++++----------
>  src/compiler/x86-64/array.lisp   |  2 +-
>  src/compiler/x86-64/call.lisp    |  2 +-
>  6 files changed, 67 insertions(+), 35 deletions(-)
>
> diff --git a/src/assembly/x86-64/arith.lisp b/src/assembly/x86-64/arith.lisp
> index 1d6dc8fc3..5cdac0b2c 100644
> --- a/src/assembly/x86-64/arith.lisp
> +++ b/src/assembly/x86-64/arith.lisp
> @@ -52,9 +52,9 @@
>  (defun return-single-word-bignum (dest alloc-tn source)
>    (let ((header (logior (ash 1 n-widetag-bits) bignum-widetag))
>          (nbytes #+bignum-assertions 32 #-bignum-assertions 16))
> -    (instrument-alloc nbytes nil)
> +    (instrument-alloc bignum-widetag nbytes nil)
>      (pseudo-atomic ()
> -      (allocation nil nbytes 0 nil nil alloc-tn)
> +      (allocation bignum-widetag nbytes 0 nil nil alloc-tn)
>        (storew* header alloc-tn 0 0 t)
>        (storew source alloc-tn bignum-digits-offset 0)
>        (if (eq dest alloc-tn)
> diff --git a/src/code/target-thread.lisp b/src/code/target-thread.lisp
> index 766186b67..bed5af477 100644
> --- a/src/code/target-thread.lisp
> +++ b/src/code/target-thread.lisp
> @@ -2636,12 +2636,8 @@ mechanism for inter-thread communication."
>                 (sum (a b)
>                   (cond ((null a) b)
>                         ((null b) a)
> -                       (t (list (+ (first a) (first b))
> -                                (vector-sum (second a) (second b))
> -                                (+ (third a) (third b))
> -                                (+ (fourth a) (fourth b))
> -                                (+ (fifth a) (fifth b))
> -                                (+ (sixth a) (sixth b)))))))
> +                       (t (cons (vector-sum (car a) (car b))
> +                                (mapcar #'+ (cdr a) (cdr b)))))))
>          (reduce #'sum
>                  ;; what about the finalizer thread?
>                  (mapcar 'allocator-histogram (list-all-threads))))
> @@ -2651,10 +2647,10 @@ mechanism for inter-thread communication."
>              (declare (notinline position)) ; style-warning for some reason
>              (dotimes (i sb-vm:n-word-bits)
>                (setf (aref a i) (histogram-value c-thread i)))
> -            (list (metric c-thread sb-vm::thread-total-bytes-allocated-slot)
> -                  ;; discard uninteresting entries
> -                  (subseq a sb-vm:n-lowtag-bits
> +            (list (subseq a sb-vm:n-lowtag-bits ; discard uninteresting entries
>                            (1+ (position 0 a :from-end t :test #'/=)))
> +                  (metric c-thread sb-vm::thread-tot-bytes-alloc-boxed-slot)
> +                  (metric c-thread sb-vm::thread-tot-bytes-alloc-unboxed-slot)
>                    (metric c-thread sb-vm::thread-slow-path-allocs-slot)
>                    (metric c-thread sb-vm::thread-et-allocator-mutex-acq-slot)
>                    (metric c-thread sb-vm::thread-et-find-freeish-page-slot)
> @@ -2663,11 +2659,14 @@ mechanism for inter-thread communication."
>  (defun reset-allocator-histogram (&optional (thread *current-thread*))
>    (with-deathlok (thread c-thread)
>      (unless (= c-thread 0)
> +      (setf (metric c-thread sb-vm::thread-tot-bytes-alloc-boxed-slot) 0
> +            (metric c-thread sb-vm::thread-tot-bytes-alloc-unboxed-slot) 0
> +            (metric c-thread sb-vm::thread-slow-path-allocs-slot) 0)
>        (dotimes (i sb-vm:n-word-bits)
>          (setf (histogram-value c-thread i) 0)))))
>
>  (defun print-allocator-histogram (&optional (thread *current-thread*))
> -  (destructuring-bind (total-bytes bins n-slow-path lock find clear)
> +  (destructuring-bind (bins tot-bytes-boxed tot-bytes-unboxed n-slow-path lock find clear)
>        (allocator-histogram thread)
>      (let ((total-objects (reduce #'+ bins))
>            (size (* 4 sb-vm:n-word-bytes)) ; "<=" this size is the smallest bin
> @@ -2681,9 +2680,8 @@ mechanism for inter-thread communication."
>                      (format nil "< 2^~d" (1- (integer-length size))))
>                  count (/ cumulative total-objects))
>          (setq size (* size 2)))
> -      (format t "Total: ~D bytes, ~D objects, ~,2,2f% fast path~%"
> -              total-bytes
> -              total-objects
> +      (format t "Total: ~D+~D bytes, ~D objects, ~,2,2f% fast path~%"
> +              tot-bytes-boxed tot-bytes-unboxed total-objects
>                (/ (- total-objects n-slow-path) total-objects))
>        (format t "Times (sec): lock=~,,-9f find=~,,-9f clear=~,,-9f~%"
>                lock find clear)))))
> diff --git a/src/compiler/generic/objdef.lisp b/src/compiler/generic/objdef.lisp
> index 0e2b5b9a3..e34376df4 100644
> --- a/src/compiler/generic/objdef.lisp
> +++ b/src/compiler/generic/objdef.lisp
> @@ -569,7 +569,8 @@ during backtrace.
>    (mach-port-name :c-type "mach_port_name_t")
>
>    ;; allocation instrumenting
> -  (total-bytes-allocated)
> +  (tot-bytes-alloc-boxed)
> +  (tot-bytes-alloc-unboxed)
>    (slow-path-allocs)
>    (et-allocator-mutex-acq) ; elapsed times
>    (et-find-freeish-page)
> diff --git a/src/compiler/x86-64/alloc.lisp b/src/compiler/x86-64/alloc.lisp
> index 2d7b988d1..153557e17 100644
> --- a/src/compiler/x86-64/alloc.lisp
> +++ b/src/compiler/x86-64/alloc.lisp
> @@ -53,6 +53,16 @@
>                  (used-p (sb-c::ir2-component-normal-tns comp))
>                  (used-p (sb-c::ir2-component-wired-tns comp))))))))
>
> +(defun alloc-unboxed-p (type)
> +  (case type
> +    ((unboxed-array
> +      #.bignum-widetag
> +      #.sap-widetag
> +      #.double-float-widetag
> +      #.complex-single-float-widetag
> +      #.complex-double-float-widetag)
> +     t)))
> +
>  ;;; Call an allocator trampoline and get the result in the proper register.
>  ;;; There are 2x2 choices of trampoline:
>  ;;;  - invoke alloc() or alloc_list() in C
> @@ -90,19 +100,37 @@
>      (inst or :byte result-tn lowtag)))
>
>  ;;; Insert allocation profiler instrumentation
> -(defun instrument-alloc (size node)
> +(eval-when (:compile-toplevel)
> +  (aver (= thread-tot-bytes-alloc-unboxed-slot
> +           (1+ thread-tot-bytes-alloc-boxed-slot))))
> +
> +(defun instrument-alloc (type size node)
> +  (declare (ignorable type))
>    #+allocator-metrics
> -  (progn
> -    (inst add :qword (thread-slot-ea thread-total-bytes-allocated-slot)
> -          (cond ((typep size '(or (signed-byte 32))) size)
> -                (t (inst mov temp-reg-tn size) temp-reg-tn)))
> +  (let ((size-temp (not (typep size '(or (signed-byte 32) tn)))))
> +    (cond ((tn-p type) ; from ALLOCATE-VECTOR-ON-HEAP
> +           ;; Constant huge size + unknown type can't occur.
> +           (aver (not size-temp))
> +           (inst cmp :byte type simple-vector-widetag)
> +           (inst set :ne temp-reg-tn)
> +           (inst and :dword temp-reg-tn 1)
> +           (inst add :qword
> +                 (ea (ash thread-tot-bytes-alloc-boxed-slot word-shift)
> +                     thread-base-tn temp-reg-tn 8)
> +                 size))
> +          (t
> +           (inst add :qword
> +                 (thread-slot-ea (if (alloc-unboxed-p type)
> +                                     thread-tot-bytes-alloc-unboxed-slot
> +                                     thread-tot-bytes-alloc-boxed-slot))
> +                 (cond (size-temp (inst mov temp-reg-tn size) temp-reg-tn)
> +                       (t size)))))
>      (cond ((tn-p size)
> -           ;; the low 4 bins of the histogram can't be used,
> +           ;; the first 4 bins of the histogram can't be used,
>             ;; but I don't care. Math is hard.
>             (inst bsr temp-reg-tn size)
> -           (inst inc :qword
> -                 (ea (ash thread-obj-size-histo-slot word-shift)
> -                     thread-base-tn temp-reg-tn 8)))
> +           (inst inc :qword (ea (ash thread-obj-size-histo-slot word-shift)
> +                                thread-base-tn temp-reg-tn 8)))
>            (t
>             (inst inc :qword
>                   (thread-slot-ea (+ thread-obj-size-histo-slot
> @@ -149,13 +177,13 @@
>  ;;; 2. how to allocate it: policy and how to invoke the trampoline
>  ;;; 3. where to put the result
>  (defun allocation (type size lowtag node dynamic-extent alloc-tn)
> +  (aver (not (sb-assem::assembling-to-elsewhere-p)))
>    (when dynamic-extent
>      (stack-allocation alloc-tn size lowtag)
>      (return-from allocation (values)))
>    (aver (and (not (location= alloc-tn temp-reg-tn))
>               (or (integerp size) (not (location= size temp-reg-tn)))))
>
> -  (aver (not (sb-assem::assembling-to-elsewhere-p)))
>    ;; Otherwise do the normal inline allocation thing
>    (let* ((NOT-INLINE (gen-label))
>           (DONE (gen-label))
> @@ -222,7 +250,7 @@
>             (allocation nil bytes other-pointer-lowtag node t result-tn)
>             (storew header result-tn 0 other-pointer-lowtag))
>            (t
> -           (instrument-alloc bytes node)
> +           (instrument-alloc widetag bytes node)
>             (pseudo-atomic ()
>               (allocation nil bytes 0 node nil result-tn)
>               (storew* header result-tn 0 0 t)
> @@ -262,7 +290,7 @@
>                      (stack-allocate-p (node-stack-allocate-p node))
>                      (size (* (pad-data-block cons-size) cons-cells)))
>                 (unless stack-allocate-p
> -                 (instrument-alloc size node))
> +                 (instrument-alloc 'list size node))
>                 (pseudo-atomic (:elide-if stack-allocate-p)
>                  (allocation 'list size (if (= cons-cells 2) 0 list-pointer-lowtag)
>                              node stack-allocate-p res)
> @@ -455,7 +483,12 @@
>        ;; The LET generates instructions that needn't be pseudoatomic
>        ;; so don't move it inside.
>        (let ((size (calc-size-in-bytes words result)))
> -        (instrument-alloc size node)
> +        (instrument-alloc (if (sc-is type immediate)
> +                              (case (tn-value type)
> +                                (#.simple-vector-widetag 'simple-vector)
> +                                (t 'unboxed-array))
> +                              type)
> +                          size node)
>          (pseudo-atomic ()
>           (allocation nil size 0 node nil result)
>           (put-header result 0 type length t)
> @@ -615,7 +648,7 @@
>              (loop (gen-label))
>              (no-init
>               (and (sc-is element immediate) (eql (tn-value element) 0))))
> -        (instrument-alloc size node)
> +        (instrument-alloc 'list size node)
>          (pseudo-atomic ()
>           (allocation 'list size list-pointer-lowtag node nil result)
>           (compute-end)
> @@ -658,7 +691,7 @@
>            (bytes (pad-data-block words))
>            (header (logior (ash (1- words) n-widetag-bits) closure-widetag)))
>       (unless stack-allocate-p
> -       (instrument-alloc bytes node))
> +       (instrument-alloc closure-widetag bytes node))
>       (pseudo-atomic (:elide-if stack-allocate-p)
>         (allocation nil bytes fun-pointer-lowtag node stack-allocate-p result)
>         (storew* #-immobile-space header ; write the widetag and size
> @@ -712,7 +745,7 @@
>      (when (eq type bignum-widetag) (setq bytes (* bytes 2))) ; use 2x the space
>      (progn name) ; possibly not used
>      (unless stack-allocate-p
> -      (instrument-alloc bytes node))
> +      (instrument-alloc type bytes node))
>      (pseudo-atomic (:elide-if stack-allocate-p)
>        ;; If storing a header word, defer ORing in the lowtag until after
>        ;; the header is written so that displacement can be 0.
> @@ -768,7 +801,7 @@
>               (stack-allocation result bytes lowtag)
>               (storew header result 0 lowtag))
>           (t
> -             (instrument-alloc bytes node)
> +             (instrument-alloc type bytes node)
>               (pseudo-atomic ()
>                (allocation nil bytes lowtag node nil result)
>                (storew header result 0 lowtag))))))
> diff --git a/src/compiler/x86-64/array.lisp b/src/compiler/x86-64/array.lisp
> index 748b1e6aa..a0ba5910b 100644
> --- a/src/compiler/x86-64/array.lisp
> +++ b/src/compiler/x86-64/array.lisp
> @@ -82,7 +82,7 @@
>      (inst shl :dword header array-rank-byte-pos)
>      (inst or  :dword header type)
>      (inst shr :dword header n-fixnum-tag-bits)
> -    (instrument-alloc bytes node)
> +    (instrument-alloc nil bytes node)
>      (pseudo-atomic ()
>       (allocation nil bytes 0 node nil result)
>       (storew header result 0 0)
> diff --git a/src/compiler/x86-64/call.lisp b/src/compiler/x86-64/call.lisp
> index d7ab6ab17..9a1f4a945 100644
> --- a/src/compiler/x86-64/call.lisp
> +++ b/src/compiler/x86-64/call.lisp
> @@ -1297,7 +1297,7 @@
>        (inst jmp :z done)
>        (inst lea dst (ea nil rcx (ash 2 (- word-shift n-fixnum-tag-bits))))
>        (unless stack-allocate-p
> -        (instrument-alloc dst node))
> +        (instrument-alloc 'list dst node))
>        (pseudo-atomic (:elide-if stack-allocate-p)
>         (allocation 'list dst list-pointer-lowtag node stack-allocate-p dst)
>         ;; Set up the result.
>
> -----------------------------------------------------------------------
>
>
> hooks/post-receive
> --
> SBCL
>
>
> _______________________________________________
> Sbcl-commits mailing list
> Sbcl-commits@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/sbcl-commits


_______________________________________________
Sbcl-commits mailing list
Sbcl-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sbcl-commits
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic