[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sbcl-devel
Subject:    Re: [Sbcl-devel] [Sbcl-commits] master: Reduce a test's flakiness a thousandfold
From:       Douglas Katzman via Sbcl-devel <sbcl-devel () lists ! sourceforge ! net>
Date:       2020-08-03 21:47:28
Message-ID: CAOrNasy=FQ+LkLAu6Vf6745-vG6o388JD9eUWWOsPpxAkdC3GA () mail ! gmail ! com
[Download RAW message or body]

[Attachment #2 (multipart/alternative)]


ah, because I'm an idiot and used instance-ref because of the "FIXME: can't
SETF semaphore-name"
I forgot that I didn't fix that first (as per the docstring that it's
SETFable) so of course the %instance-ref index is wrong.

On Mon, Aug 3, 2020 at 5:40 PM Stas Boukarev <stassats@gmail.com> wrote:

> Getting on x86:
>
> 5346Z While evaluating the form starting at line 396, column 0
> 2020-08-03T20:53:11.8835967Z   of
> #P"/home/runner/work/sbcl/sbcl/tests/deadline.impure.lisp":
> 2020-08-03T20:53:11.8836504Z Unhandled TYPE-ERROR: The value
> 2020-08-03T20:53:11.8836796Z                         SB-THREAD:SEMAPHORE
> 2020-08-03T20:53:11.8837120Z                       is not of type
> 2020-08-03T20:53:11.8837416Z                         SB-KERNEL:CLASSOID
> 2020-08-03T20:53:11.8837727Z   #<error printing a
> SB-KERNEL:LAYOUT-INVALID: #<TYPE-ERROR expected-type:
> 2020-08-03T20:53:11.8838059Z
>                   SB-KERNEL:CLASSOID
> 2020-08-03T20:53:11.8838190Z
>                   datum:
> 2020-08-03T20:53:11.8838510Z
>                   SB-THREAD:SEMAPHORE>>
>
>
> On Mon, Aug 3, 2020 at 11:40 PM Douglas Katzman via Sbcl-commits
> <sbcl-commits@lists.sourceforge.net> wrote:
> >
> > The branch "master" has been updated in SBCL:
> >        via  de5163a73314a7fc413a5bd56500757f910bed08 (commit)
> >       from  21a778b481f9b9041f71953fbdc84bd3c39a8fa6 (commit)
> >
> > - Log -----------------------------------------------------------------
> > commit de5163a73314a7fc413a5bd56500757f910bed08
> > Author: Douglas Katzman <dougk@google.com>
> > Date:   Mon Aug 3 16:19:06 2020 -0400
> >
> >     Reduce a test's flakiness a thousandfold
> >
> >     It still makes assumptions that the specified timeouts cause threads
> >     to proceed in lockstep, but the failure rate is much more tolerable.
> > ---
> >  src/runtime/linux-os.c     |  99 ++++++++++++++++++++++
> >  tests/deadline.impure.lisp | 207
> +++++++++++++++++++++++++++++++++++++++++++++
> >  tests/threads.impure.lisp  |  64 --------------
> >  3 files changed, 306 insertions(+), 64 deletions(-)
> >
> > diff --git a/src/runtime/linux-os.c b/src/runtime/linux-os.c
> > index 4db91810f..490b996de 100644
> > --- a/src/runtime/linux-os.c
> > +++ b/src/runtime/linux-os.c
> > @@ -67,6 +67,93 @@ int personality (unsigned long);
> >  #include <unistd.h>
> >  #include <errno.h>
> >
> > +#ifdef MUTEX_EVENTRECORDING
> > +#include "genesis/mutex.h"
> > +#define MAXEVENTS 200
> > +static struct {
> > +    struct thread* th;
> > +    struct timespec ts;
> > +    char *label;
> > +    char *mutex_name;
> > +    sword_t timeout;
> > +} events[MAXEVENTS];
> > +static int record_mutex_events;
> > +static int eventcount;
> > +
> > +void lisp_mutex_event(char *string) {
> > +    if (record_mutex_events) {
> > +        int id = __sync_fetch_and_add(&eventcount, 1);
> > +        if (id >= MAXEVENTS) lose("event buffer overflow");
> > +        clock_gettime(CLOCK_REALTIME, &events[id].ts);
> > +        events[id].th = current_thread;
> > +        events[id].label = string;
> > +        events[id].mutex_name = 0;
> > +        events[id].timeout = -1;
> > +    }
> > +}
> > +void lisp_mutex_event1(char *string, char *string2) {
> > +    if (record_mutex_events) {
> > +        int id = __sync_fetch_and_add(&eventcount, 1);
> > +        if (id >= MAXEVENTS) lose("event buffer overflow");
> > +        clock_gettime(CLOCK_REALTIME, &events[id].ts);
> > +        events[id].th = current_thread;
> > +        events[id].label = string;
> > +        events[id].mutex_name = string2;
> > +        events[id].timeout = -1;
> > +    }
> > +}
> > +void lisp_mutex_event2(char *string, char *string2, uword_t usec) {
> > +    if (record_mutex_events) {
> > +        int id = __sync_fetch_and_add(&eventcount, 1);
> > +        if (id >= MAXEVENTS) lose("event buffer overflow");
> > +        clock_gettime(CLOCK_REALTIME, &events[id].ts);
> > +        events[id].th = current_thread;
> > +        events[id].label = string;
> > +        events[id].mutex_name = string2;
> > +        events[id].timeout = usec;
> > +    }
> > +}
> > +void lisp_mutex_start_eventrecording() {
> > +    eventcount = 0;
> > +    record_mutex_events = 1;
> > +}
> > +void lisp_mutex_done_eventrecording() {
> > +    record_mutex_events = 0;
> > +    int i;
> > +    fprintf(stderr, "event log:\n");
> > +    struct timespec basetime = events[0].ts;
> > +    for(i=0; i<eventcount;++i) {
> > +        struct thread *th = events[i].th;
> > +        struct thread_instance *ti =
> (void*)native_pointer(th->lisp_thread);
> > +        struct timespec rel_time = events[i].ts;
> > +        rel_time.tv_sec -= basetime.tv_sec;
> > +        rel_time.tv_nsec -= basetime.tv_nsec;
> > +        if (rel_time.tv_nsec<0) rel_time.tv_nsec += 1000 * 1000 * 1000,
> rel_time.tv_sec--;
> > +        lispobj threadname = ti->name;
> > +        if (events[i].timeout >= 0) // must also have mutex_name in
> this case
> > +            fprintf(stderr, "[%d.%09ld] %s: %s '%s' timeout %ld\n",
> > +                    (int)rel_time.tv_sec, rel_time.tv_nsec,
> > +                    (char*)VECTOR(threadname)->data,
> > +                    events[i].label, events[i].mutex_name,
> events[i].timeout);
> > +        else if (events[i].mutex_name)
> > +            fprintf(stderr, "[%d.%09ld] %s: %s '%s'\n",
> > +                    (int)rel_time.tv_sec, rel_time.tv_nsec,
> > +                    (char*)VECTOR(threadname)->data,
> > +                    events[i].label, events[i].mutex_name);
> > +        else
> > +            fprintf(stderr, "[%d.%09ld] %s: %s\n",
> > +                    (int)rel_time.tv_sec, rel_time.tv_nsec,
> > +                    (char*)VECTOR(threadname)->data,
> > +                    events[i].label);
> > +    }
> > +    fprintf(stderr, "-----\n");
> > +}
> > +#else
> > +#define lisp_mutex_event(x)
> > +#define lisp_mutex_event1(x,y)
> > +#define lisp_mutex_event2(x,y,z)
> > +#endif
> > +
> >  /* values taken from the kernel's linux/futex.h.  This header file
> >     doesn't exist in userspace, which is our excuse for not grovelling
> >     them automatically */
> > @@ -136,14 +223,21 @@ futex_wait(int *lock_word, int oldval, long sec,
> unsigned long usec)
> >    struct timespec timeout;
> >    int t;
> >
> > +#ifdef MUTEX_EVENTRECORDING
> > +    struct mutex* m = (void*)((char*)lock_word - offsetof(struct
> mutex,state));
> > +    char *name = m->name != NIL ? (char*)VECTOR(m->name)->data :
> "(unnamed)";
> > +#endif
> >    if (sec<0) {
> > +      lisp_mutex_event1("start futex wait", name);
> >        t = sys_futex(lock_word, futex_wait_op(), oldval, 0);
> >    }
> >    else {
> >        timeout.tv_sec = sec;
> >        timeout.tv_nsec = usec * 1000;
> > +      lisp_mutex_event2("start futex timedwait", name, usec);
> >        t = sys_futex(lock_word, futex_wait_op(), oldval, &timeout);
> >    }
> > +  lisp_mutex_event1("back from sys_futex", name);
> >    if (t==0)
> >        return 0;
> >    else if (errno==ETIMEDOUT)
> > @@ -158,6 +252,11 @@ futex_wait(int *lock_word, int oldval, long sec,
> unsigned long usec)
> >  int
> >  futex_wake(int *lock_word, int n)
> >  {
> > +#ifdef MUTEX_EVENTRECORDING
> > +    struct mutex* m = (void*)((char*)lock_word - offsetof(struct
> mutex,state));
> > +    char *name = m->name != NIL ? (char*)VECTOR(m->name)->data :
> "(unnamed)";
> > +    lisp_mutex_event1("waking futex", name);
> > +#endif
> >      return sys_futex(lock_word, futex_wake_op(),n,0);
> >  }
> >  #endif
> > diff --git a/tests/deadline.impure.lisp b/tests/deadline.impure.lisp
> > index d03506d48..1294b81a2 100644
> > --- a/tests/deadline.impure.lisp
> > +++ b/tests/deadline.impure.lisp
> > @@ -190,3 +190,210 @@
> >                          (sb-sys:cancel-deadline condition))))
> >          (sb-sys:with-deadline (:seconds .1) (sleep 1)))
> >      sb-sys:deadline-timeout))
> > +
> > +(defmacro timestamp (string &optional mutex)
> > +  (declare (ignorable string mutex))
> > +  #+nil
> > +  `(let ((s ,string))
> > +     (sb-sys:with-pinned-objects (s)
> > +       ,(if mutex
> > +            `(alien-funcall (extern-alien "lisp_mutex_event1" (function
> void system-area-pointer system-area-pointer))
> > +                            (sb-sys:vector-sap s)
> > +                            (sb-sys:vector-sap (or (mutex-name ,mutex)
> "unnamed")))
> > +            `(alien-funcall (extern-alien "lisp_mutex_event" (function
> void system-area-pointer))
> > +                            (sb-sys:vector-sap s))))))
> > +
> > +;;; I observed (at least) four different ways this next test can flake:
> > +;;;
> > +;;; 1. one of the first CONDITION-WAITs at the initial deadline can be
> spuriously woken.
> > +;;;    This is easy to see:
> > +;;;      Thread A            Thread B
> > +;;;      --------            --------
> > +;;;      mutex.acquire
> > +;;;      cv.wait
> > +;;;        set wakeup token
> > +;;;        mutex.release
> > +;;;                           mutex.acquire
> > +;;;                           cv.wait
> > +;;;                             set wakeup token
> > +;;;                             mutex.release
> > +;;;                             sys_futex
> > +;;;        sys_futex
> > +;;;        (* no waiting - "wrong" token *)
> > +;;;    So this causes the (:UNKNOWN :UNKNOWN) result for one thread,
> > +;;;    because it never enters its deadline handler and because the
> customary
> > +;;;    ("obligatory" one might say) loop around condition-wait is
> absent.
> > +;;;    This is so common that it is no longer considered a failure.
> > +;;;
> > +;;; 2. one of the threads could be scheduled entirely before the other-
> > +;;;    that is, it runs from start to finish before the other thread
> does anything.
> > +;;;    This causes them both to conclude they acquired the mutex,
> > +;;;    though the deadline handlers were completely nonoverlapping.
> > +;;;
> > +;;; 3. one of the calls to SYS_futex with a timeout can wait
> significantly more than
> > +;;;    requested. This essentially reduces to case 2 but in a more
> convoluted way,
> > +;;;    which I've somewhat mitigated this by scaling up the timeouts so
> that we're
> > +;;;    not skating at the edge of the granularity the system is willing
> to give you.
> > +;;;    This mode of flakiness is likely to happen on a cloud compute
> environment.
> > +;;;
> > +;;;  4. the deadline handler can be invoked twice in one thread because
> > +;;;     - I don't know why.
> > +;;;
> > +;;; Here's an example with MUTEX_EVENTRECORDING #defined that was
> > +;;; considered a "failure":
> > +;;; [0.000000000] main thread: try-mutex: acquired 'Make-Thread Lock'
> > +;;; [0.000038478] main thread: released 'Make-Thread Lock'
> > +;;; [0.000053675] main thread: try-mutex: acquired 'Make-Thread Lock'
> > +;;; [0.000068255] main thread: released 'Make-Thread Lock'
> > +;;; [0.000069415] main thread: try-mutex: acquired 'gate sem mutex'
> > +;;; [0.000069781] main thread: released 'gate sem mutex'
> > +;;; [0.000071536] main thread: start futex wait 'thread result lock'
> > +;;; [0.000094761] B: try-mutex: acquired 'gate sem mutex'
> > +;;; [0.000095494] B: released 'gate sem mutex'
> > +;;; [0.000096646] B: deadline established
> > +;;; [0.000097026] B: try-mutex: acquired 'testmut'
> > +;;; [0.000097931] B: released 'testmut'
> > +;;; [0.000098251] B: start futex timedwait 'testcv' timeout 300000
> > +;;; [0.300161614] B: back from sys_futex 'testcv'
> > +;;; [0.300163763] B: try-mutex: acquired 'testmut'
> > +;;; [0.300172833] B: enter deadline handler
> > +;;; [0.300173184] B: start sleep
> > +;;; [0.601937423] A: try-mutex: acquired 'gate sem mutex'
> > +;;; [0.601940795] A: released 'gate sem mutex'
> > +;;; [0.601945635] A: deadline established
> > +;;; [0.601952083] A: start futex timedwait 'testmut' timeout 300000
> > +;;; [0.900264670] B: end sleep
> > +;;; [0.900266124] B: waking futex 'testcv'
> > +;;; [0.900280645] B: waking futex 'testmut'
> > +;;; [0.900285623] B: released 'testmut'
> > +;;; [0.900290563] B: try-mutex: acquired 'thread interruptions lock'
> > +;;; [0.900290777] B: released 'thread interruptions lock'
> > +;;; [0.900292127] B: try-mutex: acquired 'session lock'
> > +;;; [0.900293214] B: released 'session lock'
> > +;;; [0.900293703] B: released 'thread result lock'
> > +;;; [0.900294381] A: back from sys_futex 'testmut'
> > +;;; [0.900296267] A: %wait-for-mutex: acquired 'testmut'
> > +;;; [0.900299208] A: waking futex 'testmut'
> > +;;; [0.900300544] A: released 'testmut'
> > +;;; [0.900301101] A: start futex timedwait 'testcv' timeout 1000
> > +;;; [0.901358710] A: back from sys_futex 'testcv'
> > +;;; [0.901359680] A: try-mutex: acquired 'testmut'
> > +;;; [0.901369935] A: enter deadline handler
> > +;;; [0.901370203] A: start sleep
> > +;;; [1.501418757] A: end sleep
> > +;;; [1.501419570] A: waking futex 'testcv'
> > +;;; [1.501456896] A: released 'testmut'
> > +;;; [1.501460896] A: try-mutex: acquired 'thread interruptions lock'
> > +;;; [1.501461171] A: released 'thread interruptions lock'
> > +;;; [1.501462039] A: try-mutex: acquired 'session lock'
> > +;;; [1.501463001] A: released 'session lock'
> > +;;; [1.501463544] A: waking futex 'thread result lock'
> > +;;; [1.501465881] A: released 'thread result lock'
> > +;;; [1.501474477] main thread: back from sys_futex 'thread result lock'
> > +;;; [1.501476447] main thread: %wait-for-mutex: acquired 'thread result
> lock'
> > +;;; [1.501477283] main thread: waking futex 'thread result lock'
> > +;;; [1.501478429] main thread: released 'thread result lock'
> > +;;; [1.501479467] main thread: try-mutex: acquired 'thread result lock'
> > +;;; [1.501479600] main thread: released 'thread result lock'
> > +;;;
> > +;;; Basically I give up trying to fix this test any more.
> > +(defun signal-dealine-check-interrupt-enablement ()
> > +  (let ((mutex (sb-thread:make-mutex :name "testmut"))
> > +        (waitq (sb-thread:make-waitqueue :name "testcv"))
> > +        (A-holds? :unknown)
> > +        (B-holds? :unknown)
> > +        (A-interrupts-enabled? :unknown)
> > +        (B-interrupts-enabled? :unknown)
> > +        (gate (sb-thread:make-semaphore :name "testsem"))
> > +        (initial-deadline .3)
> > +        (sleep-amount .6)
> > +        (A)
> > +        (B))
> > +    ;; The gate semaphore is just a half-assed attempt to trigger both
> worker threads
> > +    ;; to start running their bodies at the exact same time. If they
> don't do that,
> > +    ;; then it's entirely possible that one thread gets run start to
> finish before
> > +    ;; the other thread does anything at all.  So obviously the test
> will fail.
> > +    (setf (sb-kernel:%instance-ref gate 0) "gate sem") ; FIXME: we
> claim that NAME is SETFable
> > +    (let ((m (sb-thread::semaphore-mutex gate)))
> > +      (setf (sb-thread:mutex-name m) "gate sem mutex"))
> > +    (let ((cv (sb-thread::semaphore-queue gate)))
> > +      (setf (sb-thread:waitqueue-name cv) "gate sem cv"))
> > +    #+nil (alien-funcall (extern-alien
> "lisp_mutex_start_eventrecording" (function void)))
> > +    (setq A (sb-thread:make-thread
> > +             (lambda ()
> > +               (sb-thread:wait-on-semaphore gate)
> > +               (handler-bind
> > +                   ((sb-sys:deadline-timeout
> > +                     (lambda (c)
> > +                       (timestamp "enter deadline handler")
> > +                       ;; We came here through the call to
> DECODE-TIMEOUT
> > +                       ;; in CONDITION-WAIT; hence both here are
> supposed
> > +                       ;; to evaluate to T.
> > +                       (setq A-holds? (sb-thread:holding-mutex-p mutex))
> > +                       (setq A-interrupts-enabled?
> > +                             sb-sys:*interrupts-enabled*)
> > +                       (timestamp "start sleep")
> > +                       (sleep sleep-amount)
> > +                       (timestamp "end sleep")
> > +                       (sb-thread:condition-broadcast waitq)
> > +                       (sb-sys:defer-deadline 10.0 c)
> > +                       (timestamp "leave deadline handler"))))
> > +                 (sb-sys:with-deadline (:seconds initial-deadline)
> > +                   (timestamp "deadline established")
> > +                   (sb-thread:with-mutex (mutex)
> > +                     (sb-thread:condition-wait waitq mutex)))))
> > +             :name "A"))
> > +    (setq B (sb-thread:make-thread
> > +             (lambda ()
> > +               (sb-thread:wait-on-semaphore gate)
> > +               (handler-bind
> > +                   ((sb-sys:deadline-timeout
> > +                     (lambda (c)
> > +                       (timestamp "enter deadline handler")
> > +                       ;; We came here through the call to
> DECODE-TIMEOUT
> > +                       ;; in CONDITION-WAIT (contended case of
> > +                       ;; reaquiring the mutex) - so the former will
> > +                       ;; be NIL, but interrupts should still be
> enabled.
> > +                       (setq B-holds? (sb-thread:holding-mutex-p mutex))
> > +                       (setq B-interrupts-enabled?
> > +                             sb-sys:*interrupts-enabled*)
> > +                       (timestamp "start sleep")
> > +                       (sleep sleep-amount)
> > +                       (timestamp "end sleep")
> > +                       (sb-thread:condition-broadcast waitq)
> > +                       (sb-sys:defer-deadline 10.0 c)
> > +                       (timestamp "leave deadline handler"))))
> > +                 (sb-sys:with-deadline (:seconds initial-deadline)
> > +                   (timestamp "deadline established")
> > +                   (sb-thread:with-mutex (mutex)
> > +                     (sb-thread:condition-wait waitq mutex)))))
> > +             :name "B"))
> > +    (sb-thread:signal-semaphore gate 3)
> > +    (sb-thread:join-thread A)
> > +    (sb-thread:join-thread B)
> > +    #+nil (alien-funcall (extern-alien "lisp_mutex_done_eventrecording"
> (function void)))
> > +    (let ((A-result (list A-holds? A-interrupts-enabled?))
> > +          (B-result (list B-holds? B-interrupts-enabled?)))
> > +      ;; We also check some subtle behaviour w.r.t. whether a deadline
> > +      ;; handler in CONDITION-WAIT got the mutex, or not. This is most
> > +      ;; probably very internal behaviour (so user should not depend
> > +      ;; on it) -- I added the testing here just to manifest current
> > +      ;; behaviour.
> > +      (cond ((equal A-result '(t t))
> > +             (or (equal B-result '(nil t))
> > +                 (equal B-result '(:unknown :unknown))
> > +                 (error "thread B result: ~s" B-result)))
> > +            ((equal B-result '(t t))
> > +             (or (equal A-result '(nil t))
> > +                 (equal A-result '(:unknown :unknown))
> > +                 (error "thread A result: ~s" A-result)))
> > +            (t
> > +             (error "Failure: fell through with A: ~S, B: ~S"
> > +                    A-result
> > +                    B-result))))))
> > +
> > +;;; Test from git rev 5e55f426de8fa579a0d6cfbfb3ac5433d530d3c9 formerly
> in threads.impure
> > +(test-util:with-test (:name (:deadline :interrupts-enabled) :skipped-on
> (:not :sb-thread))
> > +  ;; I cranked this up to 1000 (and ran it on lots of machines) to get
> examples of failure,
> > +  ;; but that's more than a reasonable amount of time to spend in the
> test.
> > +  (loop repeat 2 do (signal-dealine-check-interrupt-enablement)))
> > diff --git a/tests/threads.impure.lisp b/tests/threads.impure.lisp
> > index 76ba01376..80128239d 100644
> > --- a/tests/threads.impure.lisp
> > +++ b/tests/threads.impure.lisp
> > @@ -777,70 +777,6 @@
> >      (mapc #'join-thread threads)
> >      (assert (not deadline-handler-run-twice?))))
> >
> > -(with-test (:name (condition-wait
> :signal-deadline-with-interrupts-enabled))
> > -  (let ((mutex (make-mutex))
> > -        (waitq (make-waitqueue))
> > -        (A-holds? :unknown)
> > -        (B-holds? :unknown)
> > -        (A-interrupts-enabled? :unknown)
> > -        (B-interrupts-enabled? :unknown)
> > -        (A)
> > -        (B))
> > -    ;; W.L.O.G., we assume that A is executed first...
> > -    (setq A (make-thread
> > -             (lambda ()
> > -               (handler-bind
> > -                   ((sb-sys:deadline-timeout
> > -                     (lambda (c)
> > -                       ;; We came here through the call to
> DECODE-TIMEOUT
> > -                       ;; in CONDITION-WAIT; hence both here are
> supposed
> > -                       ;; to evaluate to T.
> > -                       (setq A-holds? (holding-mutex-p mutex))
> > -                       (setq A-interrupts-enabled?
> > -                             sb-sys:*interrupts-enabled*)
> > -                       (sleep 0.2)
> > -                       (condition-broadcast waitq)
> > -                       (sb-sys:defer-deadline 10.0 c))))
> > -                 (sb-sys:with-deadline (:seconds 0.1)
> > -                   (with-mutex (mutex)
> > -                     (condition-wait waitq mutex)))))
> > -             :name "A"))
> > -    (setq B (make-thread
> > -             (lambda ()
> > -               (thread-yield)
> > -               (handler-bind
> > -                   ((sb-sys:deadline-timeout
> > -                     (lambda (c)
> > -                       ;; We came here through the call to
> DECODE-TIMEOUT
> > -                       ;; in CONDITION-WAIT (contended case of
> > -                       ;; reaquiring the mutex) - so the former will
> > -                       ;; be NIL, but interrupts should still be
> enabled.
> > -                       (setq B-holds? (holding-mutex-p mutex))
> > -                       (setq B-interrupts-enabled?
> > -                             sb-sys:*interrupts-enabled*)
> > -                       (sleep 0.2)
> > -                       (condition-broadcast waitq)
> > -                       (sb-sys:defer-deadline 10.0 c))))
> > -                 (sb-sys:with-deadline (:seconds 0.1)
> > -                   (with-mutex (mutex)
> > -                     (condition-wait waitq mutex)))))
> > -             :name "B"))
> > -    (join-thread A)
> > -    (join-thread B)
> > -    (let ((A-result (list A-holds? A-interrupts-enabled?))
> > -          (B-result (list B-holds? B-interrupts-enabled?)))
> > -      ;; We also check some subtle behaviour w.r.t. whether a deadline
> > -      ;; handler in CONDITION-WAIT got the mutex, or not. This is most
> > -      ;; probably very internal behaviour (so user should not depend
> > -      ;; on it) -- I added the testing here just to manifest current
> > -      ;; behaviour.
> > -      (cond ((equal A-result '(t t)) (assert (equal B-result '(nil t))))
> > -            ((equal B-result '(t t)) (assert (equal A-result '(nil t))))
> > -            (t
> > -             (error "Failure: fell through wit A: ~S, B: ~S"
> > -                    A-result
> > -                    B-result))))))
> > -
> >  (with-test (:name (:mutex :finalization))
> >    (let ((a nil))
> >      (dotimes (i 500000)
> >
> > -----------------------------------------------------------------------
> >
> >
> > hooks/post-receive
> > --
> > SBCL
> >
> >
> > _______________________________________________
> > Sbcl-commits mailing list
> > Sbcl-commits@lists.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/sbcl-commits
>

[Attachment #5 (text/html)]

<div dir="ltr">ah, because I&#39;m an idiot and used instance-ref because of the \
&quot;FIXME: can&#39;t SETF semaphore-name&quot;<div>I forgot that I didn&#39;t fix \
that first (as per the docstring that it&#39;s SETFable) so of course the \
%instance-ref index is wrong.</div></div><br><div class="gmail_quote"><div dir="ltr" \
class="gmail_attr">On Mon, Aug 3, 2020 at 5:40 PM Stas Boukarev &lt;<a \
href="mailto:stassats@gmail.com">stassats@gmail.com</a>&gt; \
wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Getting on x86:<br> \
<br> 5346Z While evaluating the form starting at line 396, column 0<br>
2020-08-03T20:53:11.8835967Z     of<br>
#P&quot;/home/runner/work/sbcl/sbcl/tests/deadline.impure.lisp&quot;:<br>
2020-08-03T20:53:11.8836504Z Unhandled TYPE-ERROR: The value<br>
2020-08-03T20:53:11.8836796Z                                      \
SB-THREAD:SEMAPHORE<br> 2020-08-03T20:53:11.8837120Z                                  \
is not of type<br> 2020-08-03T20:53:11.8837416Z                                      \
SB-KERNEL:CLASSOID<br> 2020-08-03T20:53:11.8837727Z     #&lt;error printing a<br>
SB-KERNEL:LAYOUT-INVALID: #&lt;TYPE-ERROR expected-type:<br>
2020-08-03T20:53:11.8838059Z<br>
                           SB-KERNEL:CLASSOID<br>
2020-08-03T20:53:11.8838190Z<br>
                           datum:<br>
2020-08-03T20:53:11.8838510Z<br>
                           SB-THREAD:SEMAPHORE&gt;&gt;<br>
<br>
<br>
On Mon, Aug 3, 2020 at 11:40 PM Douglas Katzman via Sbcl-commits<br>
&lt;<a href="mailto:sbcl-commits@lists.sourceforge.net" \
target="_blank">sbcl-commits@lists.sourceforge.net</a>&gt; wrote:<br> &gt;<br>
&gt; The branch &quot;master&quot; has been updated in SBCL:<br>
&gt;            via   de5163a73314a7fc413a5bd56500757f910bed08 (commit)<br>
&gt;           from   21a778b481f9b9041f71953fbdc84bd3c39a8fa6 (commit)<br>
&gt;<br>
&gt; - Log -----------------------------------------------------------------<br>
&gt; commit de5163a73314a7fc413a5bd56500757f910bed08<br>
&gt; Author: Douglas Katzman &lt;<a href="mailto:dougk@google.com" \
target="_blank">dougk@google.com</a>&gt;<br> &gt; Date:     Mon Aug 3 16:19:06 2020 \
-0400<br> &gt;<br>
&gt;        Reduce a test&#39;s flakiness a thousandfold<br>
&gt;<br>
&gt;        It still makes assumptions that the specified timeouts cause threads<br>
&gt;        to proceed in lockstep, but the failure rate is much more tolerable.<br>
&gt; ---<br>
&gt;   src/runtime/linux-os.c        |   99 ++++++++++++++++++++++<br>
&gt;   tests/deadline.impure.lisp | 207 \
+++++++++++++++++++++++++++++++++++++++++++++<br> &gt;   tests/threads.impure.lisp   \
|   64 --------------<br> &gt;   3 files changed, 306 insertions(+), 64 \
deletions(-)<br> &gt;<br>
&gt; diff --git a/src/runtime/linux-os.c b/src/runtime/linux-os.c<br>
&gt; index 4db91810f..490b996de 100644<br>
&gt; --- a/src/runtime/linux-os.c<br>
&gt; +++ b/src/runtime/linux-os.c<br>
&gt; @@ -67,6 +67,93 @@ int personality (unsigned long);<br>
&gt;   #include &lt;unistd.h&gt;<br>
&gt;   #include &lt;errno.h&gt;<br>
&gt;<br>
&gt; +#ifdef MUTEX_EVENTRECORDING<br>
&gt; +#include &quot;genesis/mutex.h&quot;<br>
&gt; +#define MAXEVENTS 200<br>
&gt; +static struct {<br>
&gt; +      struct thread* th;<br>
&gt; +      struct timespec ts;<br>
&gt; +      char *label;<br>
&gt; +      char *mutex_name;<br>
&gt; +      sword_t timeout;<br>
&gt; +} events[MAXEVENTS];<br>
&gt; +static int record_mutex_events;<br>
&gt; +static int eventcount;<br>
&gt; +<br>
&gt; +void lisp_mutex_event(char *string) {<br>
&gt; +      if (record_mutex_events) {<br>
&gt; +            int id = __sync_fetch_and_add(&amp;eventcount, 1);<br>
&gt; +            if (id &gt;= MAXEVENTS) lose(&quot;event buffer \
overflow&quot;);<br> &gt; +            clock_gettime(CLOCK_REALTIME, \
&amp;events[id].ts);<br> &gt; +            events[id].th = current_thread;<br>
&gt; +            events[id].label = string;<br>
&gt; +            events[id].mutex_name = 0;<br>
&gt; +            events[id].timeout = -1;<br>
&gt; +      }<br>
&gt; +}<br>
&gt; +void lisp_mutex_event1(char *string, char *string2) {<br>
&gt; +      if (record_mutex_events) {<br>
&gt; +            int id = __sync_fetch_and_add(&amp;eventcount, 1);<br>
&gt; +            if (id &gt;= MAXEVENTS) lose(&quot;event buffer \
overflow&quot;);<br> &gt; +            clock_gettime(CLOCK_REALTIME, \
&amp;events[id].ts);<br> &gt; +            events[id].th = current_thread;<br>
&gt; +            events[id].label = string;<br>
&gt; +            events[id].mutex_name = string2;<br>
&gt; +            events[id].timeout = -1;<br>
&gt; +      }<br>
&gt; +}<br>
&gt; +void lisp_mutex_event2(char *string, char *string2, uword_t usec) {<br>
&gt; +      if (record_mutex_events) {<br>
&gt; +            int id = __sync_fetch_and_add(&amp;eventcount, 1);<br>
&gt; +            if (id &gt;= MAXEVENTS) lose(&quot;event buffer \
overflow&quot;);<br> &gt; +            clock_gettime(CLOCK_REALTIME, \
&amp;events[id].ts);<br> &gt; +            events[id].th = current_thread;<br>
&gt; +            events[id].label = string;<br>
&gt; +            events[id].mutex_name = string2;<br>
&gt; +            events[id].timeout = usec;<br>
&gt; +      }<br>
&gt; +}<br>
&gt; +void lisp_mutex_start_eventrecording() {<br>
&gt; +      eventcount = 0;<br>
&gt; +      record_mutex_events = 1;<br>
&gt; +}<br>
&gt; +void lisp_mutex_done_eventrecording() {<br>
&gt; +      record_mutex_events = 0;<br>
&gt; +      int i;<br>
&gt; +      fprintf(stderr, &quot;event log:\n&quot;);<br>
&gt; +      struct timespec basetime = events[0].ts;<br>
&gt; +      for(i=0; i&lt;eventcount;++i) {<br>
&gt; +            struct thread *th = events[i].th;<br>
&gt; +            struct thread_instance *ti = \
(void*)native_pointer(th-&gt;lisp_thread);<br> &gt; +            struct timespec \
rel_time = events[i].ts;<br> &gt; +            rel_time.tv_sec -= \
basetime.tv_sec;<br> &gt; +            rel_time.tv_nsec -= basetime.tv_nsec;<br>
&gt; +            if (rel_time.tv_nsec&lt;0) rel_time.tv_nsec += 1000 * 1000 * 1000, \
rel_time.tv_sec--;<br> &gt; +            lispobj threadname = ti-&gt;name;<br>
&gt; +            if (events[i].timeout &gt;= 0) // must also have mutex_name in this \
case<br> &gt; +                  fprintf(stderr, &quot;[%d.%09ld] %s: %s &#39;%s&#39; \
timeout %ld\n&quot;,<br> &gt; +                              (int)rel_time.tv_sec, \
rel_time.tv_nsec,<br> &gt; +                              \
(char*)VECTOR(threadname)-&gt;data,<br> &gt; +                              \
events[i].label, events[i].mutex_name, events[i].timeout);<br> &gt; +            else \
if (events[i].mutex_name)<br> &gt; +                  fprintf(stderr, \
&quot;[%d.%09ld] %s: %s &#39;%s&#39;\n&quot;,<br> &gt; +                              \
(int)rel_time.tv_sec, rel_time.tv_nsec,<br> &gt; +                              \
(char*)VECTOR(threadname)-&gt;data,<br> &gt; +                              \
events[i].label, events[i].mutex_name);<br> &gt; +            else<br>
&gt; +                  fprintf(stderr, &quot;[%d.%09ld] %s: %s\n&quot;,<br>
&gt; +                              (int)rel_time.tv_sec, rel_time.tv_nsec,<br>
&gt; +                              (char*)VECTOR(threadname)-&gt;data,<br>
&gt; +                              events[i].label);<br>
&gt; +      }<br>
&gt; +      fprintf(stderr, &quot;-----\n&quot;);<br>
&gt; +}<br>
&gt; +#else<br>
&gt; +#define lisp_mutex_event(x)<br>
&gt; +#define lisp_mutex_event1(x,y)<br>
&gt; +#define lisp_mutex_event2(x,y,z)<br>
&gt; +#endif<br>
&gt; +<br>
&gt;   /* values taken from the kernel&#39;s linux/futex.h.   This header file<br>
&gt;        doesn&#39;t exist in userspace, which is our excuse for not \
grovelling<br> &gt;        them automatically */<br>
&gt; @@ -136,14 +223,21 @@ futex_wait(int *lock_word, int oldval, long sec, unsigned \
long usec)<br> &gt;      struct timespec timeout;<br>
&gt;      int t;<br>
&gt;<br>
&gt; +#ifdef MUTEX_EVENTRECORDING<br>
&gt; +      struct mutex* m = (void*)((char*)lock_word - offsetof(struct \
mutex,state));<br> &gt; +      char *name = m-&gt;name != NIL ? \
(char*)VECTOR(m-&gt;name)-&gt;data : &quot;(unnamed)&quot;;<br> &gt; +#endif<br>
&gt;      if (sec&lt;0) {<br>
&gt; +         lisp_mutex_event1(&quot;start futex wait&quot;, name);<br>
&gt;            t = sys_futex(lock_word, futex_wait_op(), oldval, 0);<br>
&gt;      }<br>
&gt;      else {<br>
&gt;            timeout.tv_sec = sec;<br>
&gt;            timeout.tv_nsec = usec * 1000;<br>
&gt; +         lisp_mutex_event2(&quot;start futex timedwait&quot;, name, usec);<br>
&gt;            t = sys_futex(lock_word, futex_wait_op(), oldval, &amp;timeout);<br>
&gt;      }<br>
&gt; +   lisp_mutex_event1(&quot;back from sys_futex&quot;, name);<br>
&gt;      if (t==0)<br>
&gt;            return 0;<br>
&gt;      else if (errno==ETIMEDOUT)<br>
&gt; @@ -158,6 +252,11 @@ futex_wait(int *lock_word, int oldval, long sec, unsigned \
long usec)<br> &gt;   int<br>
&gt;   futex_wake(int *lock_word, int n)<br>
&gt;   {<br>
&gt; +#ifdef MUTEX_EVENTRECORDING<br>
&gt; +      struct mutex* m = (void*)((char*)lock_word - offsetof(struct \
mutex,state));<br> &gt; +      char *name = m-&gt;name != NIL ? \
(char*)VECTOR(m-&gt;name)-&gt;data : &quot;(unnamed)&quot;;<br> &gt; +      \
lisp_mutex_event1(&quot;waking futex&quot;, name);<br> &gt; +#endif<br>
&gt;         return sys_futex(lock_word, futex_wake_op(),n,0);<br>
&gt;   }<br>
&gt;   #endif<br>
&gt; diff --git a/tests/deadline.impure.lisp b/tests/deadline.impure.lisp<br>
&gt; index d03506d48..1294b81a2 100644<br>
&gt; --- a/tests/deadline.impure.lisp<br>
&gt; +++ b/tests/deadline.impure.lisp<br>
&gt; @@ -190,3 +190,210 @@<br>
&gt;                                       (sb-sys:cancel-deadline condition))))<br>
&gt;               (sb-sys:with-deadline (:seconds .1) (sleep 1)))<br>
&gt;         sb-sys:deadline-timeout))<br>
&gt; +<br>
&gt; +(defmacro timestamp (string &amp;optional mutex)<br>
&gt; +   (declare (ignorable string mutex))<br>
&gt; +   #+nil<br>
&gt; +   `(let ((s ,string))<br>
&gt; +        (sb-sys:with-pinned-objects (s)<br>
&gt; +           ,(if mutex<br>
&gt; +                  `(alien-funcall (extern-alien &quot;lisp_mutex_event1&quot; \
(function void system-area-pointer system-area-pointer))<br> &gt; +                   \
(sb-sys:vector-sap s)<br> &gt; +                                          \
(sb-sys:vector-sap (or (mutex-name ,mutex) &quot;unnamed&quot;)))<br> &gt; +          \
`(alien-funcall (extern-alien &quot;lisp_mutex_event&quot; (function void \
system-area-pointer))<br> &gt; +                                          \
(sb-sys:vector-sap s))))))<br> &gt; +<br>
&gt; +;;; I observed (at least) four different ways this next test can flake:<br>
&gt; +;;;<br>
&gt; +;;; 1. one of the first CONDITION-WAITs at the initial deadline can be \
spuriously woken.<br> &gt; +;;;      This is easy to see:<br>
&gt; +;;;         Thread A                  Thread B<br>
&gt; +;;;         --------                  --------<br>
&gt; +;;;         mutex.acquire<br>
&gt; +;;;         cv.wait<br>
&gt; +;;;            set wakeup token<br>
&gt; +;;;            mutex.release<br>
&gt; +;;;                                         mutex.acquire<br>
&gt; +;;;                                         cv.wait<br>
&gt; +;;;                                            set wakeup token<br>
&gt; +;;;                                            mutex.release<br>
&gt; +;;;                                            sys_futex<br>
&gt; +;;;            sys_futex<br>
&gt; +;;;            (* no waiting - &quot;wrong&quot; token *)<br>
&gt; +;;;      So this causes the (:UNKNOWN :UNKNOWN) result for one thread,<br>
&gt; +;;;      because it never enters its deadline handler and because the \
customary<br> &gt; +;;;      (&quot;obligatory&quot; one might say) loop around \
condition-wait is absent.<br> &gt; +;;;      This is so common that it is no longer \
considered a failure.<br> &gt; +;;;<br>
&gt; +;;; 2. one of the threads could be scheduled entirely before the other-<br>
&gt; +;;;      that is, it runs from start to finish before the other thread does \
anything.<br> &gt; +;;;      This causes them both to conclude they acquired the \
mutex,<br> &gt; +;;;      though the deadline handlers were completely \
nonoverlapping.<br> &gt; +;;;<br>
&gt; +;;; 3. one of the calls to SYS_futex with a timeout can wait significantly more \
than<br> &gt; +;;;      requested. This essentially reduces to case 2 but in a more \
convoluted way,<br> &gt; +;;;      which I&#39;ve somewhat mitigated this by scaling \
up the timeouts so that we&#39;re<br> &gt; +;;;      not skating at the edge of the \
granularity the system is willing to give you.<br> &gt; +;;;      This mode of \
flakiness is likely to happen on a cloud compute environment.<br> &gt; +;;;<br>
&gt; +;;;   4. the deadline handler can be invoked twice in one thread because<br>
&gt; +;;;        - I don&#39;t know why.<br>
&gt; +;;;<br>
&gt; +;;; Here&#39;s an example with MUTEX_EVENTRECORDING #defined that was<br>
&gt; +;;; considered a &quot;failure&quot;:<br>
&gt; +;;; [0.000000000] main thread: try-mutex: acquired &#39;Make-Thread \
Lock&#39;<br> &gt; +;;; [0.000038478] main thread: released &#39;Make-Thread \
Lock&#39;<br> &gt; +;;; [0.000053675] main thread: try-mutex: acquired \
&#39;Make-Thread Lock&#39;<br> &gt; +;;; [0.000068255] main thread: released \
&#39;Make-Thread Lock&#39;<br> &gt; +;;; [0.000069415] main thread: try-mutex: \
acquired &#39;gate sem mutex&#39;<br> &gt; +;;; [0.000069781] main thread: released \
&#39;gate sem mutex&#39;<br> &gt; +;;; [0.000071536] main thread: start futex wait \
&#39;thread result lock&#39;<br> &gt; +;;; [0.000094761] B: try-mutex: acquired \
&#39;gate sem mutex&#39;<br> &gt; +;;; [0.000095494] B: released &#39;gate sem \
mutex&#39;<br> &gt; +;;; [0.000096646] B: deadline established<br>
&gt; +;;; [0.000097026] B: try-mutex: acquired &#39;testmut&#39;<br>
&gt; +;;; [0.000097931] B: released &#39;testmut&#39;<br>
&gt; +;;; [0.000098251] B: start futex timedwait &#39;testcv&#39; timeout 300000<br>
&gt; +;;; [0.300161614] B: back from sys_futex &#39;testcv&#39;<br>
&gt; +;;; [0.300163763] B: try-mutex: acquired &#39;testmut&#39;<br>
&gt; +;;; [0.300172833] B: enter deadline handler<br>
&gt; +;;; [0.300173184] B: start sleep<br>
&gt; +;;; [0.601937423] A: try-mutex: acquired &#39;gate sem mutex&#39;<br>
&gt; +;;; [0.601940795] A: released &#39;gate sem mutex&#39;<br>
&gt; +;;; [0.601945635] A: deadline established<br>
&gt; +;;; [0.601952083] A: start futex timedwait &#39;testmut&#39; timeout 300000<br>
&gt; +;;; [0.900264670] B: end sleep<br>
&gt; +;;; [0.900266124] B: waking futex &#39;testcv&#39;<br>
&gt; +;;; [0.900280645] B: waking futex &#39;testmut&#39;<br>
&gt; +;;; [0.900285623] B: released &#39;testmut&#39;<br>
&gt; +;;; [0.900290563] B: try-mutex: acquired &#39;thread interruptions \
lock&#39;<br> &gt; +;;; [0.900290777] B: released &#39;thread interruptions \
lock&#39;<br> &gt; +;;; [0.900292127] B: try-mutex: acquired &#39;session \
lock&#39;<br> &gt; +;;; [0.900293214] B: released &#39;session lock&#39;<br>
&gt; +;;; [0.900293703] B: released &#39;thread result lock&#39;<br>
&gt; +;;; [0.900294381] A: back from sys_futex &#39;testmut&#39;<br>
&gt; +;;; [0.900296267] A: %wait-for-mutex: acquired &#39;testmut&#39;<br>
&gt; +;;; [0.900299208] A: waking futex &#39;testmut&#39;<br>
&gt; +;;; [0.900300544] A: released &#39;testmut&#39;<br>
&gt; +;;; [0.900301101] A: start futex timedwait &#39;testcv&#39; timeout 1000<br>
&gt; +;;; [0.901358710] A: back from sys_futex &#39;testcv&#39;<br>
&gt; +;;; [0.901359680] A: try-mutex: acquired &#39;testmut&#39;<br>
&gt; +;;; [0.901369935] A: enter deadline handler<br>
&gt; +;;; [0.901370203] A: start sleep<br>
&gt; +;;; [1.501418757] A: end sleep<br>
&gt; +;;; [1.501419570] A: waking futex &#39;testcv&#39;<br>
&gt; +;;; [1.501456896] A: released &#39;testmut&#39;<br>
&gt; +;;; [1.501460896] A: try-mutex: acquired &#39;thread interruptions \
lock&#39;<br> &gt; +;;; [1.501461171] A: released &#39;thread interruptions \
lock&#39;<br> &gt; +;;; [1.501462039] A: try-mutex: acquired &#39;session \
lock&#39;<br> &gt; +;;; [1.501463001] A: released &#39;session lock&#39;<br>
&gt; +;;; [1.501463544] A: waking futex &#39;thread result lock&#39;<br>
&gt; +;;; [1.501465881] A: released &#39;thread result lock&#39;<br>
&gt; +;;; [1.501474477] main thread: back from sys_futex &#39;thread result \
lock&#39;<br> &gt; +;;; [1.501476447] main thread: %wait-for-mutex: acquired \
&#39;thread result lock&#39;<br> &gt; +;;; [1.501477283] main thread: waking futex \
&#39;thread result lock&#39;<br> &gt; +;;; [1.501478429] main thread: released \
&#39;thread result lock&#39;<br> &gt; +;;; [1.501479467] main thread: try-mutex: \
acquired &#39;thread result lock&#39;<br> &gt; +;;; [1.501479600] main thread: \
released &#39;thread result lock&#39;<br> &gt; +;;;<br>
&gt; +;;; Basically I give up trying to fix this test any more.<br>
&gt; +(defun signal-dealine-check-interrupt-enablement ()<br>
&gt; +   (let ((mutex (sb-thread:make-mutex :name &quot;testmut&quot;))<br>
&gt; +            (waitq (sb-thread:make-waitqueue :name &quot;testcv&quot;))<br>
&gt; +            (A-holds? :unknown)<br>
&gt; +            (B-holds? :unknown)<br>
&gt; +            (A-interrupts-enabled? :unknown)<br>
&gt; +            (B-interrupts-enabled? :unknown)<br>
&gt; +            (gate (sb-thread:make-semaphore :name &quot;testsem&quot;))<br>
&gt; +            (initial-deadline .3)<br>
&gt; +            (sleep-amount .6)<br>
&gt; +            (A)<br>
&gt; +            (B))<br>
&gt; +      ;; The gate semaphore is just a half-assed attempt to trigger both worker \
threads<br> &gt; +      ;; to start running their bodies at the exact same time. If \
they don&#39;t do that,<br> &gt; +      ;; then it&#39;s entirely possible that one \
thread gets run start to finish before<br> &gt; +      ;; the other thread does \
anything at all.   So obviously the test will fail.<br> &gt; +      (setf \
(sb-kernel:%instance-ref gate 0) &quot;gate sem&quot;) ; FIXME: we claim that NAME is \
SETFable<br> &gt; +      (let ((m (sb-thread::semaphore-mutex gate)))<br>
&gt; +         (setf (sb-thread:mutex-name m) &quot;gate sem mutex&quot;))<br>
&gt; +      (let ((cv (sb-thread::semaphore-queue gate)))<br>
&gt; +         (setf (sb-thread:waitqueue-name cv) &quot;gate sem cv&quot;))<br>
&gt; +      #+nil (alien-funcall (extern-alien \
&quot;lisp_mutex_start_eventrecording&quot; (function void)))<br> &gt; +      (setq A \
(sb-thread:make-thread<br> &gt; +                    (lambda ()<br>
&gt; +                       (sb-thread:wait-on-semaphore gate)<br>
&gt; +                       (handler-bind<br>
&gt; +                             ((sb-sys:deadline-timeout<br>
&gt; +                                (lambda (c)<br>
&gt; +                                   (timestamp &quot;enter deadline \
handler&quot;)<br> &gt; +                                   ;; We came here through \
the call to DECODE-TIMEOUT<br> &gt; +                                   ;; in \
CONDITION-WAIT; hence both here are supposed<br> &gt; +                               \
;; to evaluate to T.<br> &gt; +                                   (setq A-holds? \
(sb-thread:holding-mutex-p mutex))<br> &gt; +                                   (setq \
A-interrupts-enabled?<br> &gt; +                                            \
sb-sys:*interrupts-enabled*)<br> &gt; +                                   (timestamp \
&quot;start sleep&quot;)<br> &gt; +                                   (sleep \
sleep-amount)<br> &gt; +                                   (timestamp &quot;end \
sleep&quot;)<br> &gt; +                                   \
(sb-thread:condition-broadcast waitq)<br> &gt; +                                   \
(sb-sys:defer-deadline 10.0 c)<br> &gt; +                                   \
(timestamp &quot;leave deadline handler&quot;))))<br> &gt; +                          \
(sb-sys:with-deadline (:seconds initial-deadline)<br> &gt; +                          \
(timestamp &quot;deadline established&quot;)<br> &gt; +                             \
(sb-thread:with-mutex (mutex)<br> &gt; +                                \
(sb-thread:condition-wait waitq mutex)))))<br> &gt; +                    :name \
&quot;A&quot;))<br> &gt; +      (setq B (sb-thread:make-thread<br>
&gt; +                    (lambda ()<br>
&gt; +                       (sb-thread:wait-on-semaphore gate)<br>
&gt; +                       (handler-bind<br>
&gt; +                             ((sb-sys:deadline-timeout<br>
&gt; +                                (lambda (c)<br>
&gt; +                                   (timestamp &quot;enter deadline \
handler&quot;)<br> &gt; +                                   ;; We came here through \
the call to DECODE-TIMEOUT<br> &gt; +                                   ;; in \
CONDITION-WAIT (contended case of<br> &gt; +                                   ;; \
reaquiring the mutex) - so the former will<br> &gt; +                                 \
;; be NIL, but interrupts should still be enabled.<br> &gt; +                         \
(setq B-holds? (sb-thread:holding-mutex-p mutex))<br> &gt; +                          \
(setq B-interrupts-enabled?<br> &gt; +                                            \
sb-sys:*interrupts-enabled*)<br> &gt; +                                   (timestamp \
&quot;start sleep&quot;)<br> &gt; +                                   (sleep \
sleep-amount)<br> &gt; +                                   (timestamp &quot;end \
sleep&quot;)<br> &gt; +                                   \
(sb-thread:condition-broadcast waitq)<br> &gt; +                                   \
(sb-sys:defer-deadline 10.0 c)<br> &gt; +                                   \
(timestamp &quot;leave deadline handler&quot;))))<br> &gt; +                          \
(sb-sys:with-deadline (:seconds initial-deadline)<br> &gt; +                          \
(timestamp &quot;deadline established&quot;)<br> &gt; +                             \
(sb-thread:with-mutex (mutex)<br> &gt; +                                \
(sb-thread:condition-wait waitq mutex)))))<br> &gt; +                    :name \
&quot;B&quot;))<br> &gt; +      (sb-thread:signal-semaphore gate 3)<br>
&gt; +      (sb-thread:join-thread A)<br>
&gt; +      (sb-thread:join-thread B)<br>
&gt; +      #+nil (alien-funcall (extern-alien \
&quot;lisp_mutex_done_eventrecording&quot; (function void)))<br> &gt; +      (let \
((A-result (list A-holds? A-interrupts-enabled?))<br> &gt; +               (B-result \
(list B-holds? B-interrupts-enabled?)))<br> &gt; +         ;; We also check some \
subtle behaviour w.r.t. whether a deadline<br> &gt; +         ;; handler in \
CONDITION-WAIT got the mutex, or not. This is most<br> &gt; +         ;; probably \
very internal behaviour (so user should not depend<br> &gt; +         ;; on it) -- I \
added the testing here just to manifest current<br> &gt; +         ;; behaviour.<br>
&gt; +         (cond ((equal A-result &#39;(t t))<br>
&gt; +                    (or (equal B-result &#39;(nil t))<br>
&gt; +                          (equal B-result &#39;(:unknown :unknown))<br>
&gt; +                          (error &quot;thread B result: ~s&quot; \
B-result)))<br> &gt; +                  ((equal B-result &#39;(t t))<br>
&gt; +                    (or (equal A-result &#39;(nil t))<br>
&gt; +                          (equal A-result &#39;(:unknown :unknown))<br>
&gt; +                          (error &quot;thread A result: ~s&quot; \
A-result)))<br> &gt; +                  (t<br>
&gt; +                    (error &quot;Failure: fell through with A: ~S, B: \
~S&quot;<br> &gt; +                              A-result<br>
&gt; +                              B-result))))))<br>
&gt; +<br>
&gt; +;;; Test from git rev 5e55f426de8fa579a0d6cfbfb3ac5433d530d3c9 formerly in \
threads.impure<br> &gt; +(test-util:with-test (:name (:deadline :interrupts-enabled) \
:skipped-on (:not :sb-thread))<br> &gt; +   ;; I cranked this up to 1000 (and ran it \
on lots of machines) to get examples of failure,<br> &gt; +   ;; but that&#39;s more \
than a reasonable amount of time to spend in the test.<br> &gt; +   (loop repeat 2 do \
(signal-dealine-check-interrupt-enablement)))<br> &gt; diff --git \
a/tests/threads.impure.lisp b/tests/threads.impure.lisp<br> &gt; index \
76ba01376..80128239d 100644<br> &gt; --- a/tests/threads.impure.lisp<br>
&gt; +++ b/tests/threads.impure.lisp<br>
&gt; @@ -777,70 +777,6 @@<br>
&gt;         (mapc #&#39;join-thread threads)<br>
&gt;         (assert (not deadline-handler-run-twice?))))<br>
&gt;<br>
&gt; -(with-test (:name (condition-wait \
:signal-deadline-with-interrupts-enabled))<br> &gt; -   (let ((mutex \
(make-mutex))<br> &gt; -            (waitq (make-waitqueue))<br>
&gt; -            (A-holds? :unknown)<br>
&gt; -            (B-holds? :unknown)<br>
&gt; -            (A-interrupts-enabled? :unknown)<br>
&gt; -            (B-interrupts-enabled? :unknown)<br>
&gt; -            (A)<br>
&gt; -            (B))<br>
&gt; -      ;; W.L.O.G., we assume that A is executed first...<br>
&gt; -      (setq A (make-thread<br>
&gt; -                    (lambda ()<br>
&gt; -                       (handler-bind<br>
&gt; -                             ((sb-sys:deadline-timeout<br>
&gt; -                                (lambda (c)<br>
&gt; -                                   ;; We came here through the call to \
DECODE-TIMEOUT<br> &gt; -                                   ;; in CONDITION-WAIT; \
hence both here are supposed<br> &gt; -                                   ;; to \
evaluate to T.<br> &gt; -                                   (setq A-holds? \
(holding-mutex-p mutex))<br> &gt; -                                   (setq \
A-interrupts-enabled?<br> &gt; -                                            \
sb-sys:*interrupts-enabled*)<br> &gt; -                                   (sleep \
0.2)<br> &gt; -                                   (condition-broadcast waitq)<br>
&gt; -                                   (sb-sys:defer-deadline 10.0 c))))<br>
&gt; -                          (sb-sys:with-deadline (:seconds 0.1)<br>
&gt; -                             (with-mutex (mutex)<br>
&gt; -                                (condition-wait waitq mutex)))))<br>
&gt; -                    :name &quot;A&quot;))<br>
&gt; -      (setq B (make-thread<br>
&gt; -                    (lambda ()<br>
&gt; -                       (thread-yield)<br>
&gt; -                       (handler-bind<br>
&gt; -                             ((sb-sys:deadline-timeout<br>
&gt; -                                (lambda (c)<br>
&gt; -                                   ;; We came here through the call to \
DECODE-TIMEOUT<br> &gt; -                                   ;; in CONDITION-WAIT \
(contended case of<br> &gt; -                                   ;; reaquiring the \
mutex) - so the former will<br> &gt; -                                   ;; be NIL, \
but interrupts should still be enabled.<br> &gt; -                                   \
(setq B-holds? (holding-mutex-p mutex))<br> &gt; -                                   \
(setq B-interrupts-enabled?<br> &gt; -                                            \
sb-sys:*interrupts-enabled*)<br> &gt; -                                   (sleep \
0.2)<br> &gt; -                                   (condition-broadcast waitq)<br>
&gt; -                                   (sb-sys:defer-deadline 10.0 c))))<br>
&gt; -                          (sb-sys:with-deadline (:seconds 0.1)<br>
&gt; -                             (with-mutex (mutex)<br>
&gt; -                                (condition-wait waitq mutex)))))<br>
&gt; -                    :name &quot;B&quot;))<br>
&gt; -      (join-thread A)<br>
&gt; -      (join-thread B)<br>
&gt; -      (let ((A-result (list A-holds? A-interrupts-enabled?))<br>
&gt; -               (B-result (list B-holds? B-interrupts-enabled?)))<br>
&gt; -         ;; We also check some subtle behaviour w.r.t. whether a deadline<br>
&gt; -         ;; handler in CONDITION-WAIT got the mutex, or not. This is most<br>
&gt; -         ;; probably very internal behaviour (so user should not depend<br>
&gt; -         ;; on it) -- I added the testing here just to manifest current<br>
&gt; -         ;; behaviour.<br>
&gt; -         (cond ((equal A-result &#39;(t t)) (assert (equal B-result &#39;(nil \
t))))<br> &gt; -                  ((equal B-result &#39;(t t)) (assert (equal \
A-result &#39;(nil t))))<br> &gt; -                  (t<br>
&gt; -                    (error &quot;Failure: fell through wit A: ~S, B: \
~S&quot;<br> &gt; -                              A-result<br>
&gt; -                              B-result))))))<br>
&gt; -<br>
&gt;   (with-test (:name (:mutex :finalization))<br>
&gt;      (let ((a nil))<br>
&gt;         (dotimes (i 500000)<br>
&gt;<br>
&gt; -----------------------------------------------------------------------<br>
&gt;<br>
&gt;<br>
&gt; hooks/post-receive<br>
&gt; --<br>
&gt; SBCL<br>
&gt;<br>
&gt;<br>
&gt; _______________________________________________<br>
&gt; Sbcl-commits mailing list<br>
&gt; <a href="mailto:Sbcl-commits@lists.sourceforge.net" \
target="_blank">Sbcl-commits@lists.sourceforge.net</a><br> &gt; <a \
href="https://lists.sourceforge.net/lists/listinfo/sbcl-commits" rel="noreferrer" \
target="_blank">https://lists.sourceforge.net/lists/listinfo/sbcl-commits</a><br> \
</blockquote></div>





_______________________________________________
Sbcl-devel mailing list
Sbcl-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sbcl-devel


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic