[prev in list] [next in list] [prev in thread] [next in thread] 

List:       ceph-users
Subject:    [ceph-users] OSD activate hangs
From:       pavanakrishnabhat () gmail ! com (pavana bhat)
Date:       2015-08-30 22:09:41
Message-ID: CAMnVXr_Bo7MdH3y4YfLSvP-h5K-bWgG-9yB9W2ajXDTOh0W+DQ () mail ! gmail ! com
[Download RAW message or body]

Hi,

I am trying to deploy Ceph Hammer on RHEL 7 using ceph-deploy. While
activating the OSD with ceph-deploy from the admin node, the step below
hangs. I ran the same command manually on the OSD node and traced it with
"python -m trace --trace". It looks like it is stuck in some threading.py
code. Can someone please help?

[*ceph-vm-osd1*][*WARNIN*] INFO:ceph-disk:Running command: /usr/bin/ceph
--cluster ceph --name client.bootstrap-osd --keyring
/var/lib/ceph/bootstrap-osd/ceph.keyring osd create --concise
c2d19639-f3ec-447d-9a7c-a180a226dded

[*ceph-vm-osd1*][*WARNIN*] No data was received after 300 seconds,
disconnecting...



*Manual run with --verbose option:*


[cloud-user@ceph-vm-osd1 ~]$ sudo /usr/bin/ceph --verbose --cluster ceph
--name client.bootstrap-osd --keyring
/var/lib/ceph/bootstrap-osd/ceph.keyring osd create
c2d19639-f3ec-447d-9a7c-a180a226dded

parsed_args: Namespace(admin_socket=None, admin_socket_nope=None,
cephconf=None, client_id=None, client_name='client.bootstrap-osd',
cluster='ceph', cluster_timeout=None, completion=False, help=False,
input_file=None, output_file=None, output_format=None, status=False,
verbose=True, version=False, watch=False, watch_debug=False,
watch_error=False, watch_info=False, watch_sec=False, watch_warn=False),
childargs: ['--keyring', '/var/lib/ceph/bootstrap-osd/ceph.keyring', 'osd',
'create', 'c2d19639-f3ec-447d-9a7c-a180a226dded']

^CError connecting to cluster: InterruptedOrTimeoutError


*Manual run with python -m trace --trace:*

<<truncated>>


 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(946):         self.__block.acquire()

threading.py(947):         try:

threading.py(948):             if timeout is None:

threading.py(954):                 deadline = _time() + timeout

threading.py(955):                 while not self.__stopped:

threading.py(956):                     delay = deadline - _time()

threading.py(957):                     if delay <= 0:

threading.py(961):                     self.__block.wait(delay, balancing)

 --- modulename: threading, funcname: wait

threading.py(331):         if not self._is_owned():

 --- modulename: threading, funcname: _is_owned

threading.py(302):         if self.__lock.acquire(0):

threading.py(306):             return True

threading.py(333):         waiter = _allocate_lock()

threading.py(334):         waiter.acquire()

threading.py(335):         self.__waiters.append(waiter)

threading.py(336):         saved_state = self._release_save()

 --- modulename: threading, funcname: _release_save

threading.py(294):         self.__lock.release()           # No state to save

threading.py(337):         try:    # restore state no matter what (e.g., KeyboardInterrupt)

threading.py(338):             if timeout is None:

threading.py(348):                 endtime = _time() + timeout

threading.py(349):                 delay = 0.0005 # 500 us -> initial delay of 1 ms

threading.py(350):                 while True:

threading.py(351):                     gotit = waiter.acquire(0)

threading.py(352):                     if gotit:

threading.py(354):                     remaining = endtime - _time()

threading.py(355):                     if remaining <= 0:

threading.py(357):                     if balancing:

threading.py(358):                         delay = min(delay * 2, remaining, 0.05)

threading.py(361):                     _sleep(delay)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

 --- modulename: threading, funcname: __stop

threading.py(870):         if not hasattr(self, '_Thread__block'):

threading.py(872):         self.__block.acquire()

threading.py(873):         self.__stopped = True

threading.py(874):         self.__block.notify_all()

 --- modulename: threading, funcname: notifyAll

threading.py(409):         self.notify(len(self.__waiters))

 --- modulename: threading, funcname: notify

threading.py(385):         if not self._is_owned():

 --- modulename: threading, funcname: _is_owned

threading.py(302):         if self.__lock.acquire(0):

threading.py(306):             return True

threading.py(387):         __waiters = self.__waiters

threading.py(388):         waiters = __waiters[:n]

threading.py(389):         if not waiters:

threading.py(393):         self._note("%s.notify(): notifying %d waiter%s", self, n,

threading.py(394):                    n!=1 and "s" or "")

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(395):         for waiter in waiters:

threading.py(396):             waiter.release()

threading.py(397):             try:

threading.py(398):                 __waiters.remove(waiter)

threading.py(395):         for waiter in waiters:

threading.py(875):         self.__block.release()

threading.py(350):                 while True:

threading.py(351):                     gotit = waiter.acquire(0)

threading.py(352):                     if gotit:

threading.py(353):                         break

threading.py(362):                 if not gotit:

threading.py(371):                         self._note("%s.wait(%s): got it", self, timeout)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(373):             self._acquire_restore(saved_state)

 --- modulename: threading, funcname: _acquire_restore

threading.py(297):         self.__lock.acquire()           # Ignore saved state

threading.py(955):                 while not self.__stopped:

threading.py(964):                         self._note("%s.join(): thread stopped", self)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(966):             self.__block.release()

rados.py(178):             if timeout and t.is_alive():

rados.py(176):         while t.is_alive():

 --- modulename: threading, funcname: isAlive

threading.py(1004):         assert self.__initialized, "Thread.__init__() not called"

threading.py(1005):         return self.__started.is_set() and not self.__stopped

 --- modulename: threading, funcname: isSet

threading.py(572):         return self.__flag

rados.py(183):         t.join()        # in case t exits before reaching the join() above

 --- modulename: threading, funcname: join

threading.py(936):         if not self.__initialized:

threading.py(938):         if not self.__started.is_set():

 --- modulename: threading, funcname: isSet

threading.py(572):         return self.__flag

threading.py(940):         if self is current_thread():

 --- modulename: threading, funcname: currentThread

threading.py(1160):     try:

threading.py(1161):         return _active[_get_ident()]

threading.py(944):             if not self.__stopped:

threading.py(946):         self.__block.acquire()

threading.py(947):         try:

threading.py(948):             if timeout is None:

threading.py(949):                 while not self.__stopped:

threading.py(952):                     self._note("%s.join(): thread stopped", self)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(966):             self.__block.release()

rados.py(193):     if interrupt:

rados.py(195):     return t.retval

rados.py(265):             self.state = "shutdown"

ceph(916):     sys.exit(retval)

 --- modulename: trace, funcname: _unsettrace


Thanks,

Pavana
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ceph.com/pipermail/ceph-users-ceph.com/attachments/20150830/b8372a7b/attachment.htm>

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic