[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-nfs
Subject:    Re: [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart
From:       dai.ngo () oracle ! com
Date:       2023-02-23 5:40:33
Message-ID: 3165b445-f4da-3dfd-7036-712b605865be () oracle ! com
[Download RAW message or body]

Hi Anna,

Just a reminder that this patch is still waiting for a review.

Thanks,
-Dai

On 2/17/23 10:22 AM, dai.ngo@oracle.com wrote:
> Hi Trond,
>
> Could you please let me know your opinion on this patch?
>
> Thanks,
> -Dai
>
> On 2/10/23 12:10 AM, Dai Ngo wrote:
>> Occasionally NLM lock and unlock requests fail with EIO and ENOLCK,
>> respectively. This usually happens when the NFS server is restarted
>> while an NLM lock test is running.
>>
>> Currently there is a 9-second limit for retrying the bind operation.
>> If the server is under load, the port mapper might take more than 9
>> seconds to become ready after the NFS server is restarted.
>>
>> This patch increases the timeout for rebind from 9 to 30 seconds,
>> allowing a bit more time for the port mapper to become ready.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   include/linux/sunrpc/clnt.h  | 3 +++
>>   include/linux/sunrpc/sched.h | 4 ++--
>>   net/sunrpc/clnt.c            | 2 +-
>>   net/sunrpc/sched.c           | 3 ++-
>>   4 files changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>> index 770ef2cb5775..7f2dee56c121 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>>   #define RPC_CLNT_CREATE_REUSEPORT    (1UL << 11)
>>   #define RPC_CLNT_CREATE_CONNECTED    (1UL << 12)
>>   +#define    RPC_CLNT_REBIND_DELAY        3
>> +#define    RPC_CLNT_REBIND_MAX_TIMEOUT    30
>> +
>>   struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>>   struct rpc_clnt    *rpc_bind_new_program(struct rpc_clnt *,
>>                   const struct rpc_program *, u32);
>> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>> --- a/include/linux/sunrpc/sched.h
>> +++ b/include/linux/sunrpc/sched.h
>> @@ -90,8 +90,8 @@ struct rpc_task {
>>   #endif
>>       unsigned char        tk_priority : 2,/* Task priority */
>>                   tk_garb_retry : 2,
>> -                tk_cred_retry : 2,
>> -                tk_rebind_retry : 2;
>> +                tk_cred_retry : 2;
>> +    unsigned char        tk_rebind_retry;
>>   };
>>     typedef void            (*rpc_action)(struct rpc_task *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index 0b0b9f1eed46..6c89a1fa40bf 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>>           if (task->tk_rebind_retry == 0)
>>               break;
>>           task->tk_rebind_retry--;
>> -        rpc_delay(task, 3*HZ);
>> +        rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>>           goto retry_timeout;
>>       case -ENOBUFS:
>>           rpc_delay(task, HZ >> 2);
>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>> index be587a308e05..5c18a35752aa 100644
>> --- a/net/sunrpc/sched.c
>> +++ b/net/sunrpc/sched.c
>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>>       /* Initialize retry counters */
>>       task->tk_garb_retry = 2;
>>       task->tk_cred_retry = 2;
>> -    task->tk_rebind_retry = 2;
>> +    task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>> +                    RPC_CLNT_REBIND_DELAY;
>>         /* starting timestamp */
>>       task->tk_start = ktime_get();
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic