[pnfs] [PATCH 06/10] pnfs: Isolate odirect rpc read/write/commit execution.

Benny Halevy bhalevy at panasas.com
Thu Jul 10 02:14:22 EDT 2008


On Jul. 10, 2008, 8:16 +0300, Dean Hildebrand <seattleplus at gmail.com> wrote:
> 
> Benny Halevy wrote:
>> On Jul. 09, 2008, 0:54 +0300, Dean Hildebrand <seattleplus at gmail.com> wrote:
>>   
>>> This patch helps prepare for pNFS, which may not use rpc.
>>>
>>> Signed-off-by: Dean Hildebrand <dhildeb at us.ibm.com>
>>> ---
>>>  fs/nfs/direct.c |  164 ++++++++++++++++++++++++++++++++-----------------------
>>>  1 files changed, 96 insertions(+), 68 deletions(-)
>>>
>>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
>>> index ada71d5..854ed47 100644
>>> --- a/fs/nfs/direct.c
>>> +++ b/fs/nfs/direct.c
>>> @@ -266,6 +266,40 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
>>>  	.rpc_release = nfs_direct_read_release,
>>>  };
>>>  
>>> +static long nfs_direct_read_execute(struct nfs_read_data *data,
>>> +				    struct rpc_task_setup *task_setup_data,
>>> +				    struct rpc_message *msg)
>>> +{
>>> +	struct inode *inode = data->inode;
>>> +	struct rpc_task *task;
>>> +	long err;
>>> +
>>> +	msg->rpc_argp = &data->args;
>>> +	msg->rpc_resp = &data->res;
>>> +
>>> +	task_setup_data->task = &data->task;
>>> +	task_setup_data->callback_data = data;
>>> +	NFS_PROTO(inode)->read_setup(data, msg);
>>> +
>>> +	task = rpc_run_task(task_setup_data);
>>> +	err = IS_ERR(task);
>>> +	if (err)
>>> +		goto out;
>>>     
>> doing the following will percolate the error code up to the caller:
>>
>> 	if (IS_ERR(task))
>> 		return PTR_ERR(task);
>>   
> This would change current nfs behavior as it seems the function always 
> wants to return EFAULT in this case.  Not sure if this was intentional 
> by Trond or not, but I'd rather move the change into a different patch 
> if you send this to Trond.

Actually, I was suggesting to return the error (rather than effectively
a boolean status) only from nfs_direct_read_execute and not from
nfs_direct_read_schedule_segment so not to change externally visible
behavior...

>>   
>>> +
>>> +	rpc_put_task(task);
>>> +
>>> +	dprintk("NFS: %5u initiated direct read call "
>>> +		"(req %s/%lld, %u bytes @ offset %llu)\n",
>>> +		data->task.tk_pid,
>>> +		inode->i_sb->s_id,
>>> +		(long long)NFS_FILEID(inode),
>>> +		data->args.count,
>>> +		(unsigned long long)data->args.offset);
>>> +
>>> +out:
>>> +	return err;
>>>     
>> 	return 0;
>> 	(cont. to the above)
>>   
>>> +}
>>> +
>>>  /*
>>>   * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
>>>   * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
>>> @@ -282,7 +316,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
>>>  	unsigned long user_addr = (unsigned long)iov->iov_base;
>>>  	size_t count = iov->iov_len;
>>>  	size_t rsize = NFS_SERVER(inode)->rsize;
>>> -	struct rpc_task *task;
>>>  	struct rpc_message msg = {
>>>  		.rpc_cred = ctx->cred,
>>>  	};
>>> @@ -343,25 +376,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
>>>  		data->res.fattr = &data->fattr;
>>>  		data->res.eof = 0;
>>>  		data->res.count = bytes;
>>> -		msg.rpc_argp = &data->args;
>>> -		msg.rpc_resp = &data->res;
>>> -
>>> -		task_setup_data.task = &data->task;
>>> -		task_setup_data.callback_data = data;
>>> -		NFS_PROTO(inode)->read_setup(data, &msg);
>>> -
>>> -		task = rpc_run_task(&task_setup_data);
>>> -		if (IS_ERR(task))
>>> -			break;
>>> -		rpc_put_task(task);
>>>  
>>> -		dprintk("NFS: %5u initiated direct read call "
>>> -			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
>>> -				data->task.tk_pid,
>>> -				inode->i_sb->s_id,
>>> -				(long long)NFS_FILEID(inode),
>>> -				bytes,
>>> -				(unsigned long long)data->args.offset);
>>> +		if (nfs_direct_read_execute(data, &task_setup_data, &msg))
>>> +		    break;
>>>     
>> 		^^^^
>> 		this should be a tab (I'll fix that up when submitting)
>>   
> oops, thanks.
>>>  		started += bytes;
>>>  		user_addr += bytes;
>>> @@ -447,12 +464,15 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
>>>  }
>>>  
>>>  #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
>>> +static long nfs_direct_write_execute(struct nfs_write_data *data,
>>> +				     struct rpc_task_setup *task_setup_data,
>>> +				     struct rpc_message *msg);
>>> +
>>>  static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>>>  {
>>>  	struct inode *inode = dreq->inode;
>>>  	struct list_head *p;
>>>  	struct nfs_write_data *data;
>>> -	struct rpc_task *task;
>>>  	struct rpc_message msg = {
>>>  		.rpc_cred = dreq->ctx->cred,
>>>  	};
>>> @@ -485,25 +505,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>>>  		 * Reuse data->task; data->args should not have changed
>>>  		 * since the original request was sent.
>>>  		 */
>>> -		task_setup_data.task = &data->task;
>>> -		task_setup_data.callback_data = data;
>>> -		msg.rpc_argp = &data->args;
>>> -		msg.rpc_resp = &data->res;
>>> -		NFS_PROTO(inode)->write_setup(data, &msg);
>>> -
>>> -		/*
>>> -		 * We're called via an RPC callback, so BKL is already held.
>>> -		 */
>>> -		task = rpc_run_task(&task_setup_data);
>>> -		if (!IS_ERR(task))
>>> -			rpc_put_task(task);
>>> -
>>> -		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
>>> -				data->task.tk_pid,
>>> -				inode->i_sb->s_id,
>>> -				(long long)NFS_FILEID(inode),
>>> -				data->args.count,
>>> -				(unsigned long long)data->args.offset);
>>> +		nfs_direct_write_execute(data, &task_setup_data, &msg);
>>>  	}
>>>  
>>>  	if (put_dreq(dreq))
>>> @@ -546,10 +548,28 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
>>>  	.rpc_release = nfs_direct_commit_release,
>>>  };
>>>  
>>> +static void nfs_direct_commit_execute(struct nfs_direct_req *dreq,
>>> +				      struct nfs_write_data *data,
>>> +				      struct rpc_task_setup *task_setup_data,
>>> +				      struct rpc_message *msg)
>>>     
>> to keep things similar how about returning status as in the read and write
>> cases (even though it's not used in this case)
>>   
> I agree that it is probably better, but I think it should also be in a 
> separate 2nd patch as it changes current behavior.

ditto

Benny

> Dean
>>   
>>> +{
>>> +	struct rpc_task *task;
>>> +
>>> +	NFS_PROTO(data->inode)->commit_setup(data, msg);
>>> +
>>> +	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
>>> +	dreq->commit_data = NULL;
>>> +
>>> +	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
>>> +
>>> +	task = rpc_run_task(task_setup_data);
>>> +	if (!IS_ERR(task))
>>> +		rpc_put_task(task);
>>> +}
>>> +
>>>  static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
>>>  {
>>>  	struct nfs_write_data *data = dreq->commit_data;
>>> -	struct rpc_task *task;
>>>  	struct rpc_message msg = {
>>>  		.rpc_argp = &data->args,
>>>  		.rpc_resp = &data->res,
>>> @@ -576,16 +596,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
>>>  	data->res.fattr = &data->fattr;
>>>  	data->res.verf = &data->verf;
>>>  
>>> -	NFS_PROTO(data->inode)->commit_setup(data, &msg);
>>> -
>>> -	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
>>> -	dreq->commit_data = NULL;
>>> -
>>> -	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
>>> -
>>> -	task = rpc_run_task(&task_setup_data);
>>> -	if (!IS_ERR(task))
>>> -		rpc_put_task(task);
>>> +	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
>>>  }
>>>  
>>>  static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
>>> @@ -687,6 +698,40 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
>>>  	.rpc_release = nfs_direct_write_release,
>>>  };
>>>  
>>> +static long nfs_direct_write_execute(struct nfs_write_data *data,
>>> +				     struct rpc_task_setup *task_setup_data,
>>> +				     struct rpc_message *msg)
>>> +{
>>> +	struct inode *inode = data->inode;
>>> +	struct rpc_task *task;
>>> +	long err;
>>> +
>>> +	msg->rpc_argp = &data->args;
>>> +	msg->rpc_resp = &data->res;
>>> +
>>> +	task_setup_data->task = &data->task;
>>> +	task_setup_data->callback_data = data;
>>> +	NFS_PROTO(inode)->write_setup(data, msg);
>>> +
>>> +	task = rpc_run_task(task_setup_data);
>>> +	err = IS_ERR(task);
>>> +	if (err)
>>> +		goto out;
>>>     
>> ditto as in the read case.
>>
>>   
>>> +
>>> +	rpc_put_task(task);
>>> +
>>> +	dprintk("NFS: %5u initiated direct write call "
>>> +		"(req %s/%lld, %u bytes @ offset %llu)\n",
>>> +		data->task.tk_pid,
>>> +		inode->i_sb->s_id,
>>> +		(long long)NFS_FILEID(inode),
>>> +		data->args.count,
>>> +		(unsigned long long)data->args.offset);
>>> +
>>> +out:
>>> +	return err;
>>> +}
>>> +
>>>  /*
>>>   * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
>>>   * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
>>> @@ -702,7 +747,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
>>>  	struct inode *inode = ctx->path.dentry->d_inode;
>>>  	unsigned long user_addr = (unsigned long)iov->iov_base;
>>>  	size_t count = iov->iov_len;
>>> -	struct rpc_task *task;
>>>  	struct rpc_message msg = {
>>>  		.rpc_cred = ctx->cred,
>>>  	};
>>> @@ -767,24 +811,8 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
>>>  		data->res.count = bytes;
>>>  		data->res.verf = &data->verf;
>>>  
>>> -		task_setup_data.task = &data->task;
>>> -		task_setup_data.callback_data = data;
>>> -		msg.rpc_argp = &data->args;
>>> -		msg.rpc_resp = &data->res;
>>> -		NFS_PROTO(inode)->write_setup(data, &msg);
>>> -
>>> -		task = rpc_run_task(&task_setup_data);
>>> -		if (IS_ERR(task))
>>> +		if (nfs_direct_write_execute(data, &task_setup_data, &msg))
>>>  			break;
>>> -		rpc_put_task(task);
>>> -
>>> -		dprintk("NFS: %5u initiated direct write call "
>>> -			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
>>> -				data->task.tk_pid,
>>> -				inode->i_sb->s_id,
>>> -				(long long)NFS_FILEID(inode),
>>> -				bytes,
>>> -				(unsigned long long)data->args.offset);
>>>  
>>>  		started += bytes;
>>>  		user_addr += bytes;
>>>     
>>   



More information about the pNFS mailing list