[pnfs] [PATCH 6/6] pnfs: Enable O_DIRECT write path.

Benny Halevy bhalevy at panasas.com
Sun Jun 1 04:56:34 EDT 2008


On May. 30, 2008, 3:56 +0300, Dean Hildebrand <seattleplus at gmail.com> wrote:
> I refactored most of the bits you mentioned in your emails and 
> streamlined some other parts.  I'll resend after we move to 2.6.26.

Thank you!

Benny

> 
> thanks,
> Dean
> 
> Benny Halevy wrote:
>> On May. 29, 2008, 4:27 +0300, Dean Hildebrand <seattleplus at gmail.com> wrote:
>>   
>>> Signed-off-by: Dean Hildebrand <dhildeb at us.ibm.com>
>>> ---
>>>  fs/nfs/direct.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>>>  1 files changed, 77 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
>>> index c6b36de..8d4c2cb 100644
>>> --- a/fs/nfs/direct.c
>>> +++ b/fs/nfs/direct.c
>>> @@ -518,6 +518,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>>>  		.callback_ops = &nfs_write_direct_ops,
>>>  		.flags = RPC_TASK_ASYNC,
>>>  	};
>>> +#if defined(CONFIG_PNFS)
>>> +	int result;
>>> +#endif
>>>  
>>>  	dreq->count = 0;
>>>  	get_dreq(dreq);
>>> @@ -536,8 +539,16 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
>>>  		nfs_fattr_init(&data->fattr);
>>>  		data->res.count = data->args.count;
>>>  		memset(&data->verf, 0, sizeof(data->verf));
>>> -
>>> -		nfs_direct_write_execute(data, &task_setup_data, &msg);
>>> +#if defined(CONFIG_PNFS)
>>> +		result = pnfs_try_to_write_data(data, &nfs_write_direct_ops,
>>> +						NFS_FILE_SYNC);
>>> +		if (result < 0)
>>> +			break;
>>> +		else if (result == 0)
>>> +			dreq->pnfsflags |= data->pnfsflags;
>>> +		else if (result == 1)
>>> +#endif
>>> +			nfs_direct_write_execute(data, &task_setup_data, &msg);
>>>     
>> same comment as for the read case...
>>
>>   
>>>  	}
>>>  
>>>  	if (put_dreq(dreq))
>>> @@ -608,6 +619,9 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
>>>  		.callback_data = data,
>>>  		.flags = RPC_TASK_ASYNC,
>>>  	};
>>> +#if defined(CONFIG_PNFS)
>>> +	int result;
>>> +#endif
>>>  
>>>  	data->inode = dreq->inode;
>>>  	data->cred = msg.rpc_cred;
>>> @@ -619,6 +633,18 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
>>>  	data->res.fattr = &data->fattr;
>>>  	data->res.verf = &data->verf;
>>>  
>>> +#if defined(CONFIG_PNFS)
>>> +	dreq->commit_data = NULL;
>>> +	data->how = RPC_TASK_ASYNC;
>>> +	result = pnfs_try_to_commit(data, &nfs_commit_direct_ops);
>>> +
>>> +	/* FIXME: how to report a pNFS error? */
>>> +	if (result == 0)
>>> +		return;
>>> +	else if (result < 0)
>>> +		printk(KERN_ERR "%s: pnfs commit failed (%d), do nfs commit\n",
>>> +		       __func__, result);
>>> +#endif
>>>  	nfs_direct_commit_execute(dreq, data, &task_setup_data, &msg);
>>>     
>> (almost) ditto
>>
>>   
>>>  }
>>>  
>>> @@ -669,6 +695,9 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
>>>  	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
>>>  	int status = task->tk_status;
>>>  
>>> +	dprintk("%s: verf: %d stable %d\n", __func__,
>>> +		data->res.verf->committed, data->args.stable);
>>> +
>>>  	if (nfs_writeback_done(task, data) != 0)
>>>  		return;
>>>  
>>> @@ -778,6 +807,18 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
>>>  	unsigned int pgbase;
>>>  	int result;
>>>  	ssize_t started = 0;
>>> +#ifdef CONFIG_PNFS
>>> +	u32 pnfs_bound;
>>> +	size_t pnfs_stripe_rem = 0;
>>>     
>> I tend to think that these two variables would be better declared outside the #ifdef
>>
>>   
>>> +
>>> +	pnfs_direct_init_io(inode, ctx, count, pos, 1,
>>> +			    &wsize, &pnfs_bound, &pnfs_stripe_rem);
>>> +
>>> +	dprintk("--> %s bound %u rem %Zu\n",
>>> +		__func__, pnfs_bound, pnfs_stripe_rem);
>>> +#endif
>>> +	dprintk("%s: pos %llu count %Zu wsize %Zu\n",
>>> +		__func__, pos, count, wsize);
>>>  
>>>  	do {
>>>  		struct nfs_write_data *data;
>>> @@ -785,7 +826,12 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
>>>  
>>>  		pgbase = user_addr & ~PAGE_MASK;
>>>  		bytes = min(wsize,count);
>>> -
>>> +#ifdef CONFIG_PNFS
>>> +		if (pnfs_stripe_rem) {
>>> +			bytes = min(bytes, pnfs_stripe_rem);
>>> +			pnfs_stripe_rem = pnfs_bound; /* reset for next req */
>>> +		}
>>> +#endif
>>>     
>> Once pnfs_stripe_rem is declared regardless of CONFIG_PNFS,
>> you won't need the #ifdef here.
>>
>>   
>>>  		result = -ENOMEM;
>>>  		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
>>>  		if (unlikely(!data))
>>> @@ -828,7 +874,16 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
>>>  		data->res.count = bytes;
>>>  		data->res.verf = &data->verf;
>>>  
>>> -		nfs_direct_write_execute(data, &task_setup_data, &msg);
>>> +#if defined(CONFIG_PNFS)
>>> +		result = pnfs_try_to_write_data(data, &nfs_write_direct_ops,
>>> +						sync);
>>> +		if (result < 0)
>>> +			break;
>>> +		else if (result == 0)
>>> +			dreq->pnfsflags |= data->pnfsflags;
>>> +		else if (result == 1)
>>> +#endif
>>> +			nfs_direct_write_execute(data, &task_setup_data, &msg);
>>>     
>> ditto previous *try_to comments...
>>
>>   
>>>  
>>>  		started += bytes;
>>>  		user_addr += bytes;
>>> @@ -905,8 +960,26 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
>>>  		dreq->iocb = iocb;
>>>  
>>>  	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
>>> +#if defined(CONFIG_PNFS)
>>> +	if (result)
>>> +		goto out;
>>> +	if (dreq->pnfsflags & PNFS_NO_RPC)
>>> +		/* FIXME: Right now non-rpc layout types must perform
>>> +		 * synchronous direct i/o.
>>> +		 * New pNFS callback to wait on outstanding requests?
>>> +		 */
>>> +		if (dreq->error)
>>> +			result = dreq->error;
>>> +		else
>>> +			result = dreq->count;
>>> +	else
>>> +		result = nfs_direct_wait(dreq);
>>> +out:
>>> +#else
>>>  	if (!result)
>>>  		result = nfs_direct_wait(dreq);
>>> +#endif
>>>     
>> same comment as for the read path.
>>
>>   
>>> +
>>>  	nfs_direct_req_release(dreq);
>>>  
>>>  	return result;
>>>     
>>   



More information about the pNFS mailing list