[pnfs] [PATCH 3/6] pnfs: use kref for pnfs_layout_type reference counting

William A. (Andy) Adamson androsadamson at gmail.com
Wed Jul 23 16:44:55 EDT 2008


On Wed, Jul 23, 2008 at 4:36 PM, Fredric Isaman <iisaman at citi.umich.edu> wrote:
>
>
> On Wed, 23 Jul 2008, andros at netapp.com wrote:
>
>> From: Andy Adamson <andros at localhost.localdomain>
>>
>> Prepare to separate reference counting of pnfs_layout_type structs from
>> the nfs_inode lo_lock spin lock.
>>
>> Replace the integer refcount with a struct kref. We still destroy the
>> pnfs_layout_type when the layout segment list is empty, but this is moved to
>> the layoutreturn release function. This will go away as well as the extra
>> kref_get in get_lock_alloc_layout once the spin lock and pnfs_laytout_type
>> reference counting are separated.
>>
>> Signed-off-by: Andy Adamson<andros at netapp.com>
>> ---
>> fs/nfs/nfs4proc.c         |   13 +++++++---
>> fs/nfs/pnfs.c             |   58 ++++++++++++++++++++++++---------------------
>> include/linux/nfs4_pnfs.h |    2 +-
>> include/linux/pnfs_xdr.h  |    1 +
>> 4 files changed, 42 insertions(+), 32 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 6febc16..29454b4 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -5348,14 +5348,19 @@ static void nfs4_pnfs_layoutreturn_release(void *calldata)
>> {
>>       struct nfs4_pnfs_layoutreturn *lrp = calldata;
>>       struct nfs_inode *nfsi = NFS_I(lrp->args.inode);
>> -     struct pnfs_layout_type *lo;
>>
>>       dprintk("--> %s nfsi->current_layout %p\n", __func__,
>>                                       nfsi->current_layout);
>> +     pnfs_layout_release(lrp->lo);
>>
>> -     lo = nfsi->current_layout;
>> -     BUG_ON(!lo);
>> -     pnfs_layout_release(lo);
>> +     /* FIX-ME this put balances extra kref_get in get_lock_alloc_layout,
>> +     * Proper refcounting should remove both list_empty check and this put.
>> +     */
>> +     if (list_empty(&lrp->lo->segs)) {
>> +             dprintk("%s call: pnfs_layout_release LISTEMPTY refcount %d\n",
>> +                     __func__, atomic_read(&lrp->lo->refcount.refcount));
>> +             pnfs_layout_release(lrp->lo);
>> +     }
>>       kfree(calldata);
>>       dprintk("<-- %s\n", __func__);
>> }
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 81291f7..26e4c7f 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -326,13 +326,39 @@ get_lock_current_layout(struct nfs_inode *nfsi)
>>       spin_lock(&nfsi->lo_lock);
>>       lo = nfsi->current_layout;
>>       if (lo)
>> -             lo->refcount++;
>> +             kref_get(&lo->refcount);
>>       else
>>               spin_unlock(&nfsi->lo_lock);
>>
>>       return lo;
>> }
>>
>> +static void
>> +destroy_layout(struct kref *kref)
>> +{
>> +     struct pnfs_layout_type *lo =
>> +                     container_of(kref, struct pnfs_layout_type, refcount);
>> +     struct layoutdriver_io_operations *io_ops = PNFS_LD_IO_OPS(lo);
>> +     struct nfs_inode *nfsi = NFS_I(lo->inode);
>> +     struct nfs_client *clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
>> +
>> +     dprintk("%s: freeing layout %p\n", __func__, lo);
>> +     io_ops->free_layout(lo);
>> +
>> +     nfsi->current_layout = NULL;
>> +
>> +     /* Unlist the inode.
>> +     * Note that nfsi->lo_lock must be released before getting
>> +     * cl_sem as the latter can sleep
>> +     */
>> +     spin_unlock(&nfsi->lo_lock);
>> +     down_write(&clp->cl_sem);
>> +     spin_lock(&nfsi->lo_lock);
>> +     if (!nfsi->current_layout)
>> +             list_del_init(&nfsi->lo_inodes);
>> +     up_write(&clp->cl_sem);
>> +}
>> +
>> /*
>>  * put and unlock nfs->current_layout
>>  */
>> @@ -340,32 +366,8 @@ static inline void
>> put_unlock_current_layout(struct nfs_inode *nfsi,
>>                           struct pnfs_layout_type *lo)
>> {
>> -     struct nfs_client *clp;
>> -
>>       BUG_ON_UNLOCKED_LO(lo);
>> -     BUG_ON(lo->refcount <= 0);
>> -
>> -     if (--lo->refcount == 0 && list_empty(&lo->segs)) {
>> -             struct layoutdriver_io_operations *io_ops =
>> -                     PNFS_LD_IO_OPS(lo);
>> -
>> -             dprintk("%s: freeing layout %p\n", __func__, lo);
>> -             io_ops->free_layout(lo);
>> -
>> -             nfsi->current_layout = NULL;
>> -
>> -             /* Unlist the inode.
>> -              * Note that nfsi->lo_lock must be released before getting
>> -              * cl_sem as the latter can sleep
>> -              */
>> -             clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
>> -             spin_unlock(&nfsi->lo_lock);
>> -             down_write(&clp->cl_sem);
>> -             spin_lock(&nfsi->lo_lock);
>> -             if (!nfsi->current_layout)
>> -                     list_del_init(&nfsi->lo_inodes);
>> -             up_write(&clp->cl_sem);
>> -     }
>> +     kref_put(&lo->refcount, destroy_layout);
>>       spin_unlock(&nfsi->lo_lock);
>> }
>>
>> @@ -598,6 +600,7 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
>>       lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
>>       if (lrp == NULL)
>>               goto out;
>> +     lrp->lo = lo;
>
> This deserves a comment as to why you are not calling kref_get.

ok. update_layout called by _pnfs_return_layout gets a reference
>
> Basically, return_layout is called with the assumption it will ensure a
> kref_put is eventually done.  Which points out a bug in the current
> code...in the (lrp == NULL) case above, the assumption is violated.

true. thanks!

-->Andy
>
>
> Fred
>
>>       lrp->args.reclaim = 0;
>>       lrp->args.layout_type = server->pnfs_curr_ld->id;
>>       lrp->args.return_type = type;
>> @@ -732,7 +735,7 @@ alloc_init_layout(struct inode *ino, struct layoutdriver_io_operations *io_ops)
>>
>>       seqlock_init(&lo->seqlock);
>>       memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
>> -     lo->refcount = 1;
>> +     kref_init(&lo->refcount);
>>       INIT_LIST_HEAD(&lo->segs);
>>       lo->roc_iomode = 0;
>>       lo->inode = ino;
>> @@ -793,6 +796,7 @@ get_lock_alloc_layout(struct inode *ino,
>>                       if (list_empty(&nfsi->lo_inodes))
>>                               list_add_tail(&nfsi->lo_inodes, &clp->cl_lo_inodes);
>>                       up_write(&clp->cl_sem);
>> +                     kref_get(&lo->refcount);
>>               } else
>>                       lo = ERR_PTR(-ENOMEM);
>>
>> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
>> index e02ad56..8f07e8e 100644
>> --- a/include/linux/nfs4_pnfs.h
>> +++ b/include/linux/nfs4_pnfs.h
>> @@ -45,7 +45,7 @@ struct pnfs_mount_type {
>>  * A reference is stored in the nfs_inode structure.
>>  */
>> struct pnfs_layout_type {
>> -     int refcount;
>> +     struct kref refcount;
>>       struct list_head segs;          /* layout segments list */
>>       int roc_iomode;                 /* iomode to return on close, 0=none */
>>       struct inode *inode;
>> diff --git a/include/linux/pnfs_xdr.h b/include/linux/pnfs_xdr.h
>> index 0da6bea..48b184a 100644
>> --- a/include/linux/pnfs_xdr.h
>> +++ b/include/linux/pnfs_xdr.h
>> @@ -114,6 +114,7 @@ struct nfs4_pnfs_layoutreturn_res {
>> };
>>
>> struct nfs4_pnfs_layoutreturn {
>> +     struct pnfs_layout_type *lo;
>>       struct nfs4_pnfs_layoutreturn_arg args;
>>       struct nfs4_pnfs_layoutreturn_res res;
>>       struct rpc_cred *cred;
>> --
>> 1.5.4.1
>>
>> _______________________________________________
>> pNFS mailing list
>> pNFS at linux-nfs.org
>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>>
> _______________________________________________
> pNFS mailing list
> pNFS at linux-nfs.org
> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>
>


More information about the pNFS mailing list