[pnfs] [PATCH 15/28] pnfsblock: use bio to implement bl_read_pagelist
Fred Isaman
iisaman at citi.umich.edu
Tue Mar 11 15:31:56 EDT 2008
From: Fred <iisaman at citi.umich.edu>
This is a working stub. It only handles calls originating from
nfs_pagein_one which fit into a single extent. Others are sent back
to NFS for handling.
Signed-off-by: Fred <iisaman at citi.umich.edu>
---
fs/nfs/blocklayout/blocklayout.c | 141 ++++++++++++++++++++++++++++++++++-
fs/nfs/blocklayout/blocklayout.h | 4 +-
fs/nfs/blocklayout/blocklayoutdev.c | 1 +
3 files changed, 142 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f2b87ba..3e56441 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/buffer_head.h> /* DEBUG Needed for print_page calls */
+#include <linux/bio.h> /* struct bio */
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_BLOCKLAYOUT
@@ -43,6 +45,20 @@ MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
/* Callback operations to the pNFS client */
struct pnfs_client_operations *pnfs_callback_ops;
+static void print_page(struct page *page)
+{
+ dprintk("PRINTPAGE page %p\n", page);
+ dprintk(" PagePrivate %d\n", PagePrivate(page));
+ dprintk(" PageUptodate %d\n", PageUptodate(page));
+ dprintk(" PageError %d\n", PageError(page));
+ dprintk(" PageDirty %d\n", PageDirty(page));
+ dprintk(" PageReferenced %d\n", PageReferenced(page));
+ dprintk(" PageLocked %d\n", PageLocked(page));
+ dprintk(" PageWriteback %d\n", PageWriteback(page));
+ dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page));
+ dprintk("\n");
+}
+
static void print_bl_extent(struct pnfs_block_extent *be)
{
dprintk("PRINT EXTENT extent %p\n", be);
@@ -124,6 +140,36 @@ find_get_extent(struct pnfs_layout_segment *lseg, sector_t isect,
return out;
}
+/* Given the be associated with isect, determine if page data needs to be
+ * initialized.
+ */
+static int is_hole(struct pnfs_block_extent *be, sector_t isect)
+{
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA ||
+ be->be_state == PNFS_BLOCK_NONE_DATA)
+ return 1;
+ else if (be->be_state != PNFS_BLOCK_NEEDS_INIT)
+ return 0;
+ else {
+ uint32_t mask;
+ mask = 1 << ((isect - be->be_f_offset) >>
+ (PAGE_CACHE_SHIFT - 9));
+ return be->be_bitmap & mask;
+ }
+}
+
+static int
+dont_like_caller(struct nfs_page *req)
+{
+ if (atomic_read(&req->wb_complete)) {
+ /* Called by _multi */
+ return 1;
+ } else {
+ /* Called by _one */
+ return 0;
+ }
+}
+
static int
bl_commit(struct pnfs_layout_type *layoutid,
int sync,
@@ -137,16 +183,105 @@ bl_commit(struct pnfs_layout_type *layoutid,
return 1;
}
+static void bl_readlist_done(struct nfs_read_data *rdata, int status)
+{
+ /* STUB - need to think through what to put into rdata */
+ rdata->task.tk_status = status;
+ rdata->res.eof = 0;
+ rdata->res.count = (status ? 0 : rdata->args.count);
+ pnfs_callback_ops->nfs_readlist_complete(rdata);
+}
+
+static void bl_end_read_bio(struct bio *bio, int err)
+{
+ struct nfs_read_data *data = (struct nfs_read_data *)bio->bi_private;
+
+ dprintk("%s called with err=%i\n", __func__, err);
+ bl_readlist_done(data, err);
+ bio_put(bio);
+}
+
static int
bl_read_pagelist(struct pnfs_layout_type *layoutid,
struct page **pages,
unsigned int pgbase,
unsigned nr_pages,
- loff_t offset,
+ loff_t f_offset,
size_t count,
- struct nfs_read_data *nfs_data)
+ struct nfs_read_data *rdata)
{
- dprintk("%s enter\n", __func__);
+ int i, hole;
+ struct bio *bio;
+ struct pnfs_block_extent *be = NULL, *cow_read = NULL;
+ sector_t isect;
+
+ dprintk("%s enter nr_pages %u offset %Ld count %d\n", __func__,
+ nr_pages, f_offset, count);
+
+ if (f_offset & 0x1ff) {
+ /* This shouldn't be needed, just being paranoid */
+ int diff;
+ dprintk("%s f_offset %Ld not aligned\n",
+ __func__, f_offset);
+ diff = f_offset & 0x1ff;
+ f_offset &= ~0x1ff;
+ count += diff;
+ }
+ if (dont_like_caller(rdata->req)) {
+ dprintk("%s dont_like_caller failed\n", __func__);
+ goto use_mds;
+ }
+ isect = (sector_t) (f_offset >> 9);
+ be = find_get_extent(rdata->lseg, isect, &cow_read);
+ if (!be || count > (be->be_length << 9)) {
+ /* STUB - if count is large, should break into
+ * multiple bios. Also, need to check cow_read size.
+ */
+ goto use_mds;
+ }
+ hole = is_hole(be, isect);
+ if (hole && !cow_read) {
+ /* Fill hole w/ zeroes w/o accessing device */
+ dprintk("%s Zeroing pages for hole\n", __func__);
+ for (i = 0; i < nr_pages; i++) {
+ zero_user_page(pages[i], 0,
+ min_t(int, PAGE_CACHE_SIZE, count),
+ KM_USER0);
+ print_page(pages[i]);
+ count -= PAGE_CACHE_SIZE;
+ }
+ bl_readlist_done(rdata, 0);
+ } else {
+ struct pnfs_block_extent *be_read;
+ int added;
+ be_read = hole && cow_read ? cow_read : be;
+ bio = bio_alloc(GFP_NOIO, nr_pages);
+ bio->bi_sector = isect - be_read->be_f_offset +
+ be_read->be_v_offset;
+ bio->bi_bdev = be_read->be_mdev;
+ bio->bi_end_io = bl_end_read_bio;
+ bio->bi_private = rdata;
+ for (i = 0; i < nr_pages; i++) {
+ added = bio_add_page(bio, pages[i], PAGE_SIZE, 0);
+ if (added < PAGE_SIZE) {
+ dprintk("%s bio_add_page(%lu)=%i\n",
+ __func__, PAGE_SIZE, added);
+ bio_put(bio);
+ goto use_mds;
+ }
+ }
+ dprintk("%s submitting read bio %u@%Lu\n", __func__,
+ bio->bi_size, bio->bi_sector);
+ submit_bio(READ, bio);
+ }
+ put_extent(be);
+ put_extent(cow_read);
+ return 0;
+
+ use_mds:
+ dprintk("Giving up and using normal NFS\n");
+ put_extent(be);
+ put_extent(cow_read);
return 1;
}
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index fe138c4..3e6df8b 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -103,7 +103,8 @@ enum exstate4 {
PNFS_BLOCK_READWRITE_DATA = 0,
PNFS_BLOCK_READ_DATA = 1,
PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */
- PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */
+ PNFS_BLOCK_NONE_DATA = 3, /* unmapped, it's a hole */
+ PNFS_BLOCK_NEEDS_INIT = 4 /* INVAL upgrading to RW */
};
/* sector_t fields are all in 512-byte sectors */
@@ -115,6 +116,7 @@ struct pnfs_block_extent {
sector_t be_length; /* the size of the extent */
sector_t be_v_offset; /* the starting offset in the volume */
enum exstate4 be_state; /* the state of this extent */
+ uint32_t be_bitmap; /* state tracking for NEEDS_INIT */
struct kref be_refcnt;
};
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index b6979e7..4e67185 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -568,6 +568,7 @@ nfs4_blk_process_layoutget(struct pnfs_block_layout *bl,
}
INIT_LIST_HEAD(&be->be_node);
kref_init(&be->be_refcnt);
+ be->be_bitmap = 0;
READ_DEVID(&be->be_devid);
be->be_mdev = translate_devid(bl, &be->be_devid);
if (!be->be_mdev)
--
1.5.3.3
More information about the pNFS
mailing list