/* xfs_rw.c */
/*
 * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA 94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */
#include <xfs.h>
/*
 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
 * which clears the setuid and setgid bits when a file is written.
 */
int
xfs_write_clear_setuid(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_trans_t	*tp;
	int		error;

	mp = ip->i_mount;
	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
				       0, 0, 0))) {
		xfs_trans_cancel(tp, 0);
		return error;
	}
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	ip->i_d.di_mode &= ~ISUID;

	/*
	 * Note that we don't have to worry about mandatory
	 * file locking being disabled here because we only
	 * clear the ISGID bit if the Group execute bit is
	 * on, but if it was on then mandatory locking wouldn't
	 * have been enabled.
	 */
	if (ip->i_d.di_mode & (IEXEC >> 3)) {
		ip->i_d.di_mode &= ~ISGID;
	}
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0, NULL);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;
}
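/*
 * Illustrative caller pattern (a sketch, not code from this file): the
 * write path would typically check the mode bits first so that the
 * transaction above is only paid for when a setuid/setgid bit is
 * actually set, e.g.
 *
 *	if (ip->i_d.di_mode & (ISUID | ISGID))
 *		error = xfs_write_clear_setuid(ip);
 *
 * Exactly where and how xfs_write() performs this check is an
 * assumption here; this file only supplies the clearing primitive.
 */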
/*
 * Force a shutdown of the filesystem instantly while keeping
 * the filesystem consistent. We don't do an unmount here; just shutdown
 * the shop, make sure that absolutely nothing persistent happens to
 * this filesystem after this point.
 */
void
_xfs_force_shutdown(
	xfs_mount_t	*mp,
	int		flags,
	char		*fname,
	int		lnnum)
{
	int		logerror;

#if defined(XFSDEBUG) && 0
	printk("xfs_force_shutdown entered [0x%p, %d]\n", mp, flags);
	KDB_ENTER();
#endif

#define XFS_MAX_DRELSE_RETRIES	10
	logerror = flags & XFS_LOG_IO_ERROR;

	if (!(flags & XFS_FORCE_UMOUNT)) {
		cmn_err(CE_NOTE,
	"xfs_force_shutdown(%s,0x%x) called from line %d of file %s. Return address = 0x%x",
			mp->m_fsname, flags, lnnum, fname, __return_address);
	}
	/*
	 * No need to duplicate efforts.
	 */
	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
		return;

	if (XFS_MTOVFS(mp)->vfs_dev == rootdev)
		cmn_err(CE_PANIC, "Fatal error on root filesystem");

	/*
	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
	 * queue up anybody new on the log reservations, and wakes up
	 * everybody who's sleeping on log reservations and tells
	 * them the bad news.
	 */
	if (xfs_log_force_umount(mp, logerror))
		return;

	if (flags & XFS_CORRUPT_INCORE) {
		cmn_err(CE_ALERT,
	"Corruption of in-memory data detected. Shutting down filesystem: %s",
			mp->m_fsname);
	} else if (!(flags & XFS_FORCE_UMOUNT)) {
		if (logerror) {
			cmn_err(CE_ALERT,
		"Log I/O Error Detected. Shutting down filesystem: %s",
				mp->m_fsname);
		} else {
#if CELL_CAPABLE
			if (flags & XFS_SHUTDOWN_REMOTE_REQ)
				cmn_err(CE_ALERT,
	"CXFS Shutdown request from remote cell. Shutting down filesystem: %s",
					mp->m_fsname);
			else
#endif
				cmn_err(CE_ALERT,
		"I/O Error Detected. Shutting down filesystem: %s",
					mp->m_fsname);
		}
	}
	if (!(flags & XFS_FORCE_UMOUNT)) {
		cmn_err(CE_ALERT,
		"Please umount the filesystem, and rectify the problem(s)");
	}

	/*
	 * Any delwri buffers left over for this device will be caught
	 * in the write path and destroyed. (pagebuf_write_full_page)
	 */

#if CELL_CAPABLE
	if (cell_enabled &&
	    !(flags & XFS_SHUTDOWN_REMOTE_REQ) &&
	    (mp->m_cxfstype & XFS_CXFS_SERVER)) {
		extern void cxfs_force_shutdown(xfs_mount_t *, int); /*@@@*/

		/*
		 * We're being called for a problem discovered locally.
		 * Tell CXFS to pass along the shutdown request.
		 * The check for cxfs server is done because xfs_goingdown
		 * will set forced_shutdown and we don't want it propagated.
		 */
		cxfs_force_shutdown(mp, flags);
	}
#endif /* CELL_CAPABLE */
}
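/*
 * Callers in this file reach the function above through the
 * xfs_force_shutdown() macro (see the xfs_bwrite() error path below).
 * A sketch of that wrapper, assuming it is defined in the XFS headers
 * roughly as follows:
 *
 *	#define xfs_force_shutdown(m, f) \
 *		_xfs_force_shutdown((m), (f), __FILE__, __LINE__)
 *
 * which is how the fname/lnnum arguments logged by the CE_NOTE message
 * above get filled in.
 */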
/*
 * Called when we want to stop a buffer from getting written or read.
 * We attach the EIO error, muck with its flags, and call biodone
 * so that the proper iodone callbacks get called.
 */
int
xfs_bioerror(
	xfs_buf_t	*bp)
{
#ifdef XFSERRORDEBUG
	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
#endif

	/*
	 * No need to wait until the buffer is unpinned.
	 * We aren't flushing it.
	 */
	xfs_buftrace("XFS IOERROR", bp);
	XFS_BUF_ERROR(bp, EIO);
	/*
	 * We're calling biodone, so delete B_DONE flag. Either way
	 * we have to call the iodone callback, and calling biodone
	 * probably is the best way since it takes care of
	 * GRIO as well.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_UNDONE(bp);
	XFS_BUF_STALE(bp);

	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
	xfs_biodone(bp);
	return (EIO);
}
/*
 * Same as xfs_bioerror, except that we are releasing the buffer
 * here ourselves, and avoiding the biodone call.
 * This is meant for userdata errors; metadata bufs come with
 * iodone functions attached, so that we can track down errors.
 */
int
xfs_bioerror_relse(
	xfs_buf_t	*bp)
{
	int64_t		fl;

	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);

	xfs_buftrace("XFS IOERRELSE", bp);
	fl = XFS_BUF_BFLAGS(bp);
	/*
	 * No need to wait until the buffer is unpinned.
	 * We aren't flushing it.
	 *
	 * chunkhold expects B_DONE to be set, whether
	 * we actually finish the I/O or not. We don't want to
	 * change that interface.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_DONE(bp);
	XFS_BUF_STALE(bp);
	XFS_BUF_CLR_IODONE_FUNC(bp);
	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
	if (!(fl & XFS_B_ASYNC)) {
		/*
		 * Mark b_error and B_ERROR _both_.
		 * Lots of chunkcache code assumes that.
		 * There's no reason to mark error for
		 * ASYNC buffers.
		 */
		XFS_BUF_ERROR(bp, EIO);
		XFS_BUF_V_IODONESEMA(bp);
	} else {
		xfs_buf_relse(bp);
	}
	return (EIO);
}

/*
 * Prints out an ALERT message about I/O error.
 */
void
xfs_ioerror_alert(
	char			*func,
	struct xfs_mount	*mp,
	xfs_buf_t		*bp,
	xfs_daddr_t		blkno)
{
	cmn_err(CE_ALERT,
 "I/O error in filesystem (\"%s\") meta-data dev 0x%x block 0x%llx\n"
 " (\"%s\") error %d buf count %u",
		(mp == NULL || mp->m_fsname == NULL) ?
			"(fs name not set)" : mp->m_fsname,
		XFS_BUF_TARGET(bp),
		(__uint64_t)blkno,
		func,
		XFS_BUF_GETERROR(bp),
		XFS_BUF_COUNT(bp));
}
/*
 * This isn't an absolute requirement, but it is
 * just a good idea to call xfs_read_buf instead of
 * directly doing a read_buf call. For one, we shouldn't
 * be doing this disk read if we are in SHUTDOWN state anyway,
 * so this stops that from happening. Secondly, this does all
 * the error checking stuff and the brelse if appropriate for
 * the caller, so the code can be a little leaner.
 */
int
xfs_read_buf(
	struct xfs_mount *mp,
	buftarg_t	 *target,
	xfs_daddr_t	 blkno,
	int		 len,
	uint		 flags,
	xfs_buf_t	 **bpp)
{
	xfs_buf_t	 *bp;
	int		 error;

	if (flags)
		bp = xfs_buf_read_flags(target, blkno, len, flags);
	else
		bp = xfs_buf_read(target, blkno, len, flags);
	if (!bp)
		return XFS_ERROR(EIO);
	error = XFS_BUF_GETERROR(bp);
	if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
		*bpp = bp;
	} else {
		*bpp = NULL;
		if (error) {
			xfs_ioerror_alert("xfs_read_buf", mp, bp,
					  XFS_BUF_ADDR(bp));
		} else {
			error = XFS_ERROR(EIO);
		}
		if (bp) {
			XFS_BUF_UNDONE(bp);
			XFS_BUF_UNDELAYWRITE(bp);
			XFS_BUF_STALE(bp);
			/*
			 * brelse clears B_ERROR and b_error
			 */
			xfs_buf_relse(bp);
		}
	}
	return (error);
}

/*
 * Wrapper around bwrite() so that we can trap
 * write errors, and act accordingly.
 */
int
xfs_bwrite(
	struct xfs_mount *mp,
	struct xfs_buf	 *bp)
{
	int		 error;

	/*
	 * XXXsup how does this work for quotas.
	 */
	XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
	XFS_BUF_SET_FSPRIVATE3(bp, mp);
	XFS_BUF_WRITE(bp);

	if ((error = XFS_bwrite(bp))) {
		ASSERT(mp);
		/*
		 * Cannot put a buftrace here since if the buffer is not
		 * B_HOLD then we will brelse() the buffer before returning
		 * from bwrite and we could be tracing a buffer that has
		 * been reused.
		 */
		xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
	}
	return (error);
}
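/*
 * Note on xfs_bwrite() above: it installs xfs_bdstrat_cb as the buffer's
 * strategy routine and stashes the mount pointer in FSPRIVATE3.  The
 * intent (an assumption here, since xfs_bdstrat_cb() is defined outside
 * this file) is that the strategy callback can recover the mount from
 * the buffer and, if XFS_FORCED_SHUTDOWN() is already set, fail the
 * buffer via xfs_bioerror()/xfs_bioerror_relse() above rather than
 * issuing new I/O to a shut-down filesystem.
 */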
/*
 * xfs_inval_cached_pages()
 * This routine is responsible for keeping direct I/O and buffered I/O
 * somewhat coherent. From here we make sure that we're at least
 * temporarily holding the inode I/O lock exclusively and then call
 * the page cache to flush and invalidate any cached pages. If there
 * are no cached pages this routine will be very quick.
 */
void
xfs_inval_cached_pages(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,
	xfs_off_t	len,
	void		*dio)
{
	xfs_dio_t	*diop = (xfs_dio_t *)dio;
	int		relock;
	__uint64_t	flush_end;
	xfs_mount_t	*mp;

	if (!VN_CACHED(vp)) {
		return;
	}

	mp = io->io_mount;

	/*
	 * We need to get the I/O lock exclusively in order
	 * to safely invalidate pages and mappings.
	 */
	relock = ismrlocked(io->io_iolock, MR_ACCESS);
	if (relock) {
		XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
		XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
	}

	/* Writing beyond EOF creates a hole that must be zeroed */
	if (diop && (offset > XFS_SIZE(mp, io))) {
		xfs_fsize_t	isize;

		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
		isize = XFS_SIZE(mp, io);
		if (offset > isize) {
			xfs_zero_eof(vp, io, offset, isize, offset, NULL);
		}
		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	}

	/*
	 * Round up to the next page boundary and then back
	 * off by one byte. We back off by one because this
	 * is a first byte/last byte interface rather than
	 * a start/len interface. We round up to a page
	 * boundary because the page/chunk cache code is
	 * slightly broken and won't invalidate all the right
	 * buffers otherwise.
	 *
	 * We also have to watch out for overflow, so if we
	 * go over the maximum off_t value we just pull back
	 * to that max.
	 */
	flush_end = (__uint64_t)ctooff(offtoc(offset + len)) - 1;
	if (flush_end > (__uint64_t)LONGLONG_MAX) {
		flush_end = LONGLONG_MAX;
	}
	VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
	if (relock) {
		XFS_IUNLOCK(mp, io, XFS_IOLOCK_EXCL);
		XFS_ILOCK(mp, io, XFS_IOLOCK_SHARED);
	}
}
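/*
 * Worked example for the rounding above, assuming 4K pages and the
 * usual IRIX-style click macros (offtoc() rounds an offset up to whole
 * pages, offtoct() truncates, ctooff() converts clicks back to bytes;
 * these semantics are an assumption, the macros are defined elsewhere):
 *
 *	offset = 5000, len = 3000
 *	start byte = ctooff(offtoct(5000))    = 4096
 *	flush_end  = ctooff(offtoc(8000)) - 1 = 8191
 *
 * i.e. the [first byte, last byte] range is widened to whole pages.
 * Note that this version of the routine passes -1 (flush to end of
 * file) to VOP_FLUSHINVAL_PAGES rather than the computed flush_end.
 */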
/*
 * The inode reference cache: an array of held inode pointers (allocated
 * on first use in xfs_refcache_insert() below) plus the spinlock and
 * counters that protect and describe it.
 */
spinlock_t	xfs_refcache_lock = SPIN_LOCK_UNLOCKED;
xfs_inode_t	**xfs_refcache;
int		xfs_refcache_size;
int		xfs_refcache_index;
int		xfs_refcache_busy;
int		xfs_refcache_count;
/*
 * Timer callback to mark SB dirty, make sure we keep purging refcache
 */
void
xfs_refcache_sbdirty(struct super_block *sb)
{
	sb->s_dirt = 1;
}
/*
 * Insert the given inode into the reference cache.
 */
void
xfs_refcache_insert(
	xfs_inode_t	*ip)
{
	vnode_t		*vp;
	xfs_inode_t	*release_ip;
	xfs_inode_t	**refcache;

	ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE));

	/*
	 * If an unmount is busy blowing entries out of the cache,
	 * then don't bother.
	 */
	if (xfs_refcache_busy) {
		return;
	}

	/*
	 * If we tuned the refcache down to zero, don't do anything.
	 */
	if (!xfs_refcache_size) {
		return;
	}

	/*
	 * The inode is already in the refcache, so don't bother
	 * with it.
	 */
	if (ip->i_refcache != NULL) {
		return;
	}

	vp = XFS_ITOV(ip);
	/* ASSERT(vp->v_count > 0); */
	VN_HOLD(vp);

	/*
	 * We allocate the reference cache on use so that we don't
	 * waste the memory on systems not being used as NFS servers.
	 */
	if (xfs_refcache == NULL) {
		refcache = (xfs_inode_t **)kmem_zalloc(XFS_REFCACHE_SIZE_MAX *
						       sizeof(xfs_inode_t *),
						       KM_SLEEP);
	} else {
		refcache = NULL;
	}

	spin_lock(&xfs_refcache_lock);

	/*
	 * If we allocated memory for the refcache above and it still
	 * needs it, then use the memory we allocated. Otherwise we'll
	 * free the memory below.
	 */
	if (refcache != NULL) {
		if (xfs_refcache == NULL) {
			xfs_refcache = refcache;
			refcache = NULL;
		}
	}

	/*
	 * If an unmount is busy clearing out the cache, don't add new
	 * entries to it.
	 */
	if ((xfs_refcache_busy) || (vp->v_vfsp->vfs_flag & VFS_OFFLINE)) {
		spin_unlock(&xfs_refcache_lock);
		VN_RELE(vp);
		/*
		 * If we allocated memory for the refcache above but someone
		 * else beat us to using it, then free the memory now.
		 */
		if (refcache != NULL) {
			kmem_free(refcache,
				  XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *));
		}
		return;
	}
	release_ip = xfs_refcache[xfs_refcache_index];
	if (release_ip != NULL) {
		release_ip->i_refcache = NULL;
		xfs_refcache_count--;
		ASSERT(xfs_refcache_count >= 0);
	}
	xfs_refcache[xfs_refcache_index] = ip;
	ASSERT(ip->i_refcache == NULL);
	ip->i_refcache = &(xfs_refcache[xfs_refcache_index]);
	xfs_refcache_count++;
	ASSERT(xfs_refcache_count <= xfs_refcache_size);
	xfs_refcache_index++;
	if (xfs_refcache_index == xfs_refcache_size) {
		xfs_refcache_index = 0;
	}
	spin_unlock(&xfs_refcache_lock);

	/*
	 * Save the pointer to the inode to be released so that we can
	 * VN_RELE it once we've dropped our inode locks in xfs_rwunlock().
	 * The pointer may be NULL, but that's OK.
	 */
	ip->i_release = release_ip;

	/*
	 * If we allocated memory for the refcache above but someone
	 * else beat us to using it, then free the memory now.
	 */
	if (refcache != NULL) {
		kmem_free(refcache,
			  XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *));
	}
	return;
}
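/*
 * Sketch of the consumer side of ip->i_release (an assumption here;
 * xfs_rwunlock() is defined elsewhere): once the inode locks have been
 * dropped, the displaced entry saved above would be released roughly as
 *
 *	if (ip->i_release) {
 *		release_ip = ip->i_release;
 *		ip->i_release = NULL;
 *		VN_RELE(XFS_ITOV(release_ip));
 *	}
 *
 * which is why the VN_RELE of the evicted inode is deferred here rather
 * than done while the refcache spinlock or the iolock is still held.
 */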
/*
 * If the given inode is in the reference cache, purge its entry and
 * release the reference on the vnode.
 */
void
xfs_refcache_purge_ip(
	xfs_inode_t	*ip)
{
	vnode_t	*vp;
	int	error;

	/*
	 * If we're not pointing to our entry in the cache, then
	 * we must not be in the cache.
	 */
	if (ip->i_refcache == NULL) {
		return;
	}

	spin_lock(&xfs_refcache_lock);
	if (ip->i_refcache == NULL) {
		spin_unlock(&xfs_refcache_lock);
		return;
	}

	/*
	 * Clear both our pointer to the cache entry and its pointer
	 * back to us.
	 */
	ASSERT(*(ip->i_refcache) == ip);
	*(ip->i_refcache) = NULL;
	ip->i_refcache = NULL;
	xfs_refcache_count--;
	ASSERT(xfs_refcache_count >= 0);
	spin_unlock(&xfs_refcache_lock);

	vp = XFS_ITOV(ip);
	/* ASSERT(vp->v_count > 1); */
	VOP_RELEASE(vp, error);
	VN_RELE(vp);

	return;
}
/*
 * This is called from the XFS unmount code to purge all entries for the
 * given mount from the cache. It uses the refcache busy counter to
 * make sure that new entries are not added to the cache as we purge them.
 */
void
xfs_refcache_purge_mp(
	xfs_mount_t	*mp)
{
	vnode_t		*vp;
	int		error, i;
	xfs_inode_t	*ip;

	if (xfs_refcache == NULL) {
		return;
	}

	spin_lock(&xfs_refcache_lock);
	/*
	 * Bumping the busy counter keeps new entries from being added
	 * to the cache. We use a counter since multiple unmounts could
	 * be in here simultaneously.
	 */
	xfs_refcache_busy++;

	for (i = 0; i < xfs_refcache_size; i++) {
		ip = xfs_refcache[i];
		if ((ip != NULL) && (ip->i_mount == mp)) {
			xfs_refcache[i] = NULL;
			ip->i_refcache = NULL;
			xfs_refcache_count--;
			ASSERT(xfs_refcache_count >= 0);
			spin_unlock(&xfs_refcache_lock);
			vp = XFS_ITOV(ip);
			VOP_RELEASE(vp, error);
			VN_RELE(vp);
			spin_lock(&xfs_refcache_lock);
		}
	}

	xfs_refcache_busy--;
	ASSERT(xfs_refcache_busy >= 0);
	spin_unlock(&xfs_refcache_lock);
}
/*
 * This is called from the XFS sync code to ensure that the refcache
 * is emptied out over time. We purge a small number of entries with
 * each call.
 */
void
xfs_refcache_purge_some(xfs_mount_t *mp)
{
	int		error, i;
	xfs_inode_t	*ip;
	int		iplist_index;
	xfs_inode_t	**iplist;
	int		purge_count;

	if ((xfs_refcache == NULL) || (xfs_refcache_count == 0)) {
		return;
	}

	iplist_index = 0;
	purge_count = xfs_params.xfs_un.refcache_purge;
	iplist = (xfs_inode_t **)kmem_zalloc(purge_count *
					     sizeof(xfs_inode_t *), KM_SLEEP);

	spin_lock(&xfs_refcache_lock);

	/*
	 * Store any inodes we find in the next several entries
	 * into the iplist array to be released after dropping
	 * the spinlock. We always start looking from the currently
	 * oldest place in the cache. We move the refcache index
	 * forward as we go so that we are sure to eventually clear
	 * out the entire cache when the system goes idle.
	 */
	for (i = 0; i < purge_count; i++) {
		ip = xfs_refcache[xfs_refcache_index];
		if (ip != NULL) {
			xfs_refcache[xfs_refcache_index] = NULL;
			ip->i_refcache = NULL;
			xfs_refcache_count--;
			ASSERT(xfs_refcache_count >= 0);
			iplist[iplist_index] = ip;
			iplist_index++;
		}
		xfs_refcache_index++;
		if (xfs_refcache_index == xfs_refcache_size) {
			xfs_refcache_index = 0;
		}
	}

	spin_unlock(&xfs_refcache_lock);

	/*
	 * If there are still entries in the refcache,
	 * set timer to mark the SB dirty to make sure that
	 * we hit sync even if filesystem is idle, so that we'll
	 * purge some more later.
	 */
	if (xfs_refcache_count) {
		del_timer_sync(&mp->m_sbdirty_timer);
		mp->m_sbdirty_timer.data =
		    (unsigned long)LINVFS_GET_IP(XFS_ITOV(mp->m_rootip))->i_sb;
		mp->m_sbdirty_timer.expires = jiffies + 2*HZ;
		add_timer(&mp->m_sbdirty_timer);
	}

	/*
	 * Now drop the inodes we collected.
	 */
	for (i = 0; i < iplist_index; i++) {
		VOP_RELEASE(XFS_ITOV(iplist[i]), error);
		VN_RELE(XFS_ITOV(iplist[i]));
	}

	kmem_free(iplist, purge_count * sizeof(xfs_inode_t *));
}
/*
 * This is called when the refcache is dynamically resized
 * via a sysctl.
 *
 * If the new size is smaller than the old size, purge all
 * entries in slots greater than the new size, and move
 * the index if necessary.
 *
 * If the refcache hasn't even been allocated yet, or the
 * new size is larger than the old size, just set the value
 * of xfs_refcache_size.
 */
void
xfs_refcache_resize(int xfs_refcache_new_size)
{
	int		i;
	xfs_inode_t	*ip;
	int		iplist_index = 0;
	xfs_inode_t	**iplist;
	int		error;

	/*
	 * If the new size is smaller than the current size,
	 * purge entries to create smaller cache, and
	 * reposition index if necessary.
	 * Don't bother if no refcache yet.
	 */
	if (xfs_refcache && (xfs_refcache_new_size < xfs_refcache_size)) {

		iplist = (xfs_inode_t **)kmem_zalloc(XFS_REFCACHE_SIZE_MAX *
						     sizeof(xfs_inode_t *),
						     KM_SLEEP);

		spin_lock(&xfs_refcache_lock);

		for (i = xfs_refcache_new_size; i < xfs_refcache_size; i++) {
			ip = xfs_refcache[i];
			if (ip != NULL) {
				xfs_refcache[i] = NULL;
				ip->i_refcache = NULL;
				xfs_refcache_count--;
				ASSERT(xfs_refcache_count >= 0);
				iplist[iplist_index] = ip;
				iplist_index++;
			}
		}

		xfs_refcache_size = xfs_refcache_new_size;

		/*
		 * Move index to beginning of cache if it's now past the end
		 */
		if (xfs_refcache_index >= xfs_refcache_new_size)
			xfs_refcache_index = 0;

		spin_unlock(&xfs_refcache_lock);

		/*
		 * Now drop the inodes we collected.
		 */
		for (i = 0; i < iplist_index; i++) {
			VOP_RELEASE(XFS_ITOV(iplist[i]), error);
			VN_RELE(XFS_ITOV(iplist[i]));
		}

		kmem_free(iplist,
			  XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *));
	} else {
		spin_lock(&xfs_refcache_lock);
		xfs_refcache_size = xfs_refcache_new_size;
		spin_unlock(&xfs_refcache_lock);
	}
}
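/*
 * How the function above gets invoked is outside this file; the
 * expectation (an assumption here) is that the handler for the refcache
 * size tunable, a sibling of the xfs_params.xfs_un.refcache_purge value
 * used in xfs_refcache_purge_some(), range-checks the new value against
 * XFS_REFCACHE_SIZE_MAX and then calls xfs_refcache_resize() with it,
 * so that shrinking the tunable immediately releases the vnode
 * references held in the now out-of-range slots.
 */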