123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656 |
- // File system implementation. Five layers:
- // + Blocks: allocator for raw disk blocks.
- // + Log: crash recovery for multi-step updates.
- // + Files: inode allocator, reading, writing, metadata.
- // + Directories: inode with special contents (list of other inodes!)
- // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
- //
- // This file contains the low-level file system manipulation
- // routines. The (higher-level) system call implementations
- // are in sysfile.c.
- #include "types.h"
- #include "defs.h"
- #include "param.h"
- #include "stat.h"
- #include "mmu.h"
- #include "proc.h"
- #include "spinlock.h"
- #include "buf.h"
- #include "fs.h"
- #include "file.h"
- #define min(a, b) ((a) < (b) ? (a) : (b))
- static void itrunc(struct inode*);
- // Read the super block.
- void
- readsb(int dev, struct superblock *sb)
- {
- struct buf *bp;
-
- bp = bread(dev, 1);
- memmove(sb, bp->data, sizeof(*sb));
- brelse(bp);
- }
- // Zero a block.
- static void
- bzero(int dev, int bno)
- {
- struct buf *bp;
-
- bp = bread(dev, bno);
- memset(bp->data, 0, BSIZE);
- log_write(bp);
- brelse(bp);
- }
- // Blocks.
- // Allocate a zeroed disk block.
- static uint
- balloc(uint dev)
- {
- int b, bi, m;
- struct buf *bp;
- struct superblock sb;
- bp = 0;
- readsb(dev, &sb);
- for(b = 0; b < sb.size; b += BPB){
- bp = bread(dev, BBLOCK(b, sb.ninodes));
- for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
- m = 1 << (bi % 8);
- if((bp->data[bi/8] & m) == 0){ // Is block free?
- bp->data[bi/8] |= m; // Mark block in use.
- log_write(bp);
- brelse(bp);
- bzero(dev, b + bi);
- return b + bi;
- }
- }
- brelse(bp);
- }
- panic("balloc: out of blocks");
- return -1;
- }
- // Free a disk block.
- static void
- bfree(int dev, uint b)
- {
- struct buf *bp;
- struct superblock sb;
- int bi, m;
- readsb(dev, &sb);
- bp = bread(dev, BBLOCK(b, sb.ninodes));
- bi = b % BPB;
- m = 1 << (bi % 8);
- if((bp->data[bi/8] & m) == 0)
- panic("freeing free block");
- bp->data[bi/8] &= ~m;
- log_write(bp);
- brelse(bp);
- }
- // Inodes.
- //
- // An inode describes a single unnamed file.
- // The inode disk structure holds metadata: the file's type,
- // its size, the number of links referring to it, and the
- // list of blocks holding the file's content.
- //
- // The inodes are laid out sequentially on disk immediately after
- // the superblock. Each inode has a number, indicating its
- // position on the disk.
- //
- // The kernel keeps a cache of in-use inodes in memory
- // to provide a place for synchronizing access
- // to inodes used by multiple processes. The cached
- // inodes include book-keeping information that is
- // not stored on disk: ip->ref and ip->flags.
- //
- // An inode and its in-memory represtative go through a
- // sequence of states before they can be used by the
- // rest of the file system code.
- //
- // * Allocation: an inode is allocated if its type (on disk)
- // is non-zero. ialloc() allocates, iput() frees if
- // the link count has fallen to zero.
- //
- // * Referencing in cache: an entry in the inode cache
- // is free if ip->ref is zero. Otherwise ip->ref tracks
- // the number of in-memory pointers to the entry (open
- // files and current directories). iget() to find or
- // create a cache entry and increment its ref, iput()
- // to decrement ref.
- //
- // * Valid: the information (type, size, &c) in an inode
- // cache entry is only correct when the I_VALID bit
- // is set in ip->flags. ilock() reads the inode from
- // the disk and sets I_VALID, while iput() clears
- // I_VALID if ip->ref has fallen to zero.
- //
- // * Locked: file system code may only examine and modify
- // the information in an inode and its content if it
- // has first locked the inode. The I_BUSY flag indicates
- // that the inode is locked. ilock() sets I_BUSY,
- // while iunlock clears it.
- //
- // Thus a typical sequence is:
- // ip = iget(dev, inum)
- // ilock(ip)
- // ... examine and modify ip->xxx ...
- // iunlock(ip)
- // iput(ip)
- //
- // ilock() is separate from iget() so that system calls can
- // get a long-term reference to an inode (as for an open file)
- // and only lock it for short periods (e.g., in read()).
- // The separation also helps avoid deadlock and races during
- // pathname lookup. iget() increments ip->ref so that the inode
- // stays cached and pointers to it remain valid.
- //
- // Many internal file system functions expect the caller to
- // have locked the inodes involved; this lets callers create
- // multi-step atomic operations.
- struct {
- struct spinlock lock;
- struct inode inode[NINODE];
- } icache;
- void
- iinit(void)
- {
- memset(&icache, 0, sizeof(icache));
- initlock(&icache.lock, "icache");
- }
- static struct inode* iget(uint dev, uint inum);
- //PAGEBREAK!
- // Allocate a new inode with the given type on device dev.
- // A free inode has a type of zero.
- struct inode*
- ialloc(uint dev, short type)
- {
- int inum;
- struct buf *bp;
- struct dinode *dip;
- struct superblock sb;
- readsb(dev, &sb);
- for(inum = 1; inum < sb.ninodes; inum++){
- bp = bread(dev, IBLOCK(inum));
- dip = (struct dinode*)bp->data + inum%IPB;
- if(dip->type == 0){ // a free inode
- memset(dip, 0, sizeof(*dip));
- dip->type = type;
- log_write(bp); // mark it allocated on the disk
- brelse(bp);
- return iget(dev, inum);
- }
- brelse(bp);
- }
- panic("ialloc: no inodes");
- return 0;
- }
- // Copy a modified in-memory inode to disk.
- void
- iupdate(struct inode *ip)
- {
- struct buf *bp;
- struct dinode *dip;
- bp = bread(ip->dev, IBLOCK(ip->inum));
- dip = (struct dinode*)bp->data + ip->inum%IPB;
- dip->type = ip->type;
- dip->major = ip->major;
- dip->minor = ip->minor;
- dip->nlink = ip->nlink;
- dip->size = ip->size;
- memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
- log_write(bp);
- brelse(bp);
- }
- // Find the inode with number inum on device dev
- // and return the in-memory copy. Does not lock
- // the inode and does not read it from disk.
- static struct inode*
- iget(uint dev, uint inum)
- {
- struct inode *ip, *empty;
- acquire(&icache.lock);
- // Is the inode already cached?
- empty = 0;
- for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
- if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
- ip->ref++;
- release(&icache.lock);
- return ip;
- }
- if(empty == 0 && ip->ref == 0) // Remember empty slot.
- empty = ip;
- }
- // Recycle an inode cache entry.
- if(empty == 0)
- panic("iget: no inodes");
- ip = empty;
- ip->dev = dev;
- ip->inum = inum;
- ip->ref = 1;
- ip->flags = 0;
- release(&icache.lock);
- return ip;
- }
- // Increment reference count for ip.
- // Returns ip to enable ip = idup(ip1) idiom.
- struct inode*
- idup(struct inode *ip)
- {
- acquire(&icache.lock);
- ip->ref++;
- release(&icache.lock);
- return ip;
- }
- // Lock the given inode.
- // Reads the inode from disk if necessary.
- void
- ilock(struct inode *ip)
- {
- struct buf *bp;
- struct dinode *dip;
- if(ip == 0 || ip->ref < 1)
- panic("ilock");
- acquire(&icache.lock);
- while(ip->flags & I_BUSY)
- sleep(ip, &icache.lock);
- ip->flags |= I_BUSY;
- release(&icache.lock);
- if(!(ip->flags & I_VALID)){
- bp = bread(ip->dev, IBLOCK(ip->inum));
- dip = (struct dinode*)bp->data + ip->inum%IPB;
- ip->type = dip->type;
- ip->major = dip->major;
- ip->minor = dip->minor;
- ip->nlink = dip->nlink;
- ip->size = dip->size;
- memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
- brelse(bp);
- ip->flags |= I_VALID;
- if(ip->type == 0)
- panic("ilock: no type");
- }
- }
- // Unlock the given inode.
- void
- iunlock(struct inode *ip)
- {
- if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
- panic("iunlock");
- acquire(&icache.lock);
- ip->flags &= ~I_BUSY;
- wakeup(ip);
- release(&icache.lock);
- }
- // Drop a reference to an in-memory inode.
- // If that was the last reference, the inode cache entry can
- // be recycled.
- // If that was the last reference and the inode has no links
- // to it, free the inode (and its content) on disk.
- void
- iput(struct inode *ip)
- {
- acquire(&icache.lock);
- if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
- // inode has no links: truncate and free inode.
- if(ip->flags & I_BUSY)
- panic("iput busy");
- ip->flags |= I_BUSY;
- release(&icache.lock);
- itrunc(ip);
- ip->type = 0;
- iupdate(ip);
- acquire(&icache.lock);
- ip->flags = 0;
- wakeup(ip);
- }
- ip->ref--;
- release(&icache.lock);
- }
- // Common idiom: unlock, then put.
- void
- iunlockput(struct inode *ip)
- {
- iunlock(ip);
- iput(ip);
- }
- //PAGEBREAK!
- // Inode content
- //
- // The content (data) associated with each inode is stored
- // in blocks on the disk. The first NDIRECT block numbers
- // are listed in ip->addrs[]. The next NINDIRECT blocks are
- // listed in block ip->addrs[NDIRECT].
- // Return the disk block address of the nth block in inode ip.
- // If there is no such block, bmap allocates one.
- static uint
- bmap(struct inode *ip, uint bn)
- {
- uint addr, *a;
- struct buf *bp;
- if(bn < NDIRECT){
- if((addr = ip->addrs[bn]) == 0)
- ip->addrs[bn] = addr = balloc(ip->dev);
- return addr;
- }
- bn -= NDIRECT;
- if(bn < NINDIRECT){
- // Load indirect block, allocating if necessary.
- if((addr = ip->addrs[NDIRECT]) == 0)
- ip->addrs[NDIRECT] = addr = balloc(ip->dev);
- bp = bread(ip->dev, addr);
- a = (uint*)bp->data;
- if((addr = a[bn]) == 0){
- a[bn] = addr = balloc(ip->dev);
- log_write(bp);
- }
- brelse(bp);
- return addr;
- }
- panic("bmap: out of range");
- return -1;
- }
- // Truncate inode (discard contents).
- // Only called when the inode has no links
- // to it (no directory entries referring to it)
- // and has no in-memory reference to it (is
- // not an open file or current directory).
- static void
- itrunc(struct inode *ip)
- {
- int i, j;
- struct buf *bp;
- uint *a;
- for(i = 0; i < NDIRECT; i++){
- if(ip->addrs[i]){
- bfree(ip->dev, ip->addrs[i]);
- ip->addrs[i] = 0;
- }
- }
-
- if(ip->addrs[NDIRECT]){
- bp = bread(ip->dev, ip->addrs[NDIRECT]);
- a = (uint*)bp->data;
- for(j = 0; j < NINDIRECT; j++){
- if(a[j])
- bfree(ip->dev, a[j]);
- }
- brelse(bp);
- bfree(ip->dev, ip->addrs[NDIRECT]);
- ip->addrs[NDIRECT] = 0;
- }
- ip->size = 0;
- iupdate(ip);
- }
- // Copy stat information from inode.
- void
- stati(struct inode *ip, struct stat *st)
- {
- st->dev = ip->dev;
- st->ino = ip->inum;
- st->type = ip->type;
- st->nlink = ip->nlink;
- st->size = ip->size;
- }
- //PAGEBREAK!
- // Read data from inode.
- int
- readi(struct inode *ip, char *dst, uint off, uint n)
- {
- uint tot, m;
- struct buf *bp;
- if(ip->type == T_DEV){
- if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
- return -1;
- //cprintf("inside readi\n");
- return devsw[ip->major].read(ip, dst, n);
- }
- if(off > ip->size || off + n < off)
- return -1;
- if(off + n > ip->size)
- n = ip->size - off;
- for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
- bp = bread(ip->dev, bmap(ip, off/BSIZE));
- m = min(n - tot, BSIZE - off%BSIZE);
- memmove(dst, bp->data + off%BSIZE, m);
- brelse(bp);
- }
- return n;
- }
- // PAGEBREAK!
- // Write data to inode.
- int
- writei(struct inode *ip, char *src, uint off, uint n)
- {
- uint tot, m;
- struct buf *bp;
- //cprintf("inside writei: type=%x major=%x, func addr: %x\n", ip->type, ip->major, devsw[ip->major].write);
- if(ip->type == T_DEV){
- if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
- return -1;
- //cprintf("before calling consolewrite: major=%x, func addr: %x\n", ip->major, devsw[ip->major].write);
- return devsw[ip->major].write(ip, src, n);
- }
- if(off > ip->size || off + n < off)
- return -1;
- if(off + n > MAXFILE*BSIZE)
- return -1;
- for(tot=0; tot<n; tot+=m, off+=m, src+=m){
- bp = bread(ip->dev, bmap(ip, off/BSIZE));
- m = min(n - tot, BSIZE - off%BSIZE);
- memmove(bp->data + off%BSIZE, src, m);
- log_write(bp);
- brelse(bp);
- }
- if(n > 0 && off > ip->size){
- ip->size = off;
- iupdate(ip);
- }
- return n;
- }
- //PAGEBREAK!
- // Directories
- int
- namecmp(const char *s, const char *t)
- {
- return strncmp(s, t, DIRSIZ);
- }
- // Look for a directory entry in a directory.
- // If found, set *poff to byte offset of entry.
- struct inode*
- dirlookup(struct inode *dp, char *name, uint *poff)
- {
- uint off, inum;
- struct dirent de;
- if(dp->type != T_DIR)
- panic("dirlookup not DIR");
- for(off = 0; off < dp->size; off += sizeof(de)){
- if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
- panic("dirlink read");
- if(de.inum == 0)
- continue;
- if(namecmp(name, de.name) == 0){
- // entry matches path element
- if(poff)
- *poff = off;
- inum = de.inum;
- return iget(dp->dev, inum);
- }
- }
- return 0;
- }
- // Write a new directory entry (name, inum) into the directory dp.
- int
- dirlink(struct inode *dp, char *name, uint inum)
- {
- int off;
- struct dirent de;
- struct inode *ip;
- // Check that name is not present.
- if((ip = dirlookup(dp, name, 0)) != 0){
- iput(ip);
- return -1;
- }
- // Look for an empty dirent.
- for(off = 0; off < dp->size; off += sizeof(de)){
- if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
- panic("dirlink read");
- if(de.inum == 0)
- break;
- }
- strncpy(de.name, name, DIRSIZ);
- de.inum = inum;
- if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
- panic("dirlink");
-
- return 0;
- }
- //PAGEBREAK!
- // Paths
- // Copy the next path element from path into name.
- // Return a pointer to the element following the copied one.
- // The returned path has no leading slashes,
- // so the caller can check *path=='\0' to see if the name is the last one.
- // If no name to remove, return 0.
- //
- // Examples:
- // skipelem("a/bb/c", name) = "bb/c", setting name = "a"
- // skipelem("///a//bb", name) = "bb", setting name = "a"
- // skipelem("a", name) = "", setting name = "a"
- // skipelem("", name) = skipelem("////", name) = 0
- //
- static char*
- skipelem(char *path, char *name)
- {
- char *s;
- int len;
- while(*path == '/')
- path++;
- if(*path == 0)
- return 0;
- s = path;
- while(*path != '/' && *path != 0)
- path++;
- len = path - s;
- if(len >= DIRSIZ)
- memmove(name, s, DIRSIZ);
- else {
- memmove(name, s, len);
- name[len] = 0;
- }
- while(*path == '/')
- path++;
- return path;
- }
- // Look up and return the inode for a path name.
- // If parent != 0, return the inode for the parent and copy the final
- // path element into name, which must have room for DIRSIZ bytes.
- static struct inode*
- namex(char *path, int nameiparent, char *name)
- {
- struct inode *ip, *next;
- if(*path == '/')
- ip = iget(ROOTDEV, ROOTINO);
- else
- ip = idup(curr_proc->cwd);
- while((path = skipelem(path, name)) != 0){
- ilock(ip);
- if(ip->type != T_DIR){
- iunlockput(ip);
- return 0;
- }
- if(nameiparent && *path == '\0'){
- // Stop one level early.
- iunlock(ip);
- return ip;
- }
- if((next = dirlookup(ip, name, 0)) == 0){
- iunlockput(ip);
- return 0;
- }
- iunlockput(ip);
- ip = next;
- }
- if(nameiparent){
- iput(ip);
- return 0;
- }
- return ip;
- }
- struct inode*
- namei(char *path)
- {
- char name[DIRSIZ];
- return namex(path, 0, name);
- }
- struct inode*
- nameiparent(char *path, char *name)
- {
- return namex(path, 1, name);
- }
|