fs.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. // File system implementation. Five layers:
  2. // + Blocks: allocator for raw disk blocks.
  3. // + Log: crash recovery for multi-step updates.
  4. // + Files: inode allocator, reading, writing, metadata.
  5. // + Directories: inode with special contents (list of other inodes!)
  6. // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
  7. //
  8. // This file contains the low-level file system manipulation
  9. // routines. The (higher-level) system call implementations
  10. // are in sysfile.c.
  11. #include "types.h"
  12. #include "defs.h"
  13. #include "param.h"
  14. #include "stat.h"
  15. #include "mmu.h"
  16. #include "proc.h"
  17. #include "spinlock.h"
  18. #include "buf.h"
  19. #include "fs.h"
  20. #include "file.h"
  21. #define min(a, b) ((a) < (b) ? (a) : (b))
  22. static void itrunc(struct inode*);
  23. // Read the super block.
  24. void
  25. readsb(int dev, struct superblock *sb)
  26. {
  27. struct buf *bp;
  28. bp = bread(dev, 1);
  29. memmove(sb, bp->data, sizeof(*sb));
  30. brelse(bp);
  31. }
  32. // Zero a block.
  33. static void
  34. bzero(int dev, int bno)
  35. {
  36. struct buf *bp;
  37. bp = bread(dev, bno);
  38. memset(bp->data, 0, BSIZE);
  39. log_write(bp);
  40. brelse(bp);
  41. }
  42. // Blocks.
  43. // Allocate a zeroed disk block.
  44. static uint
  45. balloc(uint dev)
  46. {
  47. int b, bi, m;
  48. struct buf *bp;
  49. struct superblock sb;
  50. bp = 0;
  51. readsb(dev, &sb);
  52. for(b = 0; b < sb.size; b += BPB){
  53. bp = bread(dev, BBLOCK(b, sb.ninodes));
  54. for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
  55. m = 1 << (bi % 8);
  56. if((bp->data[bi/8] & m) == 0){ // Is block free?
  57. bp->data[bi/8] |= m; // Mark block in use.
  58. log_write(bp);
  59. brelse(bp);
  60. bzero(dev, b + bi);
  61. return b + bi;
  62. }
  63. }
  64. brelse(bp);
  65. }
  66. panic("balloc: out of blocks");
  67. return -1;
  68. }
  69. // Free a disk block.
  70. static void
  71. bfree(int dev, uint b)
  72. {
  73. struct buf *bp;
  74. struct superblock sb;
  75. int bi, m;
  76. readsb(dev, &sb);
  77. bp = bread(dev, BBLOCK(b, sb.ninodes));
  78. bi = b % BPB;
  79. m = 1 << (bi % 8);
  80. if((bp->data[bi/8] & m) == 0)
  81. panic("freeing free block");
  82. bp->data[bi/8] &= ~m;
  83. log_write(bp);
  84. brelse(bp);
  85. }
  86. // Inodes.
  87. //
  88. // An inode describes a single unnamed file.
  89. // The inode disk structure holds metadata: the file's type,
  90. // its size, the number of links referring to it, and the
  91. // list of blocks holding the file's content.
  92. //
  93. // The inodes are laid out sequentially on disk immediately after
  94. // the superblock. Each inode has a number, indicating its
  95. // position on the disk.
  96. //
  97. // The kernel keeps a cache of in-use inodes in memory
  98. // to provide a place for synchronizing access
  99. // to inodes used by multiple processes. The cached
  100. // inodes include book-keeping information that is
  101. // not stored on disk: ip->ref and ip->flags.
  102. //
  103. // An inode and its in-memory represtative go through a
  104. // sequence of states before they can be used by the
  105. // rest of the file system code.
  106. //
  107. // * Allocation: an inode is allocated if its type (on disk)
  108. // is non-zero. ialloc() allocates, iput() frees if
  109. // the link count has fallen to zero.
  110. //
  111. // * Referencing in cache: an entry in the inode cache
  112. // is free if ip->ref is zero. Otherwise ip->ref tracks
  113. // the number of in-memory pointers to the entry (open
  114. // files and current directories). iget() to find or
  115. // create a cache entry and increment its ref, iput()
  116. // to decrement ref.
  117. //
  118. // * Valid: the information (type, size, &c) in an inode
  119. // cache entry is only correct when the I_VALID bit
  120. // is set in ip->flags. ilock() reads the inode from
  121. // the disk and sets I_VALID, while iput() clears
  122. // I_VALID if ip->ref has fallen to zero.
  123. //
  124. // * Locked: file system code may only examine and modify
  125. // the information in an inode and its content if it
  126. // has first locked the inode. The I_BUSY flag indicates
  127. // that the inode is locked. ilock() sets I_BUSY,
  128. // while iunlock clears it.
  129. //
  130. // Thus a typical sequence is:
  131. // ip = iget(dev, inum)
  132. // ilock(ip)
  133. // ... examine and modify ip->xxx ...
  134. // iunlock(ip)
  135. // iput(ip)
  136. //
  137. // ilock() is separate from iget() so that system calls can
  138. // get a long-term reference to an inode (as for an open file)
  139. // and only lock it for short periods (e.g., in read()).
  140. // The separation also helps avoid deadlock and races during
  141. // pathname lookup. iget() increments ip->ref so that the inode
  142. // stays cached and pointers to it remain valid.
  143. //
  144. // Many internal file system functions expect the caller to
  145. // have locked the inodes involved; this lets callers create
  146. // multi-step atomic operations.
  147. struct {
  148. struct spinlock lock;
  149. struct inode inode[NINODE];
  150. } icache;
  151. void
  152. iinit(void)
  153. {
  154. memset(&icache, 0, sizeof(icache));
  155. initlock(&icache.lock, "icache");
  156. }
  157. static struct inode* iget(uint dev, uint inum);
  158. //PAGEBREAK!
  159. // Allocate a new inode with the given type on device dev.
  160. // A free inode has a type of zero.
  161. struct inode*
  162. ialloc(uint dev, short type)
  163. {
  164. int inum;
  165. struct buf *bp;
  166. struct dinode *dip;
  167. struct superblock sb;
  168. readsb(dev, &sb);
  169. for(inum = 1; inum < sb.ninodes; inum++){
  170. bp = bread(dev, IBLOCK(inum));
  171. dip = (struct dinode*)bp->data + inum%IPB;
  172. if(dip->type == 0){ // a free inode
  173. memset(dip, 0, sizeof(*dip));
  174. dip->type = type;
  175. log_write(bp); // mark it allocated on the disk
  176. brelse(bp);
  177. return iget(dev, inum);
  178. }
  179. brelse(bp);
  180. }
  181. panic("ialloc: no inodes");
  182. return 0;
  183. }
  184. // Copy a modified in-memory inode to disk.
  185. void
  186. iupdate(struct inode *ip)
  187. {
  188. struct buf *bp;
  189. struct dinode *dip;
  190. bp = bread(ip->dev, IBLOCK(ip->inum));
  191. dip = (struct dinode*)bp->data + ip->inum%IPB;
  192. dip->type = ip->type;
  193. dip->major = ip->major;
  194. dip->minor = ip->minor;
  195. dip->nlink = ip->nlink;
  196. dip->size = ip->size;
  197. memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
  198. log_write(bp);
  199. brelse(bp);
  200. }
  201. // Find the inode with number inum on device dev
  202. // and return the in-memory copy. Does not lock
  203. // the inode and does not read it from disk.
  204. static struct inode*
  205. iget(uint dev, uint inum)
  206. {
  207. struct inode *ip, *empty;
  208. acquire(&icache.lock);
  209. // Is the inode already cached?
  210. empty = 0;
  211. for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
  212. if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
  213. ip->ref++;
  214. release(&icache.lock);
  215. return ip;
  216. }
  217. if(empty == 0 && ip->ref == 0) // Remember empty slot.
  218. empty = ip;
  219. }
  220. // Recycle an inode cache entry.
  221. if(empty == 0)
  222. panic("iget: no inodes");
  223. ip = empty;
  224. ip->dev = dev;
  225. ip->inum = inum;
  226. ip->ref = 1;
  227. ip->flags = 0;
  228. release(&icache.lock);
  229. return ip;
  230. }
  231. // Increment reference count for ip.
  232. // Returns ip to enable ip = idup(ip1) idiom.
  233. struct inode*
  234. idup(struct inode *ip)
  235. {
  236. acquire(&icache.lock);
  237. ip->ref++;
  238. release(&icache.lock);
  239. return ip;
  240. }
  241. // Lock the given inode.
  242. // Reads the inode from disk if necessary.
  243. void
  244. ilock(struct inode *ip)
  245. {
  246. struct buf *bp;
  247. struct dinode *dip;
  248. if(ip == 0 || ip->ref < 1)
  249. panic("ilock");
  250. acquire(&icache.lock);
  251. while(ip->flags & I_BUSY)
  252. sleep(ip, &icache.lock);
  253. ip->flags |= I_BUSY;
  254. release(&icache.lock);
  255. if(!(ip->flags & I_VALID)){
  256. bp = bread(ip->dev, IBLOCK(ip->inum));
  257. dip = (struct dinode*)bp->data + ip->inum%IPB;
  258. ip->type = dip->type;
  259. ip->major = dip->major;
  260. ip->minor = dip->minor;
  261. ip->nlink = dip->nlink;
  262. ip->size = dip->size;
  263. memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
  264. brelse(bp);
  265. ip->flags |= I_VALID;
  266. if(ip->type == 0)
  267. panic("ilock: no type");
  268. }
  269. }
  270. // Unlock the given inode.
  271. void
  272. iunlock(struct inode *ip)
  273. {
  274. if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
  275. panic("iunlock");
  276. acquire(&icache.lock);
  277. ip->flags &= ~I_BUSY;
  278. wakeup(ip);
  279. release(&icache.lock);
  280. }
  281. // Drop a reference to an in-memory inode.
  282. // If that was the last reference, the inode cache entry can
  283. // be recycled.
  284. // If that was the last reference and the inode has no links
  285. // to it, free the inode (and its content) on disk.
  286. void
  287. iput(struct inode *ip)
  288. {
  289. acquire(&icache.lock);
  290. if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
  291. // inode has no links: truncate and free inode.
  292. if(ip->flags & I_BUSY)
  293. panic("iput busy");
  294. ip->flags |= I_BUSY;
  295. release(&icache.lock);
  296. itrunc(ip);
  297. ip->type = 0;
  298. iupdate(ip);
  299. acquire(&icache.lock);
  300. ip->flags = 0;
  301. wakeup(ip);
  302. }
  303. ip->ref--;
  304. release(&icache.lock);
  305. }
  306. // Common idiom: unlock, then put.
  307. void
  308. iunlockput(struct inode *ip)
  309. {
  310. iunlock(ip);
  311. iput(ip);
  312. }
  313. //PAGEBREAK!
  314. // Inode content
  315. //
  316. // The content (data) associated with each inode is stored
  317. // in blocks on the disk. The first NDIRECT block numbers
  318. // are listed in ip->addrs[]. The next NINDIRECT blocks are
  319. // listed in block ip->addrs[NDIRECT].
  320. // Return the disk block address of the nth block in inode ip.
  321. // If there is no such block, bmap allocates one.
  322. static uint
  323. bmap(struct inode *ip, uint bn)
  324. {
  325. uint addr, *a;
  326. struct buf *bp;
  327. if(bn < NDIRECT){
  328. if((addr = ip->addrs[bn]) == 0)
  329. ip->addrs[bn] = addr = balloc(ip->dev);
  330. return addr;
  331. }
  332. bn -= NDIRECT;
  333. if(bn < NINDIRECT){
  334. // Load indirect block, allocating if necessary.
  335. if((addr = ip->addrs[NDIRECT]) == 0)
  336. ip->addrs[NDIRECT] = addr = balloc(ip->dev);
  337. bp = bread(ip->dev, addr);
  338. a = (uint*)bp->data;
  339. if((addr = a[bn]) == 0){
  340. a[bn] = addr = balloc(ip->dev);
  341. log_write(bp);
  342. }
  343. brelse(bp);
  344. return addr;
  345. }
  346. panic("bmap: out of range");
  347. return -1;
  348. }
  349. // Truncate inode (discard contents).
  350. // Only called when the inode has no links
  351. // to it (no directory entries referring to it)
  352. // and has no in-memory reference to it (is
  353. // not an open file or current directory).
  354. static void
  355. itrunc(struct inode *ip)
  356. {
  357. int i, j;
  358. struct buf *bp;
  359. uint *a;
  360. for(i = 0; i < NDIRECT; i++){
  361. if(ip->addrs[i]){
  362. bfree(ip->dev, ip->addrs[i]);
  363. ip->addrs[i] = 0;
  364. }
  365. }
  366. if(ip->addrs[NDIRECT]){
  367. bp = bread(ip->dev, ip->addrs[NDIRECT]);
  368. a = (uint*)bp->data;
  369. for(j = 0; j < NINDIRECT; j++){
  370. if(a[j])
  371. bfree(ip->dev, a[j]);
  372. }
  373. brelse(bp);
  374. bfree(ip->dev, ip->addrs[NDIRECT]);
  375. ip->addrs[NDIRECT] = 0;
  376. }
  377. ip->size = 0;
  378. iupdate(ip);
  379. }
  380. // Copy stat information from inode.
  381. void
  382. stati(struct inode *ip, struct stat *st)
  383. {
  384. st->dev = ip->dev;
  385. st->ino = ip->inum;
  386. st->type = ip->type;
  387. st->nlink = ip->nlink;
  388. st->size = ip->size;
  389. }
  390. //PAGEBREAK!
  391. // Read data from inode.
  392. int
  393. readi(struct inode *ip, char *dst, uint off, uint n)
  394. {
  395. uint tot, m;
  396. struct buf *bp;
  397. if(ip->type == T_DEV){
  398. if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
  399. return -1;
  400. //cprintf("inside readi\n");
  401. return devsw[ip->major].read(ip, dst, n);
  402. }
  403. if(off > ip->size || off + n < off)
  404. return -1;
  405. if(off + n > ip->size)
  406. n = ip->size - off;
  407. for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
  408. bp = bread(ip->dev, bmap(ip, off/BSIZE));
  409. m = min(n - tot, BSIZE - off%BSIZE);
  410. memmove(dst, bp->data + off%BSIZE, m);
  411. brelse(bp);
  412. }
  413. return n;
  414. }
  415. // PAGEBREAK!
  416. // Write data to inode.
  417. int
  418. writei(struct inode *ip, char *src, uint off, uint n)
  419. {
  420. uint tot, m;
  421. struct buf *bp;
  422. //cprintf("inside writei: type=%x major=%x, func addr: %x\n", ip->type, ip->major, devsw[ip->major].write);
  423. if(ip->type == T_DEV){
  424. if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
  425. return -1;
  426. //cprintf("before calling consolewrite: major=%x, func addr: %x\n", ip->major, devsw[ip->major].write);
  427. return devsw[ip->major].write(ip, src, n);
  428. }
  429. if(off > ip->size || off + n < off)
  430. return -1;
  431. if(off + n > MAXFILE*BSIZE)
  432. return -1;
  433. for(tot=0; tot<n; tot+=m, off+=m, src+=m){
  434. bp = bread(ip->dev, bmap(ip, off/BSIZE));
  435. m = min(n - tot, BSIZE - off%BSIZE);
  436. memmove(bp->data + off%BSIZE, src, m);
  437. log_write(bp);
  438. brelse(bp);
  439. }
  440. if(n > 0 && off > ip->size){
  441. ip->size = off;
  442. iupdate(ip);
  443. }
  444. return n;
  445. }
  446. //PAGEBREAK!
  447. // Directories
  448. int
  449. namecmp(const char *s, const char *t)
  450. {
  451. return strncmp(s, t, DIRSIZ);
  452. }
  453. // Look for a directory entry in a directory.
  454. // If found, set *poff to byte offset of entry.
  455. struct inode*
  456. dirlookup(struct inode *dp, char *name, uint *poff)
  457. {
  458. uint off, inum;
  459. struct dirent de;
  460. if(dp->type != T_DIR)
  461. panic("dirlookup not DIR");
  462. for(off = 0; off < dp->size; off += sizeof(de)){
  463. if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
  464. panic("dirlink read");
  465. if(de.inum == 0)
  466. continue;
  467. if(namecmp(name, de.name) == 0){
  468. // entry matches path element
  469. if(poff)
  470. *poff = off;
  471. inum = de.inum;
  472. return iget(dp->dev, inum);
  473. }
  474. }
  475. return 0;
  476. }
  477. // Write a new directory entry (name, inum) into the directory dp.
  478. int
  479. dirlink(struct inode *dp, char *name, uint inum)
  480. {
  481. int off;
  482. struct dirent de;
  483. struct inode *ip;
  484. // Check that name is not present.
  485. if((ip = dirlookup(dp, name, 0)) != 0){
  486. iput(ip);
  487. return -1;
  488. }
  489. // Look for an empty dirent.
  490. for(off = 0; off < dp->size; off += sizeof(de)){
  491. if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
  492. panic("dirlink read");
  493. if(de.inum == 0)
  494. break;
  495. }
  496. strncpy(de.name, name, DIRSIZ);
  497. de.inum = inum;
  498. if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
  499. panic("dirlink");
  500. return 0;
  501. }
  502. //PAGEBREAK!
  503. // Paths
  504. // Copy the next path element from path into name.
  505. // Return a pointer to the element following the copied one.
  506. // The returned path has no leading slashes,
  507. // so the caller can check *path=='\0' to see if the name is the last one.
  508. // If no name to remove, return 0.
  509. //
  510. // Examples:
  511. // skipelem("a/bb/c", name) = "bb/c", setting name = "a"
  512. // skipelem("///a//bb", name) = "bb", setting name = "a"
  513. // skipelem("a", name) = "", setting name = "a"
  514. // skipelem("", name) = skipelem("////", name) = 0
  515. //
  516. static char*
  517. skipelem(char *path, char *name)
  518. {
  519. char *s;
  520. int len;
  521. while(*path == '/')
  522. path++;
  523. if(*path == 0)
  524. return 0;
  525. s = path;
  526. while(*path != '/' && *path != 0)
  527. path++;
  528. len = path - s;
  529. if(len >= DIRSIZ)
  530. memmove(name, s, DIRSIZ);
  531. else {
  532. memmove(name, s, len);
  533. name[len] = 0;
  534. }
  535. while(*path == '/')
  536. path++;
  537. return path;
  538. }
  539. // Look up and return the inode for a path name.
  540. // If parent != 0, return the inode for the parent and copy the final
  541. // path element into name, which must have room for DIRSIZ bytes.
  542. static struct inode*
  543. namex(char *path, int nameiparent, char *name)
  544. {
  545. struct inode *ip, *next;
  546. if(*path == '/')
  547. ip = iget(ROOTDEV, ROOTINO);
  548. else
  549. ip = idup(curr_proc->cwd);
  550. while((path = skipelem(path, name)) != 0){
  551. ilock(ip);
  552. if(ip->type != T_DIR){
  553. iunlockput(ip);
  554. return 0;
  555. }
  556. if(nameiparent && *path == '\0'){
  557. // Stop one level early.
  558. iunlock(ip);
  559. return ip;
  560. }
  561. if((next = dirlookup(ip, name, 0)) == 0){
  562. iunlockput(ip);
  563. return 0;
  564. }
  565. iunlockput(ip);
  566. ip = next;
  567. }
  568. if(nameiparent){
  569. iput(ip);
  570. return 0;
  571. }
  572. return ip;
  573. }
  574. struct inode*
  575. namei(char *path)
  576. {
  577. char name[DIRSIZ];
  578. return namex(path, 0, name);
  579. }
  580. struct inode*
  581. nameiparent(char *path, char *name)
  582. {
  583. return namex(path, 1, name);
  584. }