RAID5 IO處理之寫請求代碼詳解( 二 )

2.3 計算新校驗函數調用關系:
raid5_end_read_request()
 \_ handle_stripe()
     \_ analyse_stripe()
     \_ handle_stripe_dirtying()
         \_ schedule_reconstruction()
     \_ raid_run_ops()
         \_ ops_run_prexor()
         \_ ops_run_biodrain()
         \_ ops_run_reconstruct5()

由上輪次下發讀請求的回調觸發本輪次處理。
本輪次中,再次進入 handle_stripe_dirtying() 后,因為讀請求已經完成,上輪次中需要讀的條帶/設備都設置了 R5_UPTODATE 標記。所以一方面 rmw 變量等于 0,另一方面由于不需要再下發請求,s->locked 也等于 0,因此滿足條件,進入到 schedule_reconstruction() 中。
static voidschedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,int rcw, int expand){ if (!rcw) {/* RAID6不支持讀改寫 */BUG_ON(level == 6);BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));/* 遍歷所有條帶/設備 */for (i = disks; i--; ) {struct r5dev *dev = &sh->dev[i];/* 跳過校驗 */if (i == pd_idx)continue;/* 有寫請求的條帶/設備設置相關標記 */if (dev->towrite &&(test_bit(R5_UPTODATE, &dev->flags) ||test_bit(R5_Wantcompute, &dev->flags))) {/* 將數據從bio中拷貝到dev->page中 */set_bit(R5_Wantdrain, &dev->flags);/* 給條帶/設備上鎖表明正在進行IO */set_bit(R5_LOCKED, &dev->flags);/* 清除標記表明當前條帶/設備的page中的數據不可直接試用 */clear_bit(R5_UPTODATE, &dev->flags);/* locked計數 */s->locked++;}}/* 設置條帶重構狀態 */sh->reconstruct_state = reconstruct_state_prexor_drain_run;/* 設置條帶需要進行異或運算 */set_bit(STRIPE_OP_PREXOR, &s->ops_request);/* 設置條帶需要“抽干”數據 */set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);/* 設置條帶需要計算校驗 */set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); } /* 給校驗值所在條帶/設備上鎖表明正在進行IO */ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); /* 清除標記表明當前條帶/設備的page中的數據不可直接試用 */ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); /* locked計數 */ s->locked++;}static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request){ /* 先使用舊數據和舊校驗進行異或運算獲得中間狀態的校驗 */ if (test_bit(STRIPE_OP_PREXOR, &ops_request))tx = ops_run_prexor(sh, percpu, tx); /* 將新數據拷貝從bio中拷貝到dev中 */ if (test_bit(STRIPE_OP_BIODRAIN, &ops_request))tx = ops_run_biodrain(sh, tx); /* 計算最終的校驗值 */ if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {if (level < 6)ops_run_reconstruct5(sh, percpu, tx);elseops_run_reconstruct6(sh, percpu, tx); }}static struct dma_async_tx_descriptor *ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,struct dma_async_tx_descriptor *tx){ /* 將校驗值的page設置為第一個源數據和目標數據 */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; /* 遍歷所有條帶/設備 */ for (i = disks; i--; ) {struct r5dev *dev = &sh->dev[i];/* 需要“抽干”數據的dev即包含新數據的dev , 將其page依次設置為源數據 */if (test_bit(R5_Wantdrain, 
&dev->flags))xor_srcs[count++] = dev->page; } /* 進行異或運算 */ init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,ops_complete_prexor, sh, to_addr_conv(sh, percpu)); tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx;}static struct dma_async_tx_descriptor *ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx){ /* 遍歷所有條帶/設備 */ for (i = disks; i--; ) {/* 處理所有需要“抽干”數據的dev */if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {struct bio *wbi;spin_lock_irq(&sh->stripe_lock);/* 將bio從towrite轉移到written表明開始調度 */chosen = dev->towrite;dev->towrite = NULL;BUG_ON(dev->written);wbi = dev->written = chosen;spin_unlock_irq(&sh->stripe_lock);/* 將bio中本條帶范圍內的所有數據拷貝到dev的page中 */while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {tx = async_copy_data(1, wbi, dev->page, dev->sector, tx);wbi = r5_next_bio(wbi, dev->sector);}} } return tx;}static voidops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,struct dma_async_tx_descriptor *tx){ /* check if prexor is active which means only process blocks* that are part of a read-modify-write (written)*/ if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {prexor = 1;/* 將校驗值的page設置為第一個源數據和目標數據 */xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;/* 所有包含需要寫請求的條帶/設備依次設置為源數據 */for (i = disks; i--; ) {struct r5dev *dev = &sh->dev[i];if (dev->written)xor_srcs[count++] = dev->page;} } /* 1/ if we prexor'd then the dest is reused as a source* 2/ if we did not prexor then we are redoing the parity* set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST* for the synchronous xor case*/ flags = ASYNC_TX_ACK |(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); /* 進行異步異或運算,完成后進入回調函數ops_complete_reconstruct */ init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, to_addr_conv(sh, percpu)); tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);}

推薦閱讀