在 ext3/4 中的特定偏移处分配文件

Question 1

我已经设法找到了一种方法来做到这一点。它使用一个 Python 脚本，该脚本首先使用 debugfs 查找文件所需数量的空闲块（包括间接块）。然后，它手动将间接块写入磁盘，并再次调用 debugfs，将这些块标记为已使用并更新文件的索引节点。

唯一的问题是，当您使用 setb 时，debugfs 显然不会更新块组的空闲块计数。尽管我可以手动设置该参数，但似乎没有任何方法可以打印当前值，因此我无法计算出正确的值。据我所知，这不会产生任何真正的负面后果，并且在需要时可以用 fsck.ext3 来纠正该值，因此对于基准测试的目的来说，这已经足够了。

如果我错过了任何其他文件系统一致性问题，请告诉我，但由于fsck.ext3除了不正确的空闲块计数之外没有报告任何内容，我应该是安全的。

import sys
import tempfile
import struct
import subprocess

# Size of one disk sector in bytes; the inode's "blocks" field is written in these units.
SECTOR_SIZE = 512
# File-system block size in bytes.
BLOCK_SIZE = 4096
# Number of block pointers stored directly in the inode (block[0] .. block[11]).
DIRECT_BLOCKS = 12
# Each entry of an indirect block is a 4-byte block number. Floor division keeps
# the result an integer so it can be used as a range() bound and slice index.
BLOCKS_PER_INDIRECT_BLOCK = BLOCK_SIZE // 4

def write_indirect_block(device, indirect_block, blocks, block_size=None):
    """Write one indirect block directly to the device.

    The block is filled with the given block numbers as 32-bit little-endian
    integers and padded with zero entries up to the end of the block.

    Args:
        device: Path of the block device (or image file) to write to.
        indirect_block: Number of the block that is overwritten.
        blocks: Block numbers (ints or numeric strings) to list in the block.
        block_size: Block size in bytes. Defaults to the module-level
            BLOCK_SIZE when omitted.

    Raises:
        ValueError: If more block numbers are given than fit into one block.
    """
    if block_size is None:
        block_size = BLOCK_SIZE
    # Every entry is a 4-byte block number.
    entries_per_block = block_size // 4

    numbers = [int(block) for block in blocks]
    if len(numbers) > entries_per_block:
        # Writing them anyway would spill into the following block on disk.
        raise ValueError("%d block numbers do not fit into one indirect block of %d entries"
                         % (len(numbers), entries_per_block))

    print("writing indirect block  %s" % (indirect_block,))
    # Listed blocks first, then zero entries for the rest of the block.
    padding = [0] * (entries_per_block - len(numbers))
    payload = struct.pack("<%dI" % entries_per_block, *(numbers + padding))

    # "r+b" updates the target in place. "wb" would truncate a regular file
    # (e.g. a file-system image) to zero length before writing.
    with open(device, "r+b") as dev:
        dev.seek(indirect_block * block_size)
        dev.write(payload)

def main(argv):
    """Allocate an existing file at a chosen offset of an ext3 file system.

    Asks debugfs for free blocks starting at the requested offset, writes the
    indirect and double indirect blocks straight to the device, then runs a
    generated debugfs script that marks the blocks as used and points the
    file's inode at them.

    Args:
        argv: Command line as [program, device, file, sizeInMB, offsetInMB].

    Returns:
        None. Problems are reported on standard output.
    """
    import os  # function-scope import: only needed to remove the temporary script

    usage = "Usage: ext3allocfile.py [device] [file] [sizeInMB] [offsetInMB]"
    if len(argv) < 5:
        print(usage)
        return

    device = argv[1]  # device containing the ext3 file system, e.g. "/dev/sdb1"
    target = argv[2]  # file name relative to the root of the device, e.g. "/myfile"
    try:
        size = int(argv[3]) * 1024 * 1024  # size in MB -> bytes
        offset = int(argv[4]) * 1024 * 1024  # offset from the start of the device, MB -> bytes
    except ValueError:
        print(usage)
        return

    if size <= 0 or offset < 0:
        # A zero-sized request would obtain no blocks and then fail while
        # filling the direct block slots of the inode.
        print("Size must be at least 1 MB and the offset must not be negative.")
        return

    if size > 0xFFFFFFFF:
        # Supporting this requires two things: triple indirect block support,
        # and proper handling of size_high when changing the inode.
        print("Unable to allocate files over 4GB.")
        return

    # Force an integer so the value is usable as a slice bound and range() argument.
    per_indirect = int(BLOCKS_PER_INDIRECT_BLOCK)

    # Because size is specified in MB, it is always exactly divisible by BLOCK_SIZE.
    size_blocks = size // BLOCK_SIZE
    # One indirect block is needed per per_indirect data blocks beyond the direct ones (rounded up).
    beyond_direct = max(0, size_blocks - DIRECT_BLOCKS)
    ind_blocks = (beyond_direct + per_indirect - 1) // per_indirect
    # A double indirect block is needed as soon as there is more than one indirect block.
    has_dind_block = ind_blocks > 1
    total_blocks = size_blocks + ind_blocks
    if has_dind_block:
        total_blocks += 1

    # Find free blocks we can use at the offset.
    offset_block = offset // BLOCK_SIZE
    print("Finding  %d  free blocks from block  %d" % (total_blocks, offset_block))
    process = subprocess.Popen(
        ["debugfs", device, "-R", "ffb %d %d" % (total_blocks, offset_block)],
        stdout=subprocess.PIPE)
    # communicate() drains the pipe and waits for debugfs, so no zombie is left behind.
    raw_output = process.communicate()[0]
    lines = raw_output.decode("ascii", "replace").splitlines()
    # The first three words of the first line are "Free", "blocks", "found:", so we skip those.
    tokens = lines[0].split()[3:] if lines else []
    try:
        blocks = [int(token) for token in tokens]
    except ValueError:
        # The first line was something other than a list of block numbers.
        blocks = []
    if len(blocks) != total_blocks:
        print("Not enough free blocks found for the file.")
        return

    # Layout of the list: the direct blocks come first, then the first
    # indirect block followed by the data blocks it lists, then the double
    # indirect block, then repeated groups of one indirect block followed by
    # its data blocks.
    if ind_blocks > 0:
        first_data = DIRECT_BLOCKS + 1
        write_indirect_block(device, blocks[DIRECT_BLOCKS],
                             blocks[first_data:first_data + per_indirect])

    if has_dind_block:
        dind_block_index = DIRECT_BLOCKS + 1 + per_indirect
        dind_block = blocks[dind_block_index]
        ind_block_indices = [dind_block_index + 1 + i * (per_indirect + 1)
                             for i in range(ind_blocks - 1)]
        # Write the double indirect block, listing the remaining indirect blocks.
        write_indirect_block(device, dind_block, [blocks[i] for i in ind_block_indices])
        # Write the remaining indirect blocks, listing the relevant file blocks.
        for i in ind_block_indices:
            write_indirect_block(device, blocks[i], blocks[i + 1:i + 1 + per_indirect])

    # Time to generate a script for debugfs.
    script = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        try:
            # Mark all the blocks as in-use.
            for block in blocks:
                script.write("setb %d\n" % (block,))

            # Change direct blocks in the inode.
            for i in range(min(DIRECT_BLOCKS, size_blocks)):
                script.write("sif %s block[%d] %d\n" % (target, i, blocks[i]))

            # Change indirect block in the inode.
            if size_blocks > DIRECT_BLOCKS:
                script.write("sif %s block[IND] %d\n" % (target, blocks[DIRECT_BLOCKS]))

            # Change double indirect block in the inode.
            if has_dind_block:
                script.write("sif %s block[DIND] %d\n" % (target, dind_block))

            # Set the total number of blocks in the inode (this value seems to
            # actually be counted in sectors).
            script.write("sif %s blocks %d\n"
                         % (target, total_blocks * (BLOCK_SIZE // SECTOR_SIZE)))
            # Set file size in the inode.
            # TODO: Need support of size_high for large files
            script.write("sif %s size %d\n" % (target, size))
        finally:
            script.close()

        # Execute the script.
        print("Modifying file")
        status = subprocess.call(["debugfs", "-w", device, "-f", script.name])
        if status != 0:
            print("debugfs exited with status %d" % (status,))
    finally:
        # Remove the temporary script even if writing it or running debugfs failed.
        os.unlink(script.name)

# Run the allocator only when this file is executed as a script; the raw
# command line (including the program name) is passed through to main().
if __name__ == "__main__":
    main(sys.argv)

该脚本可以按如下方式在偏移量 200GB 处创建一个 1GB 文件（您需要是 root）：

touch /mount/point/myfile
sync
python ext3allocfile.py /dev/sdb1 /myfile 1024 204800
umount /dev/sdb1
mount /dev/sdb1

为了让系统识别更改，必须使用 umount/mount 组合。您也可以在调用脚本之前卸载，但这会使 debugfs 的调用变慢。

如果有人想使用这个：我不保证它会正常工作，如果您丢失任何数据，我不承担任何责任。一般来说，不要在包含任何重要内容的文件系统上使用它。

Answer

我已经设法找到了一种方法来做到这一点。它使用一个 Python 脚本，该脚本首先使用 debugfs 查找文件所需数量的空闲块（包括间接块）。然后，它手动将间接块写入磁盘，并再次调用 debugfs，将这些块标记为已使用并更新文件的索引节点。

唯一的问题是，当您使用 setb 时，debugfs 显然不会更新块组的空闲块计数。尽管我可以手动设置该参数，但似乎没有任何方法可以打印当前值，因此我无法计算出正确的值。据我所知，这不会产生任何真正的负面后果，并且在需要时可以用 fsck.ext3 来纠正该值，因此对于基准测试的目的来说，这已经足够了。

如果我错过了任何其他文件系统一致性问题，请告诉我，但由于fsck.ext3除了不正确的空闲块计数之外没有报告任何内容，我应该是安全的。

import sys
import tempfile
import struct
import subprocess

# Size of one disk sector in bytes; the inode's "blocks" field is written in these units.
SECTOR_SIZE = 512
# File-system block size in bytes.
BLOCK_SIZE = 4096
# Number of block pointers stored directly in the inode (block[0] .. block[11]).
DIRECT_BLOCKS = 12
# Each entry of an indirect block is a 4-byte block number. Floor division keeps
# the result an integer so it can be used as a range() bound and slice index.
BLOCKS_PER_INDIRECT_BLOCK = BLOCK_SIZE // 4

def write_indirect_block(device, indirect_block, blocks, block_size=None):
    """Write one indirect block directly to the device.

    The block is filled with the given block numbers as 32-bit little-endian
    integers and padded with zero entries up to the end of the block.

    Args:
        device: Path of the block device (or image file) to write to.
        indirect_block: Number of the block that is overwritten.
        blocks: Block numbers (ints or numeric strings) to list in the block.
        block_size: Block size in bytes. Defaults to the module-level
            BLOCK_SIZE when omitted.

    Raises:
        ValueError: If more block numbers are given than fit into one block.
    """
    if block_size is None:
        block_size = BLOCK_SIZE
    # Every entry is a 4-byte block number.
    entries_per_block = block_size // 4

    numbers = [int(block) for block in blocks]
    if len(numbers) > entries_per_block:
        # Writing them anyway would spill into the following block on disk.
        raise ValueError("%d block numbers do not fit into one indirect block of %d entries"
                         % (len(numbers), entries_per_block))

    print("writing indirect block  %s" % (indirect_block,))
    # Listed blocks first, then zero entries for the rest of the block.
    padding = [0] * (entries_per_block - len(numbers))
    payload = struct.pack("<%dI" % entries_per_block, *(numbers + padding))

    # "r+b" updates the target in place. "wb" would truncate a regular file
    # (e.g. a file-system image) to zero length before writing.
    with open(device, "r+b") as dev:
        dev.seek(indirect_block * block_size)
        dev.write(payload)

def main(argv):
    """Allocate an existing file at a chosen offset of an ext3 file system.

    Asks debugfs for free blocks starting at the requested offset, writes the
    indirect and double indirect blocks straight to the device, then runs a
    generated debugfs script that marks the blocks as used and points the
    file's inode at them.

    Args:
        argv: Command line as [program, device, file, sizeInMB, offsetInMB].

    Returns:
        None. Problems are reported on standard output.
    """
    import os  # function-scope import: only needed to remove the temporary script

    usage = "Usage: ext3allocfile.py [device] [file] [sizeInMB] [offsetInMB]"
    if len(argv) < 5:
        print(usage)
        return

    device = argv[1]  # device containing the ext3 file system, e.g. "/dev/sdb1"
    target = argv[2]  # file name relative to the root of the device, e.g. "/myfile"
    try:
        size = int(argv[3]) * 1024 * 1024  # size in MB -> bytes
        offset = int(argv[4]) * 1024 * 1024  # offset from the start of the device, MB -> bytes
    except ValueError:
        print(usage)
        return

    if size <= 0 or offset < 0:
        # A zero-sized request would obtain no blocks and then fail while
        # filling the direct block slots of the inode.
        print("Size must be at least 1 MB and the offset must not be negative.")
        return

    if size > 0xFFFFFFFF:
        # Supporting this requires two things: triple indirect block support,
        # and proper handling of size_high when changing the inode.
        print("Unable to allocate files over 4GB.")
        return

    # Force an integer so the value is usable as a slice bound and range() argument.
    per_indirect = int(BLOCKS_PER_INDIRECT_BLOCK)

    # Because size is specified in MB, it is always exactly divisible by BLOCK_SIZE.
    size_blocks = size // BLOCK_SIZE
    # One indirect block is needed per per_indirect data blocks beyond the direct ones (rounded up).
    beyond_direct = max(0, size_blocks - DIRECT_BLOCKS)
    ind_blocks = (beyond_direct + per_indirect - 1) // per_indirect
    # A double indirect block is needed as soon as there is more than one indirect block.
    has_dind_block = ind_blocks > 1
    total_blocks = size_blocks + ind_blocks
    if has_dind_block:
        total_blocks += 1

    # Find free blocks we can use at the offset.
    offset_block = offset // BLOCK_SIZE
    print("Finding  %d  free blocks from block  %d" % (total_blocks, offset_block))
    process = subprocess.Popen(
        ["debugfs", device, "-R", "ffb %d %d" % (total_blocks, offset_block)],
        stdout=subprocess.PIPE)
    # communicate() drains the pipe and waits for debugfs, so no zombie is left behind.
    raw_output = process.communicate()[0]
    lines = raw_output.decode("ascii", "replace").splitlines()
    # The first three words of the first line are "Free", "blocks", "found:", so we skip those.
    tokens = lines[0].split()[3:] if lines else []
    try:
        blocks = [int(token) for token in tokens]
    except ValueError:
        # The first line was something other than a list of block numbers.
        blocks = []
    if len(blocks) != total_blocks:
        print("Not enough free blocks found for the file.")
        return

    # Layout of the list: the direct blocks come first, then the first
    # indirect block followed by the data blocks it lists, then the double
    # indirect block, then repeated groups of one indirect block followed by
    # its data blocks.
    if ind_blocks > 0:
        first_data = DIRECT_BLOCKS + 1
        write_indirect_block(device, blocks[DIRECT_BLOCKS],
                             blocks[first_data:first_data + per_indirect])

    if has_dind_block:
        dind_block_index = DIRECT_BLOCKS + 1 + per_indirect
        dind_block = blocks[dind_block_index]
        ind_block_indices = [dind_block_index + 1 + i * (per_indirect + 1)
                             for i in range(ind_blocks - 1)]
        # Write the double indirect block, listing the remaining indirect blocks.
        write_indirect_block(device, dind_block, [blocks[i] for i in ind_block_indices])
        # Write the remaining indirect blocks, listing the relevant file blocks.
        for i in ind_block_indices:
            write_indirect_block(device, blocks[i], blocks[i + 1:i + 1 + per_indirect])

    # Time to generate a script for debugfs.
    script = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        try:
            # Mark all the blocks as in-use.
            for block in blocks:
                script.write("setb %d\n" % (block,))

            # Change direct blocks in the inode.
            for i in range(min(DIRECT_BLOCKS, size_blocks)):
                script.write("sif %s block[%d] %d\n" % (target, i, blocks[i]))

            # Change indirect block in the inode.
            if size_blocks > DIRECT_BLOCKS:
                script.write("sif %s block[IND] %d\n" % (target, blocks[DIRECT_BLOCKS]))

            # Change double indirect block in the inode.
            if has_dind_block:
                script.write("sif %s block[DIND] %d\n" % (target, dind_block))

            # Set the total number of blocks in the inode (this value seems to
            # actually be counted in sectors).
            script.write("sif %s blocks %d\n"
                         % (target, total_blocks * (BLOCK_SIZE // SECTOR_SIZE)))
            # Set file size in the inode.
            # TODO: Need support of size_high for large files
            script.write("sif %s size %d\n" % (target, size))
        finally:
            script.close()

        # Execute the script.
        print("Modifying file")
        status = subprocess.call(["debugfs", "-w", device, "-f", script.name])
        if status != 0:
            print("debugfs exited with status %d" % (status,))
    finally:
        # Remove the temporary script even if writing it or running debugfs failed.
        os.unlink(script.name)

# Run the allocator only when this file is executed as a script; the raw
# command line (including the program name) is passed through to main().
if __name__ == "__main__":
    main(sys.argv)

该脚本可以按如下方式在偏移量 200GB 处创建一个 1GB 文件（您需要是 root）：

touch /mount/point/myfile
sync
python ext3allocfile.py /dev/sdb1 /myfile 1024 204800
umount /dev/sdb1
mount /dev/sdb1

为了让系统识别更改，必须使用 umount/mount 组合。您也可以在调用脚本之前卸载，但这会使 debugfs 的调用变慢。

如果有人想使用这个：我不保证它会正常工作，如果您丢失任何数据，我不承担任何责任。一般来说，不要在包含任何重要内容的文件系统上使用它。

Question 2

这不是你想要的答案，我意识到了。但该方法与文件系统无关。

仅一次

找到块大小；从头到尾循环创建该块大小的文件；一旦完成；删除除您想要的文件之外的所有文件。

完成后，将设备的原始副本复制到压缩文件中

dd if=/dev/sdp1 |bzip2 -9 > /tmp/my-fs-image.bz2

瞧！得到的文件系统映像并不大，其中只有一个块恰好分配在您想要的位置。

要恢复，请创建一个具有完全相同数量/大小的物理磁盘块的分区

bzip2 -d < /tmp/my-fs-image.bz2|dd of=/dev/sdq1

第一次这样做会很麻烦。

Answer