如何从 grep 输出构建 xml 文档

我想将 `find | grep` 操作的结果转换为结构化的 XML 文档,其中每个文件条目包含文件名、出现次数、行号和行内容。Linux 是否提供了可以这样格式化输出的工具,还是我必须自己编写代码?


我用 Python 试了一下,写出了一个简单的脚本,应该能完成你想要的事情。脚本如下:

#!/usr/bin/env python2
# -*- coding: ascii -*-

"""
Takes a list of file-system paths and
generates an XML representation of the
corresponding file-system hierarchy.
"""

import sys
from lxml.etree import Element, SubElement, fromstring, tostring, XMLParser
from xml.sax.saxutils import escape, unescape
from os.path import join, isdir
from posix import lstat
import fileinput

def insert_path(xmlroot, path):
    """Insert the elements describing `path` beneath `xmlroot`.

    The path is split on '/' and walked one component at a time. A component
    that already has a child element with a matching `name` attribute is
    reused; otherwise a new <directory> or <file> element is appended,
    carrying `name`, `path` and `inode` attributes.

    Args:
        xmlroot: lxml element under which the hierarchy is built; it is
            modified in place.
        path: file-system path using '/' as the separator. It is resolved
            relative to the current working directory, and every prefix that
            needs a new node must exist on disk because it is classified
            with `isdir` and its inode is read with `lstat`.

    Returns:
        `xmlroot` itself (for convenience - the update happens in place).

    Raises:
        OSError: if `lstat` fails for a prefix that needs a new node.
    """

    # Initialize a node cursor to start at the root node
    xmlcursor = xmlroot

    # Keep track of the relative path
    fullpath = ''

    # Iterate through the components of the path
    for path_component in path.split('/'):

        # Empty components come from blank input or doubled slashes ("a//b");
        # they name nothing, so they must not produce a node of their own.
        if not path_component:
            continue

        # Update the path
        fullpath = join(fullpath, path_component)

        # Make the strings ASCII-safe and XML-escape them. The
        # 'string-escape' codec exists only in Python 2, which is what the
        # script's shebang line targets.
        fullpath_encoded = escape(fullpath.encode('string-escape'))
        path_component_encoded = escape(path_component.encode('string-escape'))

        # Check to see if the component is already represented by a node.
        # The name is passed as an XPath variable rather than interpolated
        # into the expression, so names containing quotes cannot break it.
        xmlnodes = xmlcursor.xpath("./*[@name=$name]",
                                   name=path_component_encoded)

        # If the node exists, update the cursor
        if xmlnodes:
            xmlcursor = xmlnodes[0]

        # If the node doesn't exist, create it
        else:

            # Create the node
            if isdir(fullpath):
                xmlcursor = SubElement(xmlcursor, "directory")
            else:
                xmlcursor = SubElement(xmlcursor, "file")

            # (Optional) Add some file-attributes
            xmlcursor.set('name', path_component_encoded)
            xmlcursor.set('path', fullpath_encoded)
            xmlcursor.set('inode', str(lstat(fullpath).st_ino))

    # Return the modified root element (for convenience - not necessary)
    return xmlroot

def paths_to_xml(pathlist):
    """Build an XML tree mirroring the hierarchy of the given paths.

    Args:
        pathlist: iterable of file-system paths, one per item. Surrounding
            whitespace and leading/trailing '/' characters are stripped from
            each item before it is inserted.

    Returns:
        The `root` element under which all paths were inserted.
    """

    xmlroot = Element('root')

    for path in pathlist:
        # insert_path updates xmlroot in place
        insert_path(xmlroot, path.strip().strip('/'))

    return xmlroot


# Script entry point: paths are read one per line from standard input,
# or from the files named on the command line (fileinput semantics).
if __name__ == "__main__":

    # Build the XML tree from the incoming path list
    document_root = paths_to_xml(fileinput.input())

    # Serialize the tree with indentation and write it to standard output
    serialized = tostring(document_root, pretty_print=True)
    print(serialized)


# Create a scratch directory and work inside it
mkdir -p /tmp/xmltest
cd /tmp/xmltest

# Two files at the top level
touch file1 file2

# dir1 holds two files
mkdir dir1
touch dir1/file3 dir1/file4

# dir2 holds a nested directory containing a single file
mkdir dir2 dir2/dir3
touch dir2/dir3/file5


├── dir1
│   ├── file3
│   └── file4
├── dir2
│   └── dir3
│       └── file5
├── file1
└── file2


find . | pathlist2xml.py

这是生成的 XML 输出:

<root>
  <directory name="." path="." inode="3587802">
    <directory name="dir1" path="./dir1" inode="3587817">
      <file name="file3" path="./dir1/file3" inode="3587818"/>
      <file name="file4" path="./dir1/file4" inode="3587819"/>
    </directory>
    <directory name="dir2" path="./dir2" inode="3587820">
      <directory name="dir3" path="./dir2/dir3" inode="3587821">
        <file name="file5" path="./dir2/dir3/file5" inode="3587822"/>
      </directory>
    </directory>
    <file name="file1" path="./file1" inode="3587815"/>
    <file name="file2" path="./file2" inode="3587816"/>
  </directory>
</root>


find . | grep dir2 | pathlist2xml.py


<root>
  <directory name="." path="." inode="3587802">
    <directory name="dir2" path="./dir2" inode="3587820">
      <directory name="dir3" path="./dir2/dir3" inode="3587821">
        <file name="file5" path="./dir2/dir3/file5" inode="3587822"/>
      </directory>
    </directory>
  </directory>
</root>
