补充评论

Question

好的，下面是一个（相对）简单的 Python 脚本，用来尝试完成这种清理。我把它命名为 clean-typescript.py。它当然还有改进的余地，也可能包含 bug，但这是我在短时间内能写出来的。

#!/usr/bin/env python3
# Takes raw terminal output of a program containing control sequences that
# overwrite parts of the output, and attempts to transform it to just the end
# result, writing its output on stdout.
#
# Assumptions/limitations:
#  * Assumes Unix line endings.
#  * Assumes input text is left-to-right, in system character encoding.
#  * Does not attempt to deal with most of the complexities of Unicode.
#  * Does not attempt to interpret every ANSI escape sequence; just the common
#    ones that affect cursor position.
#  * Ignores ANSI SGR (bold/color/etc.) sequences.
#  * Assumes 8-column tab stops.
#  * Assumes the terminal displays an unlimited number of lines.
#  * Ignores absolute positioning sequences (except CHA): this is not for
#    ncurses output and such.
#  * Will not allow the cursor to go up beyond the first line in the file.
#
# Usage: clean-typescript.py FILE COLS
# FILE is the name of the file to read; if omitted or "-", read stdin.
# COLS is how many columns wide the terminal is; if omitted, set to 80.

import sys
from array import array
from enum import Enum

# Select the input stream.  A named file is opened with newline="" so that
# Python's universal-newline translation is switched off: with the default
# setting every lone CR in the file would be turned into LF before this
# script sees it, and carriage-return overwrites could never be replayed.
if len(sys.argv) >= 2 and sys.argv[1] != "-":
    f = open(sys.argv[1], "r", newline="")
else:
    f = sys.stdin

# Terminal width in columns (second command-line argument, default 80).
if len(sys.argv) >= 3:
    cols = int(sys.argv[2])
    if cols < 1:
        # Every row needs at least one cell for the cursor to sit in.
        sys.exit("COLS must be a positive integer")
else:
    cols = 80

# Virtual screen: a list of rows, each row an array of `cols` characters
# initialised to spaces.  It starts with a single blank row.
lines = [array("u", (" ",)*cols)]
# Cursor position as zero-based row and column indexes.
curline = curcol = 0
# Set once a character has been written into the last column; the wrap to the
# next row is deferred until another printable character arrives.
eol = False

class Dir (Enum):
    """The four directions accepted by move_cursor."""
    UP, DOWN, RIGHT, LEFT = range(4)

def move_cursor (dir: Dir, count: int):
    """Move the cursor `count` cells in direction `dir`, clamped to the screen.

    Updates the module-level cursor state (curline, curcol).  The resulting
    row always lies in 0 .. len(lines) - 1 and the resulting column in
    0 .. cols - 1, even when `count` is negative.  Moving down never creates
    rows; callers that need a new row must append it before calling.

    Only Dir.LEFT clears the pending-wrap flag `eol`; the other directions
    leave it untouched.
    """
    global curline, curcol, eol
    if dir == Dir.UP:
        curline = min(max(curline - count, 0), len(lines) - 1)
    elif dir == Dir.DOWN:
        curline = min(max(curline + count, 0), len(lines) - 1)
    elif dir == Dir.RIGHT:
        curcol = min(max(curcol + count, 0), cols - 1)
    elif dir == Dir.LEFT:
        eol = False
        curcol = min(max(curcol - count, 0), cols - 1)

def skip_osc ():
    """Discard an OSC string whose introducer (ESC ]) was already consumed.

    Reads from the module-level input stream `f` until the string is
    terminated by BEL (0x07) or by ST (ESC backslash).

    Returns the first character after the terminator, or "" when the input
    ends before or right after the terminator.
    """
    c = f.read(1)
    while c:
        if c == "\x07":
            return f.read(1)
        if c == "\x1b":
            c = f.read(1)
            if c == "\\":
                return f.read(1)
            # Not ST: examine this character again on the next pass so that
            # a BEL or another ESC right after the ESC is not lost.
            continue
        c = f.read(1)
    # End of input inside the OSC string; stop instead of looping forever.
    return ""

def interpret_seq ():
    """Consume one escape sequence; the ESC itself has already been read.

    OSC strings are skipped via skip_osc().  CSI sequences are parsed, and
    the cursor-movement functions CUU, CUD, CUF, CUB, CNL, CPL and CHA as
    well as EL (erase in line) are applied to the virtual screen.  Every
    other sequence is consumed and ignored, including CSI sequences that
    carry private-parameter or intermediate bytes.

    Returns the next character the main loop should process: normally the
    first character after the sequence, "" at end of input, or the offending
    character itself when a CSI sequence is cut short by a byte that cannot
    be part of it.
    """
    c = f.read(1)
    if c == "]": # OSC
        return skip_osc()
    if c != "[":
        # Other escape sequences are not interpreted.  Those with
        # intermediate bytes (0x20-0x2F, e.g. "ESC ( B") are consumed up to
        # and including their final byte so it is not printed as text.
        while c and "\x20" <= c <= "\x2f":
            c = f.read(1)
        return f.read(1)

    # CSI layout: parameter bytes 0x30-0x3F, then intermediate bytes
    # 0x20-0x2F, then exactly one final byte 0x40-0x7E.
    parm_chars = []
    c = f.read(1)
    while c and "\x30" <= c <= "\x3f":
        parm_chars.append(c)
        c = f.read(1)
    has_intermediate = False
    while c and "\x20" <= c <= "\x2f":
        has_intermediate = True
        c = f.read(1)
    if not ("\x40" <= c <= "\x7e"):
        # Truncated or malformed sequence: hand the character (or "" at end
        # of input) back to the caller instead of swallowing it.
        return c

    parm_text = "".join(parm_chars)
    if has_intermediate or parm_text.strip("0123456789;"):
        # Private-mode sequences ("?", ">", "<", "=" prefixes), sub-parameter
        # syntax and sequences with intermediate bytes are not interpreted.
        return f.read(1)

    # Omitted parameters become None so that positions are preserved.
    parms = [int(p) if p else None for p in parm_text.split(";")]
    # For the movement functions an omitted or zero parameter means 1.
    count = parms[0] or 1

    if c == "A":   # CUU
        move_cursor(Dir.UP, count)
    elif c == "B": # CUD
        move_cursor(Dir.DOWN, count)
    elif c == "C": # CUF
        move_cursor(Dir.RIGHT, count)
    elif c == "D": # CUB
        move_cursor(Dir.LEFT, count)
    elif c == "E": # CNL
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.DOWN, count)
    elif c == "F": # CPL
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.UP, count)
    elif c == "G": # CHA
        # Columns are 1-based in the sequence, 0-based internally.
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.RIGHT, count - 1)
    # CUP and ED not implemented
    elif c == "K": # EL
        mode = parms[0] or 0
        if mode == 0:
            # Erase from the cursor to the end of the row.  With a wrap
            # pending the cursor is already past the last cell, so there is
            # nothing to erase.
            if not eol:
                for i in range(curcol, cols):
                    lines[curline][i] = " "
        elif mode == 1 or mode == 2:
            # Mode 1 erases from the start of the row through the cursor
            # cell (inclusive); mode 2 erases the whole row.
            stop = curcol + 1 if mode == 1 else cols
            for i in range(0, stop):
                lines[curline][i] = " "
            if eol:
                # Complete the pending wrap, adding a row only when the
                # cursor is on the last one.
                if curline == len(lines) - 1: append_line()
                move_cursor(Dir.LEFT, cols)
                move_cursor(Dir.DOWN, 1)
    # ED, SU, SD, and HVP also not implemented

    return f.read(1)

def append_line ():
    """Add one blank row of `cols` spaces at the bottom of the screen."""
    blank_row = array("u", " " * cols)
    lines.append(blank_row)

# Main loop: replay the input one character at a time onto the virtual screen
# held in `lines`, tracking the cursor in curline/curcol and the deferred
# wrap in `eol`.
c = f.read(1)
while c:
    if c == "\x1b": # Escape seq.
        # interpret_seq() returns the character that follows the sequence,
        # so skip the read at the bottom of the loop.
        c = interpret_seq()
        continue

    if c == "\x08":   # BS
        # With a wrap pending, backspace only cancels the wrap.
        if not eol:
            move_cursor(Dir.LEFT, 1)
        else:
            eol = False
    elif c == "\x09": # HT
        # Advance to the next multiple of 8, but never past the last column.
        curcol = min((curcol // 8 + 1) * 8, cols - 1)
    elif c == "\x0a": # LF (implies CR in Unix)
        eol = False
        if curline + 1 == len(lines):
            append_line()
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.DOWN, 1)
    elif c in ("\x0b", "\x0c"): # VT/FF: just go down one line
        if curline + 1 == len(lines):
            append_line()
        move_cursor(Dir.DOWN, 1)
    elif c == "\x0d": # CR (stays on same line)
        eol = False
        move_cursor(Dir.LEFT, cols)
    elif "\x20" <= c <= "\x7e" or c >= "\xa1":
        if eol and curcol == cols - 1:
            # A wrap is pending: perform it now and put the character in
            # the first cell of the next row.
            if curline + 1 == len(lines):
                append_line()
            move_cursor(Dir.LEFT, cols)
            move_cursor(Dir.DOWN, 1)
            lines[curline][curcol] = c
            move_cursor(Dir.RIGHT, 1)
        else:
            if not eol:
                lines[curline][curcol] = c
            if curcol == cols - 1:
                # Last column just filled: defer the wrap.
                eol = True
            else:
                move_cursor(Dir.RIGHT, 1)

    c = f.read(1)

# Final result: every row with trailing blanks removed, joined by newlines,
# with no newline added after the last row.
print("\n".join("".join(row).rstrip() for row in lines), end="")

Answer 1

好的，下面是一个（相对）简单的 Python 脚本，用来尝试完成这种清理。我把它命名为 clean-typescript.py。它当然还有改进的余地，也可能包含 bug，但这是我在短时间内能写出来的。

#!/usr/bin/env python3
# Takes raw terminal output of a program containing control sequences that
# overwrite parts of the output, and attempts to transform it to just the end
# result, writing its output on stdout.
#
# Assumptions/limitations:
#  * Assumes Unix line endings.
#  * Assumes input text is left-to-right, in system character encoding.
#  * Does not attempt to deal with most of the complexities of Unicode.
#  * Does not attempt to interpret every ANSI escape sequence; just the common
#    ones that affect cursor position.
#  * Ignores ANSI SGR (bold/color/etc.) sequences.
#  * Assumes 8-column tab stops.
#  * Assumes the terminal displays an unlimited number of lines.
#  * Ignores absolute positioning sequences (except CHA): this is not for
#    ncurses output and such.
#  * Will not allow the cursor to go up beyond the first line in the file.
#
# Usage: clean-typescript.py FILE COLS
# FILE is the name of the file to read; if omitted or "-", read stdin.
# COLS is how many columns wide the terminal is; if omitted, set to 80.

import sys
from array import array
from enum import Enum

# Select the input stream.  A named file is opened with newline="" so that
# Python's universal-newline translation is switched off: with the default
# setting every lone CR in the file would be turned into LF before this
# script sees it, and carriage-return overwrites could never be replayed.
if len(sys.argv) >= 2 and sys.argv[1] != "-":
    f = open(sys.argv[1], "r", newline="")
else:
    f = sys.stdin

# Terminal width in columns (second command-line argument, default 80).
if len(sys.argv) >= 3:
    cols = int(sys.argv[2])
    if cols < 1:
        # Every row needs at least one cell for the cursor to sit in.
        sys.exit("COLS must be a positive integer")
else:
    cols = 80

# Virtual screen: a list of rows, each row an array of `cols` characters
# initialised to spaces.  It starts with a single blank row.
lines = [array("u", (" ",)*cols)]
# Cursor position as zero-based row and column indexes.
curline = curcol = 0
# Set once a character has been written into the last column; the wrap to the
# next row is deferred until another printable character arrives.
eol = False

class Dir (Enum):
    """The four directions accepted by move_cursor."""
    UP, DOWN, RIGHT, LEFT = range(4)

def move_cursor (dir: Dir, count: int):
    """Move the cursor `count` cells in direction `dir`, clamped to the screen.

    Updates the module-level cursor state (curline, curcol).  The resulting
    row always lies in 0 .. len(lines) - 1 and the resulting column in
    0 .. cols - 1, even when `count` is negative.  Moving down never creates
    rows; callers that need a new row must append it before calling.

    Only Dir.LEFT clears the pending-wrap flag `eol`; the other directions
    leave it untouched.
    """
    global curline, curcol, eol
    if dir == Dir.UP:
        curline = min(max(curline - count, 0), len(lines) - 1)
    elif dir == Dir.DOWN:
        curline = min(max(curline + count, 0), len(lines) - 1)
    elif dir == Dir.RIGHT:
        curcol = min(max(curcol + count, 0), cols - 1)
    elif dir == Dir.LEFT:
        eol = False
        curcol = min(max(curcol - count, 0), cols - 1)

def skip_osc ():
    """Discard an OSC string whose introducer (ESC ]) was already consumed.

    Reads from the module-level input stream `f` until the string is
    terminated by BEL (0x07) or by ST (ESC backslash).

    Returns the first character after the terminator, or "" when the input
    ends before or right after the terminator.
    """
    c = f.read(1)
    while c:
        if c == "\x07":
            return f.read(1)
        if c == "\x1b":
            c = f.read(1)
            if c == "\\":
                return f.read(1)
            # Not ST: examine this character again on the next pass so that
            # a BEL or another ESC right after the ESC is not lost.
            continue
        c = f.read(1)
    # End of input inside the OSC string; stop instead of looping forever.
    return ""

def interpret_seq ():
    """Consume one escape sequence; the ESC itself has already been read.

    OSC strings are skipped via skip_osc().  CSI sequences are parsed, and
    the cursor-movement functions CUU, CUD, CUF, CUB, CNL, CPL and CHA as
    well as EL (erase in line) are applied to the virtual screen.  Every
    other sequence is consumed and ignored, including CSI sequences that
    carry private-parameter or intermediate bytes.

    Returns the next character the main loop should process: normally the
    first character after the sequence, "" at end of input, or the offending
    character itself when a CSI sequence is cut short by a byte that cannot
    be part of it.
    """
    c = f.read(1)
    if c == "]": # OSC
        return skip_osc()
    if c != "[":
        # Other escape sequences are not interpreted.  Those with
        # intermediate bytes (0x20-0x2F, e.g. "ESC ( B") are consumed up to
        # and including their final byte so it is not printed as text.
        while c and "\x20" <= c <= "\x2f":
            c = f.read(1)
        return f.read(1)

    # CSI layout: parameter bytes 0x30-0x3F, then intermediate bytes
    # 0x20-0x2F, then exactly one final byte 0x40-0x7E.
    parm_chars = []
    c = f.read(1)
    while c and "\x30" <= c <= "\x3f":
        parm_chars.append(c)
        c = f.read(1)
    has_intermediate = False
    while c and "\x20" <= c <= "\x2f":
        has_intermediate = True
        c = f.read(1)
    if not ("\x40" <= c <= "\x7e"):
        # Truncated or malformed sequence: hand the character (or "" at end
        # of input) back to the caller instead of swallowing it.
        return c

    parm_text = "".join(parm_chars)
    if has_intermediate or parm_text.strip("0123456789;"):
        # Private-mode sequences ("?", ">", "<", "=" prefixes), sub-parameter
        # syntax and sequences with intermediate bytes are not interpreted.
        return f.read(1)

    # Omitted parameters become None so that positions are preserved.
    parms = [int(p) if p else None for p in parm_text.split(";")]
    # For the movement functions an omitted or zero parameter means 1.
    count = parms[0] or 1

    if c == "A":   # CUU
        move_cursor(Dir.UP, count)
    elif c == "B": # CUD
        move_cursor(Dir.DOWN, count)
    elif c == "C": # CUF
        move_cursor(Dir.RIGHT, count)
    elif c == "D": # CUB
        move_cursor(Dir.LEFT, count)
    elif c == "E": # CNL
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.DOWN, count)
    elif c == "F": # CPL
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.UP, count)
    elif c == "G": # CHA
        # Columns are 1-based in the sequence, 0-based internally.
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.RIGHT, count - 1)
    # CUP and ED not implemented
    elif c == "K": # EL
        mode = parms[0] or 0
        if mode == 0:
            # Erase from the cursor to the end of the row.  With a wrap
            # pending the cursor is already past the last cell, so there is
            # nothing to erase.
            if not eol:
                for i in range(curcol, cols):
                    lines[curline][i] = " "
        elif mode == 1 or mode == 2:
            # Mode 1 erases from the start of the row through the cursor
            # cell (inclusive); mode 2 erases the whole row.
            stop = curcol + 1 if mode == 1 else cols
            for i in range(0, stop):
                lines[curline][i] = " "
            if eol:
                # Complete the pending wrap, adding a row only when the
                # cursor is on the last one.
                if curline == len(lines) - 1: append_line()
                move_cursor(Dir.LEFT, cols)
                move_cursor(Dir.DOWN, 1)
    # ED, SU, SD, and HVP also not implemented

    return f.read(1)

def append_line ():
    """Add one blank row of `cols` spaces at the bottom of the screen."""
    blank_row = array("u", " " * cols)
    lines.append(blank_row)

# Main loop: replay the input one character at a time onto the virtual screen
# held in `lines`, tracking the cursor in curline/curcol and the deferred
# wrap in `eol`.
c = f.read(1)
while c:
    if c == "\x1b": # Escape seq.
        # interpret_seq() returns the character that follows the sequence,
        # so skip the read at the bottom of the loop.
        c = interpret_seq()
        continue

    if c == "\x08":   # BS
        # With a wrap pending, backspace only cancels the wrap.
        if not eol:
            move_cursor(Dir.LEFT, 1)
        else:
            eol = False
    elif c == "\x09": # HT
        # Advance to the next multiple of 8, but never past the last column.
        curcol = min((curcol // 8 + 1) * 8, cols - 1)
    elif c == "\x0a": # LF (implies CR in Unix)
        eol = False
        if curline + 1 == len(lines):
            append_line()
        move_cursor(Dir.LEFT, cols)
        move_cursor(Dir.DOWN, 1)
    elif c in ("\x0b", "\x0c"): # VT/FF: just go down one line
        if curline + 1 == len(lines):
            append_line()
        move_cursor(Dir.DOWN, 1)
    elif c == "\x0d": # CR (stays on same line)
        eol = False
        move_cursor(Dir.LEFT, cols)
    elif "\x20" <= c <= "\x7e" or c >= "\xa1":
        if eol and curcol == cols - 1:
            # A wrap is pending: perform it now and put the character in
            # the first cell of the next row.
            if curline + 1 == len(lines):
                append_line()
            move_cursor(Dir.LEFT, cols)
            move_cursor(Dir.DOWN, 1)
            lines[curline][curcol] = c
            move_cursor(Dir.RIGHT, 1)
        else:
            if not eol:
                lines[curline][curcol] = c
            if curcol == cols - 1:
                # Last column just filled: defer the wrap.
                eol = True
            else:
                move_cursor(Dir.RIGHT, 1)

    c = f.read(1)

# Final result: every row with trailing blanks removed, joined by newlines,
# with no newline added after the last row.
print("\n".join("".join(row).rstrip() for row in lines), end="")

补充评论

补充评论

答案1

相关内容