make4ht 数组和 mathml 的输出不正确

make4ht 数组和 mathml 的输出不正确

我正在编译以下乳胶文件:

\documentclass{article}

\begin{document}
$\begin{array}{c}
  1\\
  2
\end{array}$
\end{document}

通过 TeXlive 2023 编译以上内容,截至最新 (2023 年 6 月 14 日)

make4ht test.tex "mathml"

输出test.html仅包含表的第一行。

答案1

这是 中的一个错误make4ht。它会自动清除 TeX4ht 生成的 MathML 代码中的一些常见问题。其中之一是 中虚假的空最后一行array。在这种情况下,代码错误地认为第二行为空,并将其删除。

您可以将此修复文件保存为domfilters/make4ht-mathmlfixes.lua文档目录中的文件,或者安装开发版本制作4小时

local log = logging.new("mathmlfixes")
-- <mglyph> should be inside <mi>, so we don't process it 
-- even though it is  a token element
local token = {"mi", "mn", "mo", "mtext", "mspace", "ms"}
local token_elements = {}
for _, tok in ipairs(token) do token_elements[tok] = true end

-- helper functions to support MathML elements with prefixes (<mml:mi> etc).
--
local function get_element_name(el)
  -- return element name and xmlns prefix
  local name = el:get_element_name()
  if name:match(":") then
    local prefix, real_name =  name:match("([^%:]+):?(.+)")
    return real_name, prefix
  else
    return name
  end
end

local function get_attribute(el, attr_name)
  -- attributes can have the prefix, but sometimes they don't have it
  -- so we need to catch both cases
  local _, prefix = get_element_name(el)
  prefix = prefix or ""
  return el:get_attribute(attr_name) or el:get_attribute(prefix .. ":" .. attr_name)
end

local function get_new_element_name(name, prefix)
  return prefix and prefix .. ":" .. name or name
end

local function update_element_name(el, name, prefix)
  local newname = get_new_element_name(name, prefix)
  el._name = newname
end

local function create_element(el, name, prefix)
  return el:create_element(newname)
end

local function is_token_element(el)
  local name, prefix = get_element_name(el)
  return token_elements[name], prefix
end

local function fix_token_elements(el)
  -- find token elements that are children of other token elements
  if is_token_element(el) then
    local parent = el:get_parent()
    local is_parent_token, prefix = is_token_element(parent)
    if is_parent_token then
      -- change top element in nested token elements to mstyle
      update_element_name(parent, "mstyle", prefix)
    end
  end
end

local function fix_nested_mstyle(el)
  -- the <mstyle> element can be child of token elements
  -- we must exterminate it
  local el_name = get_element_name(el)
  if el_name == "mstyle" then
    local parent = el:get_parent()
    if is_token_element(parent) then
      -- if parent doesn't have the mathvariant attribute copy it from <mstyle>
      if not parent:get_attribute("mathvariant") then
        local mathvariant = el:get_attribute("mathvariant") 
        parent._attr = parent._attr or {}
        parent:set_attribute("mathvariant", mathvariant)
      end
      -- copy the contents of <mstyle> to the parent element
      parent._children = el._children
    end
  end
end

local function fix_mathvariant(el)
  -- set mathvariant of <mi> that is child of <mstyle> to have the same value
  local function find_mstyle(x)
    -- find if element has <mstyle> parent, and its value of mathvariant
    if not x:is_element() then
      return nil
    elseif get_element_name(x) == "mstyle" then 
      return x:get_attribute("mathvariant")
    else
      return find_mstyle(x:get_parent())
    end
  end
  if get_element_name(el) == "mi" then
    -- process only <mi> that have mathvariant set
    local oldmathvariant = el:get_attribute("mathvariant")
    if oldmathvariant then
      local mathvariant = find_mstyle(el:get_parent())
      if mathvariant then
        el:set_attribute("mathvariant", mathvariant)
      end
    end
  end
end

local function contains_only_text(el)
  -- detect if element contains only text
  local elements = 0
  local text     = 0
  local children = el:get_children() or {}
  for _ , child in ipairs(children) do
    if child:is_text() then text = text + 1
    elseif child:is_element() then elements = elements + 1
    end
  end
  return text > 0 and elements == 0
end

-- check if <mstyle> element contains direct text. in that case, add
-- <mtext>
local function fix_missing_mtext(el)
  if el:get_element_name() == "mstyle" and contains_only_text(el) then
    -- add child <mtext>
    log:debug("mstyle contains only text: " .. el:get_text())
    -- copy the current mode, change it's element name to mtext and add it as a child of <mstyle>
    local copy = el:copy_node()
    copy._name = "mtext"
    copy._parent = el
    el._children = {copy}
  end
end

local function is_radical(el)
  local radicals = {msup=true, msub=true, msubsup=true}
  return radicals[el:get_element_name()]
end

local function get_mrow_child(el)
  local get_first = function(x) 
    local children = x:get_children() 
    return children[1]
  end
  local first = get_first(el)
  -- either return first child, and if the child is <mrow>, return it's first child
  if first and first:is_element() then
    if first:get_element_name() == "mrow" then
      return get_first(first), first
    else
      return first
    end
  end
end

local function fix_radicals(el)
  if is_radical(el) then
    local first_child, mrow = get_mrow_child(el)
    -- if the first child is only one character long, it is possible that there is a problem
    if first_child and string.len(first_child:get_text()) == 1 then
      local name = first_child:get_element_name() 
      local siblings = el:get_siblings()
      local pos = el:find_element_pos()
      -- it doesn't make sense to do any further processing if the element is at the beginning
      if pos == 1 then return end
      if name == "mo" then
        for i = pos, 1,-1 do
        end

      end
    end

  end
end

-- put <mrow> as child of <math> if it already isn't here
local allowed_top_mrow = {
  math=true
}
local function top_mrow(math)
  local children = math:get_children()
  local put_mrow = false
  -- don't process elements with one or zero children
  -- don't process elements that already are mrow
  local parent = math:get_parent()
  local parent_name
  if parent then parent_name = get_element_name(parent) end
  local current_name, prefix = get_element_name(math)
  if #children < 2 or not allowed_top_mrow[current_name] or current_name == "mrow" or parent_name == "mrow" then return nil end
  local mrow_count = 0
  for _,v in ipairs(children) do
    if v:is_element() and is_token_element(v) then
      put_mrow = true
      -- break
    elseif v:is_element() and get_element_name(v) == "mrow" then
      mrow_count = mrow_count + 1
    end
  end
  if not put_mrow and get_element_name(math) == "math" and mrow_count == 0 then
    -- put at least one <mrow> to each <math>
    put_mrow = true
  end
  if put_mrow then
    local newname = get_new_element_name("mrow", prefix)
    local mrow = math:create_element(newname)
    for _, el in ipairs(children) do
      mrow:add_child_node(el)
    end
    math._children = {mrow}
  end

end

local function get_fence(el, attr, form)
  -- convert fence attribute to <mo> element
  -- attr: open | close
  -- form: prefix | postfix
  local char = el:get_attribute(attr)
  local mo 
  if char then
    local name, prefix = get_element_name(el)
    local newname = get_new_element_name("mo", prefix)
    mo = el:create_element(newname, {fence="true", form = form})
    mo:add_child_node(mo:create_text_node(char))
  end
  return mo
end


local function fix_mfenced(el)
  -- TeX4ht uses in some cases <mfenced> element which is deprecated in MathML.
  -- Firefox doesn't support it already.
  local name, prefix = get_element_name(el)
  if name == "mfenced" then
    -- we must replace it by <mrow><mo>start</mo><mfenced children...><mo>end</mo></mrow>
    local open = get_fence(el, "open", "prefix")
    local close = get_fence(el, "close", "postfix")
    -- there can be also separator attribute, but it is not used in TeX4ht
    -- change <mfenced> to <mrow> and remove all attributes
    local newname = get_new_element_name("mrow", prefix)
    el._name = newname
    el._attr = {}
    -- open must be first child, close needs to be last
    if open then el:add_child_node(open, 1) end
    if close then el:add_child_node(close) end
  end
end

local function is_fence(el)
  return get_element_name(el) == "mo" and el:get_attribute("fence") == "true"
end

local function fix_mo_to_mfenced(el)
  -- LibreOffice NEEDS <mfenced> element. so we need to convert <mrow><mo fence="true">
  -- to <mfenced>. ouch.
  if is_fence(el) then
    local parent = el:get_parent()
    local open = el:get_text():gsub("%s*", "") -- convert mo content to text, so it can be used in 
    -- close needs to be the last element in the sibling list of the current element
    local siblings = el:get_siblings()
    el:remove_node() -- we don't need this element anymore
    local close
    for i = #siblings, 1, -1 do
      last = siblings[i]
      if last:is_element() then
        if is_fence(last) then -- set close attribute only if the last element is fence
          close = last:get_text():gsub("%s*", "")
          last:remove_node() -- remove <mo>
        end
        break -- break looping over elements once we find last element
      end 
    end
    -- convert parent <mrow> to <mfenced>
    local _, prefix = get_element_name(parent)
    local newname = get_new_element_name("mfenced", prefix)
    parent._name = newname
    parent._attr = {open = open, close = close}
  end
end

local function fix_numbers(el)
  -- convert <mn>1</mn><mo>.</mo><mn>3</mn> to <mn>1.3</mn>
  if get_element_name(el) == "mn" then
    local n = el:get_sibling_node(1)
    -- test if next  element is <mo class="MathClass-punc">.</mo>
    if n and n:is_element() 
         and get_element_name(n) == "mo" 
         and get_attribute(n, "class") == "MathClass-punc" 
         and n:get_text() == "." 
    then
      -- get next element and test if it is <mn>
      local x = el:get_sibling_node(2)
      if x and x:is_element() 
           and get_element_name(x) == "mn" 
      then
        -- join numbers and set it as text content of the current element
        local newnumber = el:get_text() .. "." .. x:get_text()
        log:debug("Joining numbers: " .. newnumber)
        el._children = {}
        local newchild = el:create_text_node(newnumber)
        el:add_child_node(newchild)
        -- remove elements that hold dot and decimal part
        n:remove_node()
        x:remove_node()
      end
    end
  end
end


local function just_operators(list)
  -- count <mo> and return true if list contains just them
  local mo = 0
  for _, x in ipairs(list) do
    if get_element_name(x) == "mo" then mo = mo + 1 end
  end
  return mo
end


local function fix_operators(x)
  -- change <mo> elements that are only children of any element to <mi>
  -- this fixes issues in LibreOffice with a^{*}
  -- I hope it doesn't introduce different issues
  -- process only <mo>
  local el_name, prefix = get_element_name(x)
  if el_name ~= "mo" then return nil end
    local siblings = x:get_siblings()
    -- test if current element list contains only <mo>
    if just_operators(siblings) == #siblings then
        if #siblings == 1 then
      if not x:get_attribute("stretchy") then
        -- one <mo> translates to <mtext>
        local newname = get_new_element_name("mtext", prefix)
        x._name = newname
        log:debug("changing one <mo> to <mtext>: " .. x:get_text())
        -- I think we should use <mi>, but LO incorrectly renders it in <msubsup>,
        -- even if we use the mathvariant="normal" attribute. <mtext> works, so
        -- we use that instead.
        -- x:set_attribute("mathvariant", "normal")
      end
        else
            -- multiple <mo> translate to <mtext>
            local text = {}
            for _, el in ipairs(siblings) do
                text[#text+1] = el:get_text()
            end
            -- replace first <mo> text with concetanated text content
            -- of all <mo> elements
            x._children = {}
      local newtext = table.concat(text)
            local text_el = x:create_text_node(newtext)
      log:debug("changing <mo> to <mtext>: " .. newtext)
      x:add_child_node(text_el)
      -- change <mo> to <mtext>
      local newname = get_new_element_name("mtext", prefix)
      x._name = newname
      -- remove subsequent <mo>
      for i = 2, #siblings do
        siblings[i]:remove_node()
      end
    end
  end
end

local function is_last_element(el)
  local siblings = el:get_siblings()
  -- return true only if the current element is the last in the parent's children
  for i = #siblings, 1, -1 do
    local curr = siblings[i]
    if curr == el then
      return true
    elseif curr:is_element() then
      return false
    end
  end
  return false
end

local function is_empty_row(el)
  -- empty row should contain only one <mtd>
  local count = 0
  if el:get_text():match("^%s*$") then
    for _, child in ipairs(el:get_children()) do
      if child:is_element() then count = count + 1 end
    end
  else
    -- row is not empty if it contains any text
    return false
  end
  -- if there is one or zero childrens, then it is empty row
  return count < 2
end


local function delete_last_empty_mtr(el)
  -- arrays sometimes contain last empty row, which causes rendering issues,
  -- so we should remove them
  local el_name, prefix = get_element_name(el)
  if el_name == "mtr" 
    and get_attribute(el, "class") == "array-row" 
    and is_last_element(el)
    and is_empty_row(el)
  then
    el:remove_node()
  end

end

return function(dom)
  dom:traverse_elements(function(el)
    if settings.output_format ~= "odt" then
      -- LibreOffice needs <mfenced>, but Firefox doesn't
      fix_mfenced(el)
    else
      fix_mo_to_mfenced(el)
    end
    fix_radicals(el)
    fix_token_elements(el)
    fix_nested_mstyle(el)
    fix_missing_mtext(el)
    fix_numbers(el)
    fix_operators(el)
    fix_mathvariant(el)
    top_mrow(el)
    delete_last_empty_mtr(el)
  end)
  return dom
end

这是使用固定过滤器的结果:

在此处输入图片描述

相关内容