在超链接旁边自动生成页码以供打印

Question 1

我找到了解决方案！它主要依赖于以下答案：

https://stackoverflow.com/questions/18324367/how-to-use-findelementdocumentapp-elementtype-table-of-contents-to-get-and-pa
https://stackoverflow.com/questions/18727341/get-all-links-in-a-document

我把完整的方案发布在这里，以便其他人也能使用（为什么这不是所有写作软件都自带的功能？！）

这个方案使用 Google 脚本（Google Apps Script），因此您需要将脚本添加到您的云端硬盘并授予相应的权限，然后它才能运行。这可能也是有史以来最取巧的变通办法，因为您必须在文档中插入两个目录，而不是一个。Google Docs 有一个奇怪的“功能”（应理解为：巨大的限制）：您只能在带页码的目录和带链接的目录之间二选一。此脚本基本上是把两者合并成一个字典，以便文档中的所有链接都可以用其目标所在的页码进行注释；外部链接则用其 URL 进行注释。

在我的文档中，我有指向内部标题和外部网站的链接：

我还使用“插入”>“目录”菜单选项添加了两个目录。第一个使用页码选项，第二个使用链接选项：

当我运行脚本时，文档中的链接会用其页码或 URL 注释，以便文档以打印形式使用：

在 Google Scripts 中我有以下代码：

/**
 * Entry point: annotate every hyperlink in the active Google Document so the
 * document remains usable on paper.
 *
 * Links whose URL appears in the table-of-contents lookup are labelled with
 * the page number of their target; all other links are labelled with their URL.
 *
 * The lookup is built from the tables of contents found in the document
 * (Insert > Table of Contents). The script expects two of them:
 *   - one created with the "page numbers" option, and
 *   - one created with the "links" option.
 */
function annotateHyperlinks() {
  const activeDoc = DocumentApp.getActiveDocument();

  // Lookup keyed by heading URL; each value carries (at least) a `page` entry.
  const pageLookup = assembleContentsDict(activeDoc);

  // Annotate the links paragraph by paragraph.
  for (const paragraph of activeDoc.getBody().getParagraphs()) {
    labelLinksWithPageNumbers(paragraph, pageLookup);
  }
}

/**
 * Utility function called from `assembleContentsDict` to merge the entries of
 * one table of contents into `contents`.
 *
 * Entries are matched between tables of contents purely by their position
 * (child index) inside the TOC, so both TOCs must list the same headings in
 * the same order.
 *
 * - A TOC entry that carries a link contributes `url`.
 * - A TOC entry without a link is treated as a "page number" entry and
 *   contributes `page`, `sectionNumber` and `sectionHeading`, parsed from the
 *   whitespace-separated text of the line.
 *
 * The merge is order-independent: whichever TOC is processed second adds its
 * fields to the entry created by the first instead of replacing it.
 * Blank TOC lines are skipped.
 *
 * @param {Element} toc: The TABLE_OF_CONTENTS element
 * @param {Object} contents: the dictionary to populate, keyed by entry index
 *
 * @return {Object} contents: the same dictionary, populated in place
 */
function updateContentsList(toc, contents) {

  const numChildren = toc.getNumChildren();
  for (let j = 0; j < numChildren; j++) {

    const tocItem = toc.getChild(j).asParagraph();

    // Reuse the entry created by a previously processed TOC (if any) so that
    // neither the url nor the page information is lost, whichever came first.
    const item = contents[j] || { url: '' };

    const myUrl = tocItem.getLinkUrl();
    if (myUrl) {
      // Entry from the TOC with links: only the url is of interest.
      item.url = myUrl;
    }
    else {
      // Entry from the TOC with page numbers. The first child holds the whole
      // line, including the page number. An empty paragraph has no children.
      const tocItemText = tocItem.getNumChildren() > 0
        ? tocItem.getChild(0).asText().getText()
        : '';
      const trimmed = tocItemText.trim();
      if (trimmed === '') {
        continue; // nothing to parse on a blank line
      }
      const mySplit = trimmed.split(/\s+/);

      // The sectionNumber and sectionHeading are not currently used in this script, but if you wanted to
      // replace the linked text with something consistent (ie: instead of "Go here" it could be "Section 1.3 Go here (page 2)")
      // then this is where that info comes from.  You'd also need to check that you do have numbered sections in your document,
      // otherwise this will need to be tweaked.
      item.page = mySplit.at(-1);  // last item is the page number
      item.sectionNumber = mySplit.at(0); // first item is the section number
      item.sectionHeading = mySplit.slice(1, -1).join(" "); // everything in between is the section heading
    }

    contents[j] = item;
  }

  return contents;
}

/**
 * Assemble a dictionary-style table of contents, where the keys are the URL of each of the
 * headings.  The values are themselves also dictionaries, with keys of:
 *        - page (the page on which the heading appears)
 *        - sectionNumber (the number of the section)
 *        - sectionHeading (the title of the section)
 *        - url (the same URL that is used as the key)
 * The values are produced by `updateContentsList`, which is called once for
 * every TABLE_OF_CONTENTS element found in the document body.
 *
 * Entries that never received a URL (for example because the document has no
 * TOC with links) are left out: they cannot be matched against any hyperlink
 * and would otherwise all collide under the empty-string key.
 *
 * The resulting dictionary is also written to the log.
 *
 * @param {Element} doc: the current active document
 *
 * @return {Object} contentsAsDict: the dictionary described above
 */
function assembleContentsDict(doc) {

  // Define the search parameters.
  const body = doc.getBody();
  const searchType = DocumentApp.ElementType.TABLE_OF_CONTENTS;
  let contents = {}; // Intermediate dict of dicts, keyed by the entry's position in the TOC

  // Loop through all TOCs.  Note that this only works when there is one of each type ...
  // ie: one with page numbers and one with links.
  let searchResult = body.findElement(searchType, null);
  while (searchResult) {
    const toc = searchResult.getElement().asTableOfContents();
    contents = updateContentsList(toc, contents);
    searchResult = body.findElement(searchType, searchResult);
  }

  // Now re-key the entries by heading URL. Declared locally so that no
  // implicit global variable is created.
  const contentsAsDict = {};
  for (const item of Object.values(contents)) {
    if (item.url) {
      contentsAsDict[item.url] = item;
    }
  }

  console.log(contentsAsDict);
  return contentsAsDict;
}


/**
 * Annotate every hyperlink found in the TEXT children of a paragraph.
 *
 * Directly after each link a short note is inserted:
 *   - " (page N) " when the link URL is a key of `toc` (an internal link), or
 *   - " (URL) "    otherwise (an external link).
 * The link is then re-applied over the original link text plus the inserted
 * note, so the note is part of the hyperlink.
 *
 * A link is a maximal run of consecutive characters sharing the same link URL;
 * two adjacent links with different URLs are annotated separately.
 *
 * @param {Element} paragraph: The paragraph to operate on.
 * @param {Object} toc: The table of contents dictionary created by
 *                      assembleContentsDict() function
 *
 */
function labelLinksWithPageNumbers(paragraph, toc) {

  for (let c = 0; c < paragraph.getNumChildren(); c++) {

    const element = paragraph.getChild(c);
    if (element.getType() !== DocumentApp.ElementType.TEXT) {
      continue;
    }

    const textObj = element.editAsText();
    const text = element.getText();

    // The link run currently being scanned, or null when outside a link:
    // { url, startOffset, endOffsetInclusive }
    let run = null;

    // Counting down from the end so that insertions don't affect the place in the string
    for (let ch = text.length - 1; ch >= 0; ch--) {
      const rawUrl = textObj.getLinkUrl(ch);
      const url = rawUrl != null ? String(rawUrl) : null;

      // Leaving a run: either the link ended or a different link begins here.
      if (run !== null && url !== run.url) {
        textObj.setLinkUrl(run.startOffset, run.endOffsetInclusive, run.url);
        run = null;
      }

      if (url === null) {
        continue;
      }

      if (run === null) {
        // Scanning backwards, so this character is the END of a link.
        // Internal links get the page number from the TOC dict,
        // external links get their URL address instead.
        const isInternal = Object.prototype.hasOwnProperty.call(toc, url) && toc[url];
        const linkText = isInternal
          ? " (page " + toc[url].page + ") "
          : " (" + url + ") ";
        textObj.insertText(ch + 1, linkText);

        run = {
          url: url,
          // The run starts here until an earlier character proves otherwise;
          // this keeps one-character links from being stretched to offset 0.
          startOffset: ch,
          // Extend the link over the note that was just inserted.
          endOffsetInclusive: ch + linkText.length,
        };
      }
      else {
        run.startOffset = ch;
      }
    }

    // Takes care of links that extend to the first character in the paragraph
    if (run !== null) {
      textObj.setLinkUrl(run.startOffset, run.endOffsetInclusive, run.url);
    }
  }
}

注意：

如果你想修改链接文本以包含实际的章节标题和/或编号（参见之前链接的文本），这是可能的（参见代码中的注释）。
我还没有针对指向图像/表格/方程式等的任意书签进行测试，只测试了目录中出现的标题。

Answer

我找到了解决方案！它主要依赖于以下答案：

https://stackoverflow.com/questions/18324367/how-to-use-findelementdocumentapp-elementtype-table-of-contents-to-get-and-pa
https://stackoverflow.com/questions/18727341/get-all-links-in-a-document

我把完整的方案发布在这里，以便其他人也能使用（为什么这不是所有写作软件都自带的功能？！）

这个方案使用 Google 脚本（Google Apps Script），因此您需要将脚本添加到您的云端硬盘并授予相应的权限，然后它才能运行。这可能也是有史以来最取巧的变通办法，因为您必须在文档中插入两个目录，而不是一个。Google Docs 有一个奇怪的“功能”（应理解为：巨大的限制）：您只能在带页码的目录和带链接的目录之间二选一。此脚本基本上是把两者合并成一个字典，以便文档中的所有链接都可以用其目标所在的页码进行注释；外部链接则用其 URL 进行注释。

在我的文档中，我有指向内部标题和外部网站的链接：

我还使用“插入”>“目录”菜单选项添加了两个目录。第一个使用页码选项，第二个使用链接选项：

当我运行脚本时，文档中的链接会用其页码或 URL 注释，以便文档以打印形式使用：

在 Google Scripts 中我有以下代码：

/**
 * Entry point: annotate every hyperlink in the active Google Document so the
 * document remains usable on paper.
 *
 * Links whose URL appears in the table-of-contents lookup are labelled with
 * the page number of their target; all other links are labelled with their URL.
 *
 * The lookup is built from the tables of contents found in the document
 * (Insert > Table of Contents). The script expects two of them:
 *   - one created with the "page numbers" option, and
 *   - one created with the "links" option.
 */
function annotateHyperlinks() {
  const activeDoc = DocumentApp.getActiveDocument();

  // Lookup keyed by heading URL; each value carries (at least) a `page` entry.
  const pageLookup = assembleContentsDict(activeDoc);

  // Annotate the links paragraph by paragraph.
  for (const paragraph of activeDoc.getBody().getParagraphs()) {
    labelLinksWithPageNumbers(paragraph, pageLookup);
  }
}

/**
 * Utility function called from `assembleContentsDict` to merge the entries of
 * one table of contents into `contents`.
 *
 * Entries are matched between tables of contents purely by their position
 * (child index) inside the TOC, so both TOCs must list the same headings in
 * the same order.
 *
 * - A TOC entry that carries a link contributes `url`.
 * - A TOC entry without a link is treated as a "page number" entry and
 *   contributes `page`, `sectionNumber` and `sectionHeading`, parsed from the
 *   whitespace-separated text of the line.
 *
 * The merge is order-independent: whichever TOC is processed second adds its
 * fields to the entry created by the first instead of replacing it.
 * Blank TOC lines are skipped.
 *
 * @param {Element} toc: The TABLE_OF_CONTENTS element
 * @param {Object} contents: the dictionary to populate, keyed by entry index
 *
 * @return {Object} contents: the same dictionary, populated in place
 */
function updateContentsList(toc, contents) {

  const numChildren = toc.getNumChildren();
  for (let j = 0; j < numChildren; j++) {

    const tocItem = toc.getChild(j).asParagraph();

    // Reuse the entry created by a previously processed TOC (if any) so that
    // neither the url nor the page information is lost, whichever came first.
    const item = contents[j] || { url: '' };

    const myUrl = tocItem.getLinkUrl();
    if (myUrl) {
      // Entry from the TOC with links: only the url is of interest.
      item.url = myUrl;
    }
    else {
      // Entry from the TOC with page numbers. The first child holds the whole
      // line, including the page number. An empty paragraph has no children.
      const tocItemText = tocItem.getNumChildren() > 0
        ? tocItem.getChild(0).asText().getText()
        : '';
      const trimmed = tocItemText.trim();
      if (trimmed === '') {
        continue; // nothing to parse on a blank line
      }
      const mySplit = trimmed.split(/\s+/);

      // The sectionNumber and sectionHeading are not currently used in this script, but if you wanted to
      // replace the linked text with something consistent (ie: instead of "Go here" it could be "Section 1.3 Go here (page 2)")
      // then this is where that info comes from.  You'd also need to check that you do have numbered sections in your document,
      // otherwise this will need to be tweaked.
      item.page = mySplit.at(-1);  // last item is the page number
      item.sectionNumber = mySplit.at(0); // first item is the section number
      item.sectionHeading = mySplit.slice(1, -1).join(" "); // everything in between is the section heading
    }

    contents[j] = item;
  }

  return contents;
}

/**
 * Assemble a dictionary-style table of contents, where the keys are the URL of each of the
 * headings.  The values are themselves also dictionaries, with keys of:
 *        - page (the page on which the heading appears)
 *        - sectionNumber (the number of the section)
 *        - sectionHeading (the title of the section)
 *        - url (the same URL that is used as the key)
 * The values are produced by `updateContentsList`, which is called once for
 * every TABLE_OF_CONTENTS element found in the document body.
 *
 * Entries that never received a URL (for example because the document has no
 * TOC with links) are left out: they cannot be matched against any hyperlink
 * and would otherwise all collide under the empty-string key.
 *
 * The resulting dictionary is also written to the log.
 *
 * @param {Element} doc: the current active document
 *
 * @return {Object} contentsAsDict: the dictionary described above
 */
function assembleContentsDict(doc) {

  // Define the search parameters.
  const body = doc.getBody();
  const searchType = DocumentApp.ElementType.TABLE_OF_CONTENTS;
  let contents = {}; // Intermediate dict of dicts, keyed by the entry's position in the TOC

  // Loop through all TOCs.  Note that this only works when there is one of each type ...
  // ie: one with page numbers and one with links.
  let searchResult = body.findElement(searchType, null);
  while (searchResult) {
    const toc = searchResult.getElement().asTableOfContents();
    contents = updateContentsList(toc, contents);
    searchResult = body.findElement(searchType, searchResult);
  }

  // Now re-key the entries by heading URL. Declared locally so that no
  // implicit global variable is created.
  const contentsAsDict = {};
  for (const item of Object.values(contents)) {
    if (item.url) {
      contentsAsDict[item.url] = item;
    }
  }

  console.log(contentsAsDict);
  return contentsAsDict;
}


/**
 * Annotate every hyperlink found in the TEXT children of a paragraph.
 *
 * Directly after each link a short note is inserted:
 *   - " (page N) " when the link URL is a key of `toc` (an internal link), or
 *   - " (URL) "    otherwise (an external link).
 * The link is then re-applied over the original link text plus the inserted
 * note, so the note is part of the hyperlink.
 *
 * A link is a maximal run of consecutive characters sharing the same link URL;
 * two adjacent links with different URLs are annotated separately.
 *
 * @param {Element} paragraph: The paragraph to operate on.
 * @param {Object} toc: The table of contents dictionary created by
 *                      assembleContentsDict() function
 *
 */
function labelLinksWithPageNumbers(paragraph, toc) {

  for (let c = 0; c < paragraph.getNumChildren(); c++) {

    const element = paragraph.getChild(c);
    if (element.getType() !== DocumentApp.ElementType.TEXT) {
      continue;
    }

    const textObj = element.editAsText();
    const text = element.getText();

    // The link run currently being scanned, or null when outside a link:
    // { url, startOffset, endOffsetInclusive }
    let run = null;

    // Counting down from the end so that insertions don't affect the place in the string
    for (let ch = text.length - 1; ch >= 0; ch--) {
      const rawUrl = textObj.getLinkUrl(ch);
      const url = rawUrl != null ? String(rawUrl) : null;

      // Leaving a run: either the link ended or a different link begins here.
      if (run !== null && url !== run.url) {
        textObj.setLinkUrl(run.startOffset, run.endOffsetInclusive, run.url);
        run = null;
      }

      if (url === null) {
        continue;
      }

      if (run === null) {
        // Scanning backwards, so this character is the END of a link.
        // Internal links get the page number from the TOC dict,
        // external links get their URL address instead.
        const isInternal = Object.prototype.hasOwnProperty.call(toc, url) && toc[url];
        const linkText = isInternal
          ? " (page " + toc[url].page + ") "
          : " (" + url + ") ";
        textObj.insertText(ch + 1, linkText);

        run = {
          url: url,
          // The run starts here until an earlier character proves otherwise;
          // this keeps one-character links from being stretched to offset 0.
          startOffset: ch,
          // Extend the link over the note that was just inserted.
          endOffsetInclusive: ch + linkText.length,
        };
      }
      else {
        run.startOffset = ch;
      }
    }

    // Takes care of links that extend to the first character in the paragraph
    if (run !== null) {
      textObj.setLinkUrl(run.startOffset, run.endOffsetInclusive, run.url);
    }
  }
}

注意：

如果你想修改链接文本以包含实际的章节标题和/或编号（参见之前链接的文本），这是可能的（参见代码中的注释）。
我还没有针对指向图像/表格/方程式等的任意书签进行测试，只测试了目录中出现的标题。

Question 2

在 Microsoft Word 中，您可以使用 PageRef 字段。

以下是微软关于该字段的文档链接。

语法

当您查看文档中的 PageRef 字段时，语法如下所示：

{ PAGEREF 书签 [\* 格式开关 ] }

如果您链接到内置标题样式，您还可以使用插入交叉引用，它允许您选择显示页码并默认插入为未标记的超链接。

我不知道 LibreOffice 是否有同等功能。

Answer

在 Microsoft Word 中，您可以使用 PageRef 字段。

以下是微软关于该字段的文档链接。

语法

当您查看文档中的 PageRef 字段时，语法如下所示：

{ PAGEREF 书签 [\* 格式开关 ] }

如果您链接到内置标题样式，您还可以使用插入交叉引用，它允许您选择显示页码并默认插入为未标记的超链接。

我不知道 LibreOffice 是否有同等功能。

在超链接旁边自动生成页码以供打印

答案1

答案2

在 Microsoft Word 中，您可以使用 PageRef 字段。

相关内容