使用 Azure 认知搜索 AI 索引 SharePoint 子网站时排除特定网站库

使用 Azure 认知搜索 AI 索引 SharePoint 子网站时排除特定网站库

我在使用 Azure AI 搜索(Azure AI Search,以前称为 Azure 认知搜索)时遇到了问题。我的目标是使用 Azure 认知服务为我的 SharePoint 编制索引,以便将其与 Azure OpenAI 集成。在我的场景中,我有一个站点,我们将其称为“根”,其中包含多个子站点,例如 subsite-1、subsite-2、subsite-3 等。我想在 Azure AI 搜索中为子站点内的所有库编制索引。每个子站点的库都有额外的列,我也想将它们包含在索引中。但是,“根”站点中的库没有这些额外的列,我希望在编制索引时跳过它们。

数据源

{
  "name": "prod-sharepoint-datasource",
  "type": "sharepoint",
  "credentials": {
    "connectionString": "SharePointOnlineEndpoint=https://xxx.sharepoint.com/sites/Root/;ApplicationId=xxx;ApplicationSecret=xxx"
  },
  "container": {
    "name": "useQuery",
    "query": "includeLibrariesInSite=https://xxx.sharepoint.com/sites/Root;additionalColumns=MyCustomColumn,MyCustomColumn2,MyCustomColumn3"
  }
}

我尝试使用 excludeLibrary 属性从“根”站点中排除库,但没有按预期工作。以下是我尝试的示例:

{
  "name": "prod-sharepoint-datasource",
  "type": "sharepoint",
  "credentials": {
    "connectionString": "SharePointOnlineEndpoint=https://xxx.sharepoint.com/sites/Root/;ApplicationId=xxx;ApplicationSecret=xxx"
  },
  "container": {
    "name": "useQuery",
    "query": "includeLibrariesInSite=https://xxx.sharepoint.com/sites/Root;additionalColumns=MyCustomColumn,MyCustomColumn2,MyCustomColumn3;excludeLibrary=https://xxx.sharepoint.com/sites/Root/default.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library1.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library2.aspx;excludeLibrary=https://xxx.sharepoint.com/sites/Root/Library3.aspx"
  }
}

我还提供了索引和索引器配置的 JSON。如果有人知道如何正确地从“根”站点中排除库或排除整个“根”站点以便只索引其子站点的库,我将不胜感激。

索引

{
  "name": "prod-sharepoint-indexes",
  "fields": [
    { "name": "column1", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "column2", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "column3", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "content", "type": "Edm.String", "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "id", "type": "Edm.String", "key": true, "searchable": false },
    { "name": "metadata_spo_item_name", "type": "Edm.String", "key": false, "searchable": true, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_path", "type": "Edm.String", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_weburi", "type": "Edm.String", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false },
    { "name": "metadata_spo_item_content_type", "type": "Edm.String", "key": false, "searchable": false, "filterable": true, "sortable": false, "facetable": true },
    { "name": "metadata_spo_item_last_modified", "type": "Edm.DateTimeOffset", "key": false, "searchable": false, "filterable": false, "sortable": true, "facetable": false },
    { "name": "metadata_spo_item_size", "type": "Edm.Int64", "key": false, "searchable": false, "filterable": false, "sortable": false, "facetable": false }
  ]
}

索引器

{
  "name": "prod-sharepoint-indexer",
  "dataSourceName": "prod-sharepoint-datasource",
  "targetIndexName": "prod-sharepoint-indexes",
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": null,
    "configuration": {
      "indexedFileNameExtensions": ".pdf, .docx, .msg, .xlsx, .eml, .ppt, .doc",
      "excludedFileNameExtensions": ".png, .jpg",
      "dataToExtract": "contentAndMetadata"
    }
  },
  "schedule": {},
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_spo_site_library_item_id",
      "targetFieldName": "id",
      "mappingFunction": {
        "name": "base64Encode"
      }
    }
  ]
}

相关内容