从 JSON 字符串中提取数据

从 JSON 字符串中提取数据

我必须从 JSON 字符串中提取车牌数字及其关联的置信度,如下所示:

{
  "response": {
    "container": {
      "id": "0df307bc-06b2-45cf-b7ff-ce07fd04e04d",
      "timestamp": "2018-Jul-10 17:34:27.448632"
    },
    "id": "00000002-0000-0000-0000-000000000015"
  },
  "frames": {
    "frame": {
      "id": "5583",
      "timestamp": "2016-Nov-30 13:05:27",
      "lps": {
        "lp": {
          "licenseplate": "15451BBL",
          "text": "15451BBL",
          "wtext": "15451BBL",
          "confidence": "20",
          "bkcolor": "16777215",
          "color": "16777215",
          "type": "0",
          "ntip": "11",
          "cct_country_short": "",
          "cct_state_short": "",
          "tips": {
            "tip": {
              "poly": {
                "p": { "x": "1094", "y": "643" },
                "p": { "x": "1099", "y": "643" },
                "p": { "x": "1099", "y": "667" },
                "p": { "x": "1094", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "49",
              "code_ascii": "1",
              "confidence": "97"
            },
            "tip": {
              "poly": {
                "p": { "x": "1103", "y": "642" },
                "p": { "x": "1113", "y": "642" },
                "p": { "x": "1112", "y": "667" },
                "p": { "x": "1102", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "53",
              "code_ascii": "5",
              "confidence": "89"
            },
            "tip": {
              "poly": {
                "p": { "x": "1112", "y": "640" },
                "p": { "x": "1122", "y": "640" },
                "p": { "x": "1122", "y": "666" },
                "p": { "x": "1112", "y": "666" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "52",
              "code_ascii": "4",
              "confidence": "97"
            },
            "tip": {
              "poly": {
                "p": { "x": "1123", "y": "640" },
                "p": { "x": "1132", "y": "640" },
                "p": { "x": "1131", "y": "665" },
                "p": { "x": "1123", "y": "665" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "53",
              "code_ascii": "5",
              "confidence": "97"
            },
            "tip": {
              "poly": {
                "p": { "x": "1134", "y": "640" },
                "p": { "x": "1139", "y": "640" },
                "p": { "x": "1139", "y": "664" },
                "p": { "x": "1133", "y": "664" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "49",
              "code_ascii": "1",
              "confidence": "77"
            },
            "tip": {
              "poly": {
                "p": { "x": "1154", "y": "639" },
                "p": { "x": "1163", "y": "639" },
                "p": { "x": "1163", "y": "663" },
                "p": { "x": "1153", "y": "663" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "66",
              "code_ascii": "B",
              "confidence": "97"
            },
            "tip": {
              "poly": {
                "p": { "x": "1164", "y": "638" },
                "p": { "x": "1173", "y": "638" },
                "p": { "x": "1173", "y": "663" },
                "p": { "x": "1163", "y": "663" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "66",
              "code_ascii": "B",
              "confidence": "94"
            },
            "tip": {
              "poly": {
                "p": { "x": "1191", "y": "637" },
                "p": { "x": "1206", "y": "636" },
                "p": { "x": "1205", "y": "660" },
                "p": { "x": "1190", "y": "661" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "76",
              "code_ascii": "L",
              "confidence": "34"
            },
            "tip": {
              "poly": {
                "p": { "x": "1103", "y": "655" },
                "p": { "x": "1111", "y": "655" },
                "p": { "x": "1111", "y": "667" },
                "p": { "x": "1103", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "74",
              "code_ascii": "J",
              "confidence": "57"
            },
            "tip": {
              "poly": {
                "p": { "x": "1103", "y": "655" },
                "p": { "x": "1111", "y": "655" },
                "p": { "x": "1111", "y": "667" },
                "p": { "x": "1103", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "74",
              "code_ascii": "J",
              "confidence": "57"
            },
            "tip": {
              "poly": {
                "p": { "x": "1176", "y": "638" },
                "p": { "x": "1185", "y": "637" },
                "p": { "x": "1184", "y": "661" },
                "p": { "x": "1175", "y": "662" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "52",
              "code_ascii": "4",
              "confidence": "7"
            }
          },
          "ncharacter": "8",
          "characters": {
            "characater": {
              "poly": {
                "p": { "x": "1094", "y": "643" },
                "p": { "x": "1099", "y": "643" },
                "p": { "x": "1099", "y": "667" },
                "p": { "x": "1094", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "49",
              "code_ascii": "1",
              "confidence": "97"
            },
            "characater": {
              "poly": {
                "p": { "x": "1103", "y": "642" },
                "p": { "x": "1113", "y": "642" },
                "p": { "x": "1112", "y": "667" },
                "p": { "x": "1102", "y": "667" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "53",
              "code_ascii": "5",
              "confidence": "89"
            },
            "characater": {
              "poly": {
                "p": { "x": "1112", "y": "640" },
                "p": { "x": "1122", "y": "640" },
                "p": { "x": "1122", "y": "666" },
                "p": { "x": "1112", "y": "666" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "52",
              "code_ascii": "4",
              "confidence": "97"
            },
            "characater": {
              "poly": {
                "p": { "x": "1123", "y": "640" },
                "p": { "x": "1132", "y": "640" },
                "p": { "x": "1131", "y": "665" },
                "p": { "x": "1123", "y": "665" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "53",
              "code_ascii": "5",
              "confidence": "97"
            },
            "characater": {
              "poly": {
                "p": { "x": "1134", "y": "640" },
                "p": { "x": "1139", "y": "640" },
                "p": { "x": "1139", "y": "664" },
                "p": { "x": "1133", "y": "664" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "49",
              "code_ascii": "1",
              "confidence": "77"
            },
            "characater": {
              "poly": {
                "p": { "x": "1154", "y": "639" },
                "p": { "x": "1163", "y": "639" },
                "p": { "x": "1163", "y": "663" },
                "p": { "x": "1153", "y": "663" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "66",
              "code_ascii": "B",
              "confidence": "97"
            },
            "characater": {
              "poly": {
                "p": { "x": "1164", "y": "638" },
                "p": { "x": "1173", "y": "638" },
                "p": { "x": "1173", "y": "663" },
                "p": { "x": "1163", "y": "663" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "66",
              "code_ascii": "B",
              "confidence": "94"
            },
            "characater": {
              "poly": {
                "p": { "x": "1191", "y": "637" },
                "p": { "x": "1206", "y": "636" },
                "p": { "x": "1205", "y": "660" },
                "p": { "x": "1190", "y": "661" }
              },
              "bkcolor": "16777215",
              "color": "0",
              "code": "76",
              "code_ascii": "L",
              "confidence": "34"
            }
          },
          "det_time_us": "1104009",
          "poly": {
            "p": { "x": "1088", "y": "642" },
            "p": { "x": "1210", "y": "634" },
            "p": { "x": "1210", "y": "661" },
            "p": { "x": "1087", "y": "669" }
          }
        }
      },
      "det_time_us": "1710270"
    }
  }
}

我有类似的东西:

$ jq -r '.frames.frame.lps.lp|.characters.characater.code_ascii,.characters.characater.confidence' test.json

但它只返回一个字母及其置信度(conf.)分数……

问题

  • 我怎样才能取回所有字母和相关分数?

输出

我期望输出如下:

1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34, J 57, J 57, 4 7, 1 97, 5 89, 4 97, 5 97, 1 77, B 97, B 94, L 34

注意:格式可以不同,这只是表明我想要提取的数据。

复制粘贴

输入文件“test.json”

{"response":{"container":{"id":"41d6efcb-24d6-490d-8880-762255519b5f","timestamp":"2018-Jul-11 19:51:06.461665"},"id":"00000002-0000-0000-0000-000000000015"},"frames":{"frame":{"id":"5583","timestamp":"2016-Nov-30 13:05:27","lps":{"lp":{"licenseplate":"15451BBL","text":"15451BBL","wtext":"15451BBL","confidence":"20","bkcolor":"16777215","color":"16777215","type":"0","ntip":"11","cct_country_short":"","cct_state_short":"","tips":{"tip":{"poly":{"p":{"x":"1094","y":"643"},"p":{"x":"1099","y":"643"},"p":{"x":"1099","y":"667"},"p":{"x":"1094","y":"667"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97"},"tip":{"poly":{"p":{"x":"1103","y":"642"},"p":{"x":"1113","y":"642"},"p":{"x":"1112","y":"667"},"p":{"x":"1102","y":"667"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89"},"tip":{"poly":{"p":{"x":"1112","y":"640"},"p":{"x":"1122","y":"640"},"p":{"x":"1122","y":"666"},"p":{"x":"1112","y":"666"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97"},"tip":{"poly":{"p":{"x":"1123","y":"640"},"p":{"x":"1132","y":"640"},"p":{"x":"1131","y":"665"},"p":{"x":"1123","y":"665"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97"},"tip":{"poly":{"p":{"x":"1134","y":"640"},"p":{"x":"1139","y":"640"},"p":{"x":"1139","y":"664"},"p":{"x":"1133","y":"664"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77"},"tip":{"poly":{"p":{"x":"1154","y":"639"},"p":{"x":"1163","y":"639"},"p":{"x":"1163","y":"663"},"p":{"x":"1153","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97"},"tip":{"poly":{"p":{"x":"1164","y":"638"},"p":{"x":"1173","y":"638"},"p":{"x":"1173","y":"663"},"p":{"x":"1163","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94"},"tip":{"poly":{"p":{"x":"1191","y":"637"},"p":{"x":"1206","y":"636"},"p":{"x":"1205","y":"660"},"p":{"x":"1190","y":"661"}},"bkcolor
":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34"},"tip":{"poly":{"p":{"x":"1103","y":"655"},"p":{"x":"1111","y":"655"},"p":{"x":"1111","y":"667"},"p":{"x":"1103","y":"667"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57"},"tip":{"poly":{"p":{"x":"1103","y":"655"},"p":{"x":"1111","y":"655"},"p":{"x":"1111","y":"667"},"p":{"x":"1103","y":"667"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"57"},"tip":{"poly":{"p":{"x":"1176","y":"638"},"p":{"x":"1185","y":"637"},"p":{"x":"1184","y":"661"},"p":{"x":"1175","y":"662"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"7"}},"ncharacter":"8","characters":{"characater":{"poly":{"p":{"x":"1094","y":"643"},"p":{"x":"1099","y":"643"},"p":{"x":"1099","y":"667"},"p":{"x":"1094","y":"667"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97"},"characater":{"poly":{"p":{"x":"1103","y":"642"},"p":{"x":"1113","y":"642"},"p":{"x":"1112","y":"667"},"p":{"x":"1102","y":"667"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89"},"characater":{"poly":{"p":{"x":"1112","y":"640"},"p":{"x":"1122","y":"640"},"p":{"x":"1122","y":"666"},"p":{"x":"1112","y":"666"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97"},"characater":{"poly":{"p":{"x":"1123","y":"640"},"p":{"x":"1132","y":"640"},"p":{"x":"1131","y":"665"},"p":{"x":"1123","y":"665"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97"},"characater":{"poly":{"p":{"x":"1134","y":"640"},"p":{"x":"1139","y":"640"},"p":{"x":"1139","y":"664"},"p":{"x":"1133","y":"664"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77"},"characater":{"poly":{"p":{"x":"1154","y":"639"},"p":{"x":"1163","y":"639"},"p":{"x":"1163","y":"663"},"p":{"x":"1153","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97"},"characat
er":{"poly":{"p":{"x":"1164","y":"638"},"p":{"x":"1173","y":"638"},"p":{"x":"1173","y":"663"},"p":{"x":"1163","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94"},"characater":{"poly":{"p":{"x":"1191","y":"637"},"p":{"x":"1206","y":"636"},"p":{"x":"1205","y":"660"},"p":{"x":"1190","y":"661"}},"bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34"}},"det_time_us":"1072592","poly":{"p":{"x":"1088","y":"642"},"p":{"x":"1210","y":"634"},"p":{"x":"1210","y":"661"},"p":{"x":"1087","y":"669"}}}},"det_time_us":"1720812"}}}

链接

输入文件:https://drive.google.com/file/d/18wCzjMBpw7SIeVFByAGPQiqCBjg_0te3/view?usp=sharing

答案1

这是一个新的、改进的答案。旧的答案可以在最后的分隔符之后找到。

数据的问题在于,它包含一个带有非唯一键(characater)的对象 .frames.frame.lps.lp.characters。每次为已存在的键指定值时,旧值都会丢失。

我们可以通过使用 jq 实用程序的“流”输入(--stream)来解决这个问题:

$ jq -r -n --stream 'fromstream(1|truncate_stream(5|truncate_stream(inputs)|select(.[0][0] == "characater"))) | [.code_ascii, .confidence] | @tsv' test.json
1       97
5       89
4       97
5       97
1       77
B       97
B       94
L       34

首先在深度 5(即路径 .frames.frame.lps.lp.characters 的长度)处截断输入流,然后提取以有问题的键 characater 开头的所有子路径。这就是内层的 truncate_stream() 和随后的 select() 所做的事情。接着,每个这样的子路径的顶层被截断,剩下的部分被重新构造成对象流。这就是外层的 truncate_stream() 调用与包裹它的 fromstream() 一起所做的事情。

在处理过程中,修改后的文档如下所示(给出问题中的示例):

{"poly":{"p":{"x":"1094","y":"667"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"97"}
{"poly":{"p":{"x":"1102","y":"667"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"89"}
{"poly":{"p":{"x":"1112","y":"666"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"97"}
{"poly":{"p":{"x":"1123","y":"665"}},"bkcolor":"16777215","color":"0","code":"53","code_ascii":"5","confidence":"97"}
{"poly":{"p":{"x":"1133","y":"664"}},"bkcolor":"16777215","color":"0","code":"49","code_ascii":"1","confidence":"77"}
{"poly":{"p":{"x":"1153","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"97"}
{"poly":{"p":{"x":"1163","y":"663"}},"bkcolor":"16777215","color":"0","code":"66","code_ascii":"B","confidence":"94"}
{"poly":{"p":{"x":"1190","y":"661"}},"bkcolor":"16777215","color":"0","code":"76","code_ascii":"L","confidence":"34"}

对于来自 fromstream 的每个对象,数据被提取到一个数组中,并使用 @tsv 格式化输出,从而为我们提供上面的制表符分隔的输出。


2018年的旧答案:

由于您的 characters 不是正确的 JSON 数组(它是一个所有键名称都相同的哈希),因此只有它的最后一个值可用。

我们可以通过预处理数据,为每个现在名为 characater 的对象创建唯一的键来解决这个问题:

awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json

这会将每个 characater 依次替换为 char1、char2 等。

现在我们可以使用例如以下命令访问其中的所有值:

jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'

完整的管道:

awk '/"characater"/ { sub("\"characater\"", "\"char" ++n "\"", $0) } 1' file.json |
jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'

结果(给出问题中的数据):

1       97
5       89
4       97
5       97
1       77
B       97
B       94
L       34

如果您控制 JSON 文档的生成,则应考虑将 characters 对象转换为适当的数组。

答案2

鉴于您的输入不是有效的 JSON,您可能必须采用 sed、awk、grep 之类的解决方案。为此,以下命令可以处理已被“压缩”(minified)为单行字符串的输入:

$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' <FILE> | grep -oP '(?<=:)"\w+"' | paste - -

例子

$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"
"J", "57"
"J", "57"
"4", "7"
"1", "97"
"5", "89"
"4", "97"
"5", "97"
"1", "77"
"B", "97"
"B", "94"
"L", "34"

替代方案

注意:它们适用于多行输入数据。

sed+粘贴
$ sed -n '/code_ascii/,/confidence/p' a.json | sed 's/.*: [^"]*//' | paste - -
sed、awk、粘贴
$ sed -n '/code_ascii/,/confidence/p' a.json | awk -F': ' '{print $2}' | paste -d" " - -
sed、grep、粘贴
$ sed -n '/code_ascii/,/confidence/p' a.json | grep -oP '(?<=: ).*$' | paste - -
grep+粘贴
$ grep -oP '"code_ascii":"\w+","confidence":"\w+"' b.json | grep -oP '(?<=:)"\w+"' | paste - -

答案3

grep + sed + tr :

grep -e code_ascii -e confidence <file> |  sed 's/.*: "\(.*\)"/\1/;' | tr "\n" ' '

返回 :

20, 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 

此命令仅返回 code_ascii +confidence 对:

 grep -e code_ascii -e confidence <file> | sed -n 's/.*code_ascii": "\(.*\)"/\1/;T;h;n;s/.*"confidence": "\(.*\)"/\1/;T;H;g;p' | tr "\n" " " 

返回 :

 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 J, 57 J, 57 4, 7 1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 

现在,请参阅上面 Kusalananda 的帖子。只需 sed + tr :

sed -i 's/},/},\n/g' <tempfile>
sed -n 's/characater//;T;:z;n;s/.*code_ascii": "\(.*\)"/\1/;Tz;:zz;h;n;s/.*"confidence": "\(.*\)"/\1/;Tzz;H;s/$//;Tz;g;p' <tempfile> | tr "\n" " "

返回 :

1, 97 5, 89 4, 97 5, 97 1, 77 B, 97 B, 94 L, 34 

相关内容