如何从 JSON 中的重复元素中获取值? [英] How to get the value from duplicate element in JSON?
问题描述
我正在尝试使用 Microsoft 计算机视觉和 Blue Prism(RPA 工具)从图像中提取文本.我有一个像下面这样的 JSON 响应:
I am trying to extract text from image using Microsoft Computer vision and using Blue Prism (RPA tool). I have a JSON response like below:
{
"status": "Succeeded",
"recognitionResults": [{
"page": 1,
"clockwiseOrientation": 0.76,
"width": 1700,
"height": 2200,
"unit": "pixel",
"lines": [{
"boundingBox": [691, 55, 854, 57, 853, 78, 690, 76],
"text": "Display Record",
"words": [{
"boundingBox": [695, 56, 775, 57, 776, 77, 695, 76],
"text": "Display"
}, {
"boundingBox": [783, 57, 853, 58, 854, 79, 783, 78],
"text": "Record"
}]
}, {
"boundingBox": [57, 102, 397, 107, 396, 130, 56, 126],
"text": "Customer Number . . 0012343345",
"words": [{
"boundingBox": [60, 103, 153, 106, 153, 127, 60, 124],
"text": "Customer"
}, {
"boundingBox": [160, 106, 230, 107, 230, 129, 160, 128],
"text": "Number"
}, {
"boundingBox": [242, 108, 256, 108, 255, 129, 241, 129],
"text": "."
}, {
"boundingBox": [264, 108, 278, 108, 278, 130, 264, 129],
"text": "."
}, {
"boundingBox": [284, 108, 397, 107, 396, 130, 284, 130],
"text": "0012343345"
}]
}, {
"boundingBox": [685, 110, 1126, 115, 1125, 140, 684, 136],
"text": "Short Name . . . . . . SHOES AND THINGS",
"words": [{
"boundingBox": [685, 111, 742, 114, 742, 135, 685, 133],
"text": "Short"
}, {
"boundingBox": [749, 114, 798, 116, 798, 137, 749, 135],
"text": "Name"
}, {
"boundingBox": [808, 117, 823, 117, 823, 137, 808, 137],
"text": "."
}, {
"boundingBox": [830, 117, 844, 117, 844, 138, 830, 138],
"text": "."
}, {
"boundingBox": [851, 118, 866, 118, 866, 138, 851, 138],
"text": "."
}, {
"boundingBox": [874, 118, 889, 118, 889, 139, 874, 139],
"text": "."
}, {
"boundingBox": [897, 118, 912, 119, 912, 139, 897, 139],
"text": "."
}, {
"boundingBox": [919, 119, 933, 119, 933, 139, 919, 139],
"text": "."
}, {
"boundingBox": [940, 119, 999, 118, 999, 139, 940, 139],
"text": "SHOES"
}, {
"boundingBox": [1005, 118, 1042, 118, 1042, 139, 1005, 139],
"text": "AND"
}, {
"boundingBox": [1051, 118, 1121, 115, 1121, 138, 1051, 139],
"text": "THINGS"
}]
}, {
"boundingBox": [57, 132, 366, 135, 365, 159, 56, 155],
"text": "Name . . . SHOES AND THINGS",
"words": [{
"boundingBox": [59, 132, 109, 134, 108, 156, 58, 155],
"text": "Name"
}, {
"boundingBox": [118, 134, 133, 134, 132, 157, 117, 156],
"text": "."
}, {
"boundingBox": [141, 134, 156, 135, 155, 157, 140, 157],
"text": "."
}, {
"boundingBox": [163, 135, 178, 135, 178, 157, 163, 157],
"text": "."
}, {
"boundingBox": [183, 135, 243, 136, 243, 158, 182, 157],
"text": "SHOES"
}, {
"boundingBox": [249, 136, 287, 137, 286, 158, 249, 158],
"text": "AND"
}, {
"boundingBox": [296, 137, 366, 138, 365, 157, 296, 158],
"text": "THINGS"
}]
}, {
"boundingBox": [682, 139, 930, 143, 929, 167, 682, 163],
"text": "Legal Residence Code .",
"words": [{
"boundingBox": [683, 141, 743, 141, 742, 163, 682, 163],
"text": "Legal"
}, {
"boundingBox": [749, 141, 855, 143, 854, 164, 748, 163],
"text": "Residence"
}, {
"boundingBox": [862, 144, 911, 146, 910, 165, 861, 164],
"text": "Code"
}, {
"boundingBox": [920, 147, 930, 148, 929, 166, 918, 166],
"text": "."
}]
}, {
"boundingBox": [682, 279, 1066, 284, 1065, 305, 682, 301],
"text": "Date of Registration/ Incorporation",
"words": [{
"boundingBox": [683, 280, 729, 281, 728, 301, 682, 301],
"text": "Date"
}, {
"boundingBox": [737, 281, 763, 282, 762, 301, 736, 301],
"text": "of"
}, {
"boundingBox": [771, 282, 913, 284, 912, 303, 770, 301],
"text": "Registration/"
}, {
"boundingBox": [917, 284, 1063, 285, 1061, 305, 916, 303],
"text": "Incorporation"
}]
}, {
"boundingBox": [1083, 282, 1192, 284, 1191, 307, 1082, 305],
"text": "2/14/2016",
"words": [{
"boundingBox": [1094, 284, 1192, 285, 1191, 307, 1094, 305],
"text": "2/14/2016"
}]
}, {
"boundingBox": [57, 326, 454, 332, 453, 356, 56, 350],
"text": "Email Addr SHOESANDTHINGS@YAHOO . COM",
"words": [{
"boundingBox": [58, 327, 119, 328, 118, 351, 57, 350],
"text": "Email"
}, {
"boundingBox": [125, 328, 174, 329, 173, 352, 124, 351],
"text": "Addr"
}, {
"boundingBox": [181, 329, 402, 333, 402, 354, 181, 352],
"text": "SHOESANDTHINGS@YAHOO"
}, {
"boundingBox": [407, 333, 412, 334, 411, 355, 407, 355],
"text": "."
}, {
"boundingBox": [416, 334, 451, 334, 451, 355, 416, 355],
"text": "COM"
}]
}, {
"boundingBox": [680, 335, 951, 338, 950, 359, 679, 356],
"text": "Customer Risk Class . N",
"words": [{
"boundingBox": [681, 336, 772, 336, 772, 358, 681, 355],
"text": "Customer"
}, {
"boundingBox": [781, 336, 827, 337, 827, 358, 781, 358],
"text": "Risk"
}, {
"boundingBox": [837, 337, 896, 339, 896, 358, 837, 358],
"text": "Class"
}, {
"boundingBox": [917, 339, 930, 340, 930, 357, 918, 358],
"text": "."
}, {
"boundingBox": [936, 340, 949, 340, 950, 357, 937, 357],
"text": "N"
}]
}, {
"boundingBox": [54, 356, 236, 358, 236, 383, 53, 380],
"text": "National ID Nbr",
"words": [{
"boundingBox": [57, 358, 153, 358, 153, 381, 56, 380],
"text": "National"
}, {
"boundingBox": [161, 358, 185, 359, 184, 382, 160, 381],
"text": "ID"
}, {
"boundingBox": [191, 359, 230, 360, 230, 383, 190, 382],
"text": "Nbr",
"confidence": "Low"
}]
}, {
"boundingBox": [676, 362, 757, 366, 756, 387, 675, 382],
"text": "Remarks",
"words": [{
"boundingBox": [680, 364, 756, 369, 756, 386, 680, 384],
"text": "Remarks"
}]
}, {
"boundingBox": [58, 386, 185, 388, 184, 408, 57, 406],
"text": "Tax ID Cert",
"words": [{
"boundingBox": [59, 386, 93, 387, 93, 407, 59, 406],
"text": "Tax"
}, {
"boundingBox": [104, 387, 126, 388, 126, 407, 104, 407],
"text": "ID"
}, {
"boundingBox": [136, 388, 183, 389, 183, 407, 136, 407],
"text": "Cert"
}]
}, {
"boundingBox": [676, 390, 925, 396, 924, 417, 675, 412],
"text": "Originating Branch . .",
"words": [{
"boundingBox": [680, 392, 805, 395, 804, 415, 680, 410],
"text": "Originating"
}, {
"boundingBox": [814, 395, 881, 396, 881, 416, 813, 415],
"text": "Branch"
}, {
"boundingBox": [895, 396, 907, 396, 906, 416, 895, 416],
"text": "."
}, {
"boundingBox": [917, 396, 924, 396, 924, 416, 917, 416],
"text": "."
}]
}, {
"boundingBox": [952, 396, 994, 396, 994, 414, 953, 413],
"text": "126",
"words": [{
"boundingBox": [960, 395, 993, 395, 993, 413, 960, 413],
"text": "126"
}]
}, {
"boundingBox": [1030, 394, 1376, 397, 1375, 420, 1029, 417],
"text": "Date Last Tagged . . 2/04/2019",
"words": [{
"boundingBox": [1035, 396, 1082, 397, 1083, 417, 1035, 415],
"text": "Date"
}, {
"boundingBox": [1091, 397, 1138, 397, 1139, 418, 1091, 417],
"text": "Last"
}, {
"boundingBox": [1148, 397, 1217, 398, 1217, 419, 1149, 418],
"text": "Tagged"
}, {
"boundingBox": [1227, 398, 1239, 398, 1240, 419, 1227, 419],
"text": "."
}, {
"boundingBox": [1249, 398, 1262, 398, 1262, 420, 1249, 419],
"text": "."
}, {
"boundingBox": [1281, 398, 1376, 397, 1375, 420, 1281, 420],
"text": "2/04/2019"
}]
}, {
"boundingBox": [51, 412, 214, 414, 213, 435, 50, 433],
"text": "Tax ID Number",
"words": [{
"boundingBox": [57, 414, 92, 414, 92, 434, 58, 434],
"text": "Tax"
}, {
"boundingBox": [103, 414, 125, 414, 125, 435, 104, 434],
"text": "ID"
}, {
"boundingBox": [134, 414, 205, 415, 203, 436, 134, 435],
"text": "Number"
}]
}, {
"boundingBox": [282, 414, 470, 416, 469, 440, 281, 438],
"text": ". 11110201201900",
"words": [{
"boundingBox": [295, 417, 307, 416, 306, 436, 293, 436],
"text": "."
}, {
"boundingBox": [315, 416, 470, 417, 468, 440, 314, 436],
"text": "11110201201900"
}]
}, {
"boundingBox": [546, 417, 719, 420, 718, 441, 545, 438],
"text": "Present Address",
"words": [{
"boundingBox": [548, 417, 628, 420, 627, 440, 547, 438],
"text": "Present"
}, {
"boundingBox": [636, 420, 718, 421, 718, 440, 635, 440],
"text": "Address"
}]
}, {
"boundingBox": [1022, 420, 1493, 425, 1492, 451, 1021, 446],
"text": "Permanent Bus. Addr (Same as Present Y Y/N)",
"words": [{
"boundingBox": [1025, 424, 1127, 424, 1126, 444, 1024, 443],
"text": "Permanent"
}, {
"boundingBox": [1136, 424, 1185, 424, 1184, 444, 1135, 444],
"text": "Bus."
}, {
"boundingBox": [1192, 424, 1236, 425, 1235, 445, 1190, 444],
"text": "Addr"
}, {
"boundingBox": [1240, 425, 1294, 425, 1292, 446, 1239, 445],
"text": "(Same"
}, {
"boundingBox": [1303, 425, 1328, 425, 1327, 447, 1302, 446],
"text": "as"
}, {
"boundingBox": [1337, 425, 1417, 425, 1415, 449, 1335, 447],
"text": "Present"
}, {
"boundingBox": [1426, 425, 1439, 425, 1437, 450, 1425, 449],
"text": "Y"
}, {
"boundingBox": [1448, 425, 1492, 425, 1490, 451, 1446, 450],
"text": "Y/N)"
}]
}, {
"boundingBox": [52, 440, 232, 442, 231, 464, 51, 461],
"text": "SSS/GSIS Number.",
"words": [{
"boundingBox": [58, 441, 150, 442, 149, 462, 58, 462],
"text": "SSS/GSIS"
}, {
"boundingBox": [157, 442, 232, 444, 230, 464, 156, 462],
"text": "Number."
}]
}, {
"boundingBox": [299, 442, 437, 443, 436, 465, 298, 464],
"text": "44402012019",
"words": [{
"boundingBox": [316, 445, 436, 444, 434, 466, 314, 462],
"text": "44402012019"
}]
}, {
"boundingBox": [545, 444, 712, 446, 711, 468, 545, 466],
"text": "UNI1206 2ND FLR",
"words": [{
"boundingBox": [547, 445, 629, 446, 629, 467, 547, 466],
"text": "UNI1206"
}, {
"boundingBox": [637, 446, 673, 447, 673, 468, 637, 467],
"text": "2ND"
}, {
"boundingBox": [681, 447, 712, 448, 712, 469, 681, 468],
"text": "FLR"
}]
}, {
"boundingBox": [1022, 448, 1191, 449, 1190, 472, 1021, 472],
"text": "UNI1206 2ND FLR",
"words": [{
"boundingBox": [1025, 450, 1107, 449, 1107, 471, 1025, 473],
"text": "UNI1206",
"confidence": "Low"
}, {
"boundingBox": [1114, 449, 1151, 450, 1152, 471, 1115, 471],
"text": "2ND"
}, {
"boundingBox": [1158, 450, 1190, 451, 1191, 472, 1159, 471],
"text": "FLR"
}]
}, {
"boundingBox": [55, 466, 200, 468, 199, 488, 54, 487],
"text": "Branch Number",
"words": [{
"boundingBox": [56, 467, 125, 468, 125, 489, 57, 487],
"text": "Branch"
}, {
"boundingBox": [135, 468, 201, 468, 200, 488, 135, 489],
"text": "Number"
}]
}, {
"boundingBox": [332, 472, 365, 471, 364, 490, 332, 491],
"text": "12",
"words": [{
"boundingBox": [337, 471, 360, 471, 361, 490, 338, 490],
"text": "12",
"confidence": "Low"
}]
}, {
"boundingBox": [551, 471, 914, 475, 913, 499, 550, 495],
"text": "(Unit/House No. , Street , Subd/Bgy . )",
"words": [{
"boundingBox": [552, 472, 674, 476, 672, 496, 551, 495],
"text": "(Unit/House"
}, {
"boundingBox": [680, 476, 713, 476, 711, 496, 678, 496],
"text": "No."
}, {
"boundingBox": [717, 476, 722, 476, 720, 496, 716, 496],
"text": ","
}, {
"boundingBox": [726, 476, 791, 477, 789, 497, 725, 496],
"text": "Street"
}, {
"boundingBox": [795, 477, 800, 477, 798, 497, 793, 497],
"text": ","
}, {
"boundingBox": [804, 476, 891, 475, 889, 499, 802, 497],
"text": "Subd/Bgy"
}, {
"boundingBox": [896, 475, 900, 475, 898, 499, 893, 499],
"text": "."
}, {
"boundingBox": [905, 475, 914, 475, 911, 500, 902, 500],
"text": ")"
}]
}, {
"boundingBox": [1030, 477, 1388, 481, 1387, 505, 1029, 500],
"text": "(Unit/House No. , Street, Subd/Bgy. )",
"words": [{
"boundingBox": [1030, 477, 1152, 479, 1152, 502, 1030, 500],
"text": "(Unit/House"
}, {
"boundingBox": [1157, 479, 1189, 480, 1188, 503, 1156, 502],
"text": "No."
}, {
"boundingBox": [1193, 480, 1198, 480, 1198, 503, 1193, 503],
"text": ","
}, {
"boundingBox": [1202, 480, 1276, 481, 1275, 504, 1202, 503],
"text": "Street,"
}, {
"boundingBox": [1280, 481, 1376, 483, 1376, 505, 1280, 504],
"text": "Subd/Bgy."
}, {
"boundingBox": [1381, 483, 1387, 483, 1387, 505, 1381, 505],
"text": ")"
}]
}, {
"boundingBox": [62, 495, 197, 497, 196, 519, 61, 516],
"text": "SAMSON ROAD",
"words": [{
"boundingBox": [68, 498, 135, 497, 134, 517, 68, 516],
"text": "SAMSON"
}, {
"boundingBox": [147, 497, 192, 498, 191, 519, 146, 518],
"text": "ROAD"
}]
}, {
"boundingBox": [545, 501, 862, 503, 861, 525, 544, 523],
"text": "SHOPPESVILLE PLUS GREENHILLS",
"words": [{
"boundingBox": [548, 502, 684, 503, 684, 524, 548, 523],
"text": "SHOPPESVILLE"
}, {
"boundingBox": [693, 503, 739, 503, 739, 524, 692, 524],
"text": "PLUS"
}, {
"boundingBox": [748, 503, 862, 504, 861, 525, 747, 524],
"text": "GREENHILLS"
}]
}, {
"boundingBox": [1030, 506, 1344, 509, 1343, 531, 1029, 528],
"text": "SHOPPESVILLE PLUS GREENHILLS",
"words": [{
"boundingBox": [1030, 506, 1162, 509, 1162, 530, 1030, 528],
"text": "SHOPPESVILLE",
"confidence": "Low"
}, {
"boundingBox": [1170, 509, 1218, 510, 1218, 530, 1170, 530],
"text": "PLUS"
}, {
"boundingBox": [1224, 510, 1340, 511, 1341, 529, 1224, 530],
"text": "GREENHILLS"
}]
}, {
"boundingBox": [54, 524, 233, 525, 232, 547, 53, 546],
"text": "Primary Officer",
"words": [{
"boundingBox": [58, 526, 137, 525, 137, 548, 57, 547],
"text": "Primary"
}, {
"boundingBox": [147, 525, 227, 526, 227, 548, 146, 548],
"text": "Officer"
}]
}, {
"boundingBox": [279, 527, 347, 528, 346, 549, 279, 549],
"text": ". 126",
"words": [{
"boundingBox": [295, 530, 306, 528, 304, 544, 293, 546],
"text": "."
}, {
"boundingBox": [314, 529, 347, 533, 346, 549, 313, 544],
"text": "126"
}]
}, {
"boundingBox": [552, 527, 702, 532, 701, 552, 551, 548],
"text": "(District, Town",
"words": [{
"boundingBox": [552, 528, 655, 531, 655, 551, 552, 549],
"text": "(District,",
"confidence": "Low"
}, {
"boundingBox": [659, 531, 702, 532, 701, 552, 659, 551],
"text": "Town"
}]
}, {
"boundingBox": [1026, 532, 1185, 537, 1184, 560, 1025, 554],
"text": "(District, Town)",
"words": [{
"boundingBox": [1028, 533, 1132, 537, 1133, 558, 1028, 555],
"text": "(District,"
}, {
"boundingBox": [1137, 537, 1183, 539, 1184, 558, 1138, 558],
"text": "Town)"
}]
}, {
"boundingBox": [52, 551, 188, 555, 188, 577, 51, 573],
"text": "Access Code",
"words": [{
"boundingBox": [57, 553, 127, 554, 127, 575, 56, 573],
"text": "Access"
}, {
"boundingBox": [135, 554, 182, 555, 182, 577, 135, 575],
"text": "Code"
}]
}, {
"boundingBox": [312, 553, 368, 557, 366, 573, 310, 568],
"text": "126",
"words": [{
"boundingBox": [337, 557, 367, 558, 366, 573, 336, 570],
"text": "126"
}]
}, {
"boundingBox": [548, 554, 702, 556, 701, 578, 547, 576],
"text": "SAN JUAN CITY",
"words": [{
"boundingBox": [548, 555, 582, 555, 583, 577, 549, 577],
"text": "SAN"
}, {
"boundingBox": [592, 555, 638, 556, 639, 577, 593, 577],
"text": "JUAN"
}, {
"boundingBox": [647, 556, 696, 557, 696, 578, 648, 577],
"text": "CITY"
}]
}, {
"boundingBox": [1027, 562, 1171, 565, 1170, 586, 1026, 582],
"text": "SAN JUAN CITY",
"words": [{
"boundingBox": [1027, 562, 1058, 563, 1059, 583, 1027, 582],
"text": "SAN"
}, {
"boundingBox": [1070, 564, 1115, 565, 1115, 584, 1071, 583],
"text": "JUAN"
}, {
"boundingBox": [1125, 565, 1170, 567, 1171, 585, 1126, 584],
"text": "CITY"
}]
}, {
"boundingBox": [56, 581, 184, 585, 183, 605, 56, 602],
"text": "Citizenship",
"words": [{
"boundingBox": [59, 583, 181, 587, 181, 605, 57, 602],
"text": "Citizenship"
}]
}, {
"boundingBox": [547, 581, 617, 584, 616, 607, 546, 604],
"text": "(city)",
"words": [{
"boundingBox": [549, 583, 617, 585, 616, 607, 550, 604],
"text": "(city)",
"confidence": "Low"
}]
}, {
"boundingBox": [1026, 588, 1093, 591, 1092, 614, 1025, 611],
"text": "(city)",
"words": [{
"boundingBox": [1027, 590, 1090, 591, 1092, 611, 1029, 611],
"text": "(city)"
}]
}, {
"boundingBox": [51, 606, 233, 609, 232, 631, 50, 628],
"text": "Customer Class .",
"words": [{
"boundingBox": [56, 608, 149, 609, 149, 629, 57, 629],
"text": "Customer"
}, {
"boundingBox": [157, 609, 217, 611, 216, 631, 157, 629],
"text": "Class"
}, {
"boundingBox": [228, 611, 232, 611, 231, 631, 227, 631],
"text": "."
}]
}, {
"boundingBox": [549, 611, 674, 612, 673, 635, 548, 634],
"text": "PHILIPPINES",
"words": [{
"boundingBox": [549, 611, 674, 613, 675, 635, 549, 633],
"text": "PHILIPPINES"
}]
}, {
"boundingBox": [1023, 616, 1154, 618, 1153, 640, 1022, 637],
"text": "PHILIPPINES",
"words": [{
"boundingBox": [1024, 616, 1151, 619, 1152, 639, 1024, 638],
"text": "PHILIPPINES"
}]
}, {
"boundingBox": [56, 635, 234, 639, 233, 661, 55, 656],
"text": "Market Segment .",
"words": [{
"boundingBox": [56, 639, 125, 637, 124, 656, 56, 657],
"text": "Market"
}, {
"boundingBox": [135, 637, 214, 642, 212, 660, 134, 656],
"text": "Segment"
}, {
"boundingBox": [227, 643, 234, 644, 232, 661, 225, 661],
"text": "."
}]
}, {
"boundingBox": [547, 639, 745, 644, 744, 666, 546, 661],
"text": "(Province , Country)",
"words": [{
"boundingBox": [549, 640, 645, 643, 646, 664, 550, 661],
"text": "(Province"
}, {
"boundingBox": [649, 643, 655, 643, 655, 664, 650, 664],
"text": ","
}, {
"boundingBox": [659, 643, 745, 644, 746, 666, 659, 664],
"text": "Country)"
}]
}, {
"boundingBox": [1028, 643, 1227, 646, 1226, 669, 1027, 665],
"text": "(Province, Country)",
"words": [{
"boundingBox": [1028, 644, 1130, 646, 1131, 667, 1029, 667],
"text": "(Province,"
}, {
"boundingBox": [1135, 646, 1226, 647, 1227, 669, 1135, 667],
"text": "Country)"
}]
}, {
"boundingBox": [49, 662, 266, 668, 265, 690, 49, 684],
"text": "Solicitable Code .",
"words": [{
"boundingBox": [56, 665, 182, 667, 181, 686, 56, 684],
"text": "Solicitable"
}, {
"boundingBox": [191, 667, 238, 670, 236, 688, 190, 686],
"text": "Code"
}, {
"boundingBox": [249, 671, 262, 673, 260, 690, 248, 689],
"text": "."
}]
}, {
"boundingBox": [548, 668, 726, 671, 725, 692, 547, 689],
"text": "Zip Code 1900 -",
"words": [{
"boundingBox": [548, 670, 579, 672, 579, 689, 548, 687],
"text": "Zip"
}, {
"boundingBox": [591, 672, 635, 673, 636, 691, 591, 689],
"text": "Code"
}, {
"boundingBox": [658, 673, 702, 672, 703, 692, 658, 691],
"text": "1900"
}, {
"boundingBox": [714, 672, 725, 671, 726, 692, 714, 692],
"text": "-"
}]
}, {
"boundingBox": [1020, 672, 1199, 674, 1198, 696, 1019, 694],
"text": "Zip Code 1900 -",
"words": [{
"boundingBox": [1024, 673, 1058, 674, 1059, 695, 1025, 694],
"text": "Zip"
}, {
"boundingBox": [1068, 674, 1115, 675, 1116, 696, 1068, 695],
"text": "Code"
}, {
"boundingBox": [1136, 675, 1183, 675, 1183, 696, 1136, 696],
"text": "1900"
}, {
"boundingBox": [1193, 675, 1198, 675, 1198, 696, 1193, 696],
"text": "-"
}]
}, {
"boundingBox": [46, 720, 320, 725, 319, 747, 45, 742],
"text": "F2=View Last Maintenance",
"words": [{
"boundingBox": [46, 721, 124, 723, 123, 744, 46, 740],
"text": "F2=View"
}, {
"boundingBox": [134, 723, 180, 724, 180, 745, 134, 744],
"text": "Last"
}, {
"boundingBox": [189, 724, 314, 726, 313, 745, 188, 745],
"text": "Maintenance"
}]
}, {
"boundingBox": [354, 722, 492, 725, 491, 746, 353, 743],
"text": "F8=Loan Data",
"words": [{
"boundingBox": [357, 724, 435, 725, 435, 745, 357, 743],
"text": "F8=Loan"
}, {
"boundingBox": [445, 725, 492, 726, 491, 746, 445, 745],
"text": "Data"
}]
}, {
"boundingBox": [699, 726, 976, 729, 975, 751, 698, 747],
"text": "F10=Owners/Beneficiaries",
"words": [{
"boundingBox": [702, 728, 971, 731, 970, 751, 702, 747],
"text": "F10=Owners/Beneficiaries"
}]
}, {
"boundingBox": [1033, 729, 1145, 731, 1145, 752, 1032, 750],
"text": "F12=Cancel",
"words": [{
"boundingBox": [1035, 731, 1145, 732, 1145, 753, 1035, 750],
"text": "F12=Cancel"
}]
}, {
"boundingBox": [44, 750, 294, 749, 294, 771, 45, 772],
"text": "F7=User Defined Fields",
"words": [{
"boundingBox": [46, 750, 124, 752, 124, 772, 45, 769],
"text": "F7=User"
}, {
"boundingBox": [133, 752, 214, 752, 214, 772, 133, 772],
"text": "Defined"
}, {
"boundingBox": [223, 752, 293, 750, 293, 769, 224, 771],
"text": "Fields"
}]
}, {
"boundingBox": [355, 749, 661, 754, 660, 775, 354, 771],
"text": "F9=FATCA/Substantial Owners",
"words": [{
"boundingBox": [357, 750, 584, 754, 584, 774, 357, 771],
"text": "F9=FATCA/Substantial"
}, {
"boundingBox": [590, 754, 661, 754, 661, 776, 591, 774],
"text": "Owners"
}]
}, {
"boundingBox": [695, 752, 994, 757, 993, 780, 694, 775],
"text": "F11=Alternate Address Data",
"words": [{
"boundingBox": [701, 754, 849, 756, 848, 778, 700, 776],
"text": "F11=Alternate",
"confidence": "Low"
}, {
"boundingBox": [856, 756, 939, 758, 938, 779, 855, 778],
"text": "Address"
}, {
"boundingBox": [945, 758, 993, 760, 993, 779, 944, 779],
"text": "Data"
}]
}, {
"boundingBox": [1035, 756, 1238, 759, 1237, 781, 1034, 777],
"text": "F16=High Risk Type",
"words": [{
"boundingBox": [1035, 759, 1124, 758, 1124, 779, 1035, 779],
"text": "F16=High"
}, {
"boundingBox": [1134, 759, 1181, 760, 1181, 780, 1134, 779],
"text": "Risk"
}, {
"boundingBox": [1191, 760, 1238, 762, 1237, 781, 1191, 780],
"text": "Type"
}]
}]
}]
}
我想获取属性文本的值(邮政编码 1900 -
),但是还有另一个具有相同名称的属性.
例如,我想提取如下:
I wanted to get the value of property text (Zip Code 1900 -
) however, there is another property with the same name.
E.g., I want to extract like below:
Display Record
Customer Number . . 0012343345
Short Name . . . . . . SHOES AND THINGS
下面是我的代码:
' Deserialize the OCR response into the azureOcrJsonProperty model, then walk its
' public properties via reflection and format each one as "Name: value".
Dim ocrProperties As azureOcrJsonProperty = JsonConvert.DeserializeObject(Of azureOcrJsonProperty)(JsonReponse)
For Each item As PropertyInfo In ocrProperties.GetType.GetProperties()
' Properties whose declared type is a nested type are expanded one level further.
If item.PropertyType.IsNested Then
' NOTE(review): item.GetValue returns the property's value, yet it is stored in a
' variable declared "As Type" - confirm this is what was intended.
Dim TypeHolder as Type = item.GetValue(ocrProperties)
For Each subitem As PropertyInfo In TypeHolder.GetType.GetProperties()
' NOTE(review): plain "=" assignment - each iteration replaces the previous
' content of Formatted_JSon instead of appending to it.
Formatted_JSon = subitem.Name & ": " & subitem.GetValue(TypeHolder).ToString
Next
Else
' Non-nested property: format its value directly.
Formatted_JSon = item.Name & ": " & item.GetValue(ocrProperties).ToString
End If
Next
' Root model the question deserializes the OCR JSON response into.
Public Class azureOcrJsonProperty
Public Property status As String
' NOTE(review): in the sample JSON "recognitionResults" is an array ([{...}]), but it
' is declared here as a single object; the deserialization error quoted in the
' question names this type.
Public Property recognitionResults As recognitionResults
End Class
' Model for one "recognitionResults" entry: page metadata plus the recognized lines.
Public Class recognitionResults
Public Property page As Integer
Public Property clockwiseOrientation As decimal
Public Property width As Integer
Public Property height As Integer
Public Property unit As String
' NOTE(review): "lines" is an array in the sample JSON, but is declared here as a single object.
Public Property lines As lines
End Class
' Model for one recognized line of text.
Public Class lines
' NOTE(review): "boundingBox" holds integers in the sample JSON; declared here as strings.
Public Property boundingBox As String()
' NOTE(review): "text" values are strings in the sample JSON (e.g. "Display Record"), not numbers.
Public Property text As decimal
' NOTE(review): "words" is an array in the sample JSON, but is declared here as a single object.
Public Property words As words
End Class
' Model for one recognized word within a line.
Public Class words
' NOTE(review): "boundingBox" holds integers in the sample JSON; declared here as strings.
Public Property boundingBox As String()
' NOTE(review): "text" values are strings in the sample JSON (e.g. "Display"), not numbers.
Public Property text As decimal
End Class
我收到以下错误:
无法将当前 JSON 数组(例如 [1,2,3])反序列化为类型 'Get_Customer_Info+recognitionResults',因为该类型需要一个 JSON 对象(例如 {"name":"value"})才能正确反序列化.要修复此错误,要么将 JSON 更改为 JSON 对象(例如 {"name":"value"}),要么将反序列化类型更改为数组或实现集合接口的类型(例如 ICollection、IList),例如可以从 JSON 数组反序列化的 List.
Cannot deserialize the current JSON array (e.g. [1,2,3]) into type 'Get_Customer_Info+recognitionResults' because the type requires a JSON object (e.g. {"name":"value"}) to deserialize correctly. To fix this error either change the JSON to a JSON object (e.g. {"name":"value"}) or change the deserialized type to an array or a type that implements a collection interface (e.g. ICollection, IList) like List that can be deserialized from a JSON array.
推荐答案
JSON 结构本身非常简单,请参阅本页底部生成的类模型.
主类 (OcrResults
) 公开了 Serialize()
和 Deserialize(json As String)
执行标准操作的公共方法.
The JSON structure itself is quite simple, see the resulting class model at the bottom of this page.
The main Class (OcrResults
) exposes the Serialize()
and Deserialize(json As String)
public methods that perform the standard operations.
要反序列化 JSON,请调用 Deserialize()
方法:
To deserialize the JSON, call the Deserialize()
method as:
' Create the helper without any input, then pass the JSON text to Deserialize(json).
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)
或
' Alternative: hand the JSON text to the constructor and call the parameterless Deserialize().
Dim ocrObjects = New OcrResults(json).Deserialize()
生成的类对象可以修改并序列化回(如果需要)原始 JSON 结构,调用 Serialize()
方法:
The resulting class object can be modified and serialized back (if required) to the original JSON structure, calling the Serialize()
method:
' Deserialize the JSON, modify the resulting model as needed, then serialize it back to a string.
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)
'(...)
Dim newJson As String = ocrResults.Serialize(ocrObjects)
由于此 JSON 包含 OCR 过程的结果 - 其中识别文本的边界多边形被引用为整数数组 - 我添加了一个(稍微详细)JsonConverter
, (BoundingBoxConverter
),将整数数组转换为 Point
结构数组(并在类模型被序列化时转换回整数数组).
Since this JSON contains the result of an OCR procedure - where the bounding polygons of the recognized text are referenced as an array of integers - I've added a (slightly verbose) JsonConverter
, (BoundingBoxConverter
), to convert the arrays of integers to arrays of Point
structures (and back to arrays of integers when the class model is serialized).
这样,如果/当需要围绕识别的文本部分绘制多边形时,可以省去一个步骤(通常,绘图类,例如 GraphicsPath
类,接受一组 Point
/PointF
作为参数).
This eliminates one step if/when the polygons are drawn around the recognized Text parts (usually, the drawing classes, as the GraphicsPath
class, accept an array of Point
/PointF
as argument).
反序列化对象的 Lines
属性是一个 List(Of Line)
类.
Each Line().Text
property contains a full sequence of recognized text, while the Line().Words()
Lists contain the single parts of the same sequence.
The deserialized object's Lines
property is a List(Of Line)
classes.
Each Line().Text
property contains a full sequence of recognized text, while the Line().Words()
Lists contain the single parts of the same sequence.
The full line can be extracted using LINQ queries in different ways.
For example, as described in the question, the lines:
The full line can be extracted using LINQ queries in different ways.
For example, as described in the question, the lines:
Display Record Customer Number . . 0012343345 Short Name . . . . . . SHOES AND THINGS
Can be extracted as:
Can be extracted as:
' Deserialize the response, then, for every recognized page, drop the lines that
' precede "Display Record" and keep lines until one starts with "Name".
Dim ocrObjects = New OcrResults(json).Deserialize()
Dim searchResult = (From page In ocrObjects.RecognitionResults
                    From ln In page.Lines.
                        SkipWhile(Function(candidate) Not candidate.Text.Contains("Display Record")).
                        TakeWhile(Function(candidate) Not candidate.Text.StartsWith("Name"))
                    Select ln).ToList()
The searchResult
list contains all the Line
objects in sequence, starting from "Display Record"
to "Short Name . . . . . . SHOES AND THINGS"
.
The searchResult
list contains all the Line
objects in sequence, starting from "Display Record"
to "Short Name . . . . . . SHOES AND THINGS"
.
To print the result:
To print the result:
' Project each Line to its text, then print all texts on one line separated by a space.
Dim lineTexts = searchResult.Select(Function(ln) ln.Text)
Console.WriteLine(String.Join(" ", lineTexts))
equivalent to:
equivalent to:
' Loop form of the String.Join version: writes every Line's text on a single
' output line, separated by one space, and ends the line with a newline.
Dim separator As String = String.Empty
For Each line As OcrResults.Line In searchResult
    ' The separator is empty before the first item so no leading space is written.
    Console.Write(separator & line.Text)
    separator = " "
Next
Console.WriteLine()
Extract the first Line
object where the Text property contains "Zip Code 1900"
:
Extract the first Line
object where the Text property contains "Zip Code 1900"
:
' Flatten the lines of every recognized page, then take the first line whose text
' contains "Zip Code 1900" (First throws when no line matches).
Dim allLines = ocrObjects.RecognitionResults.SelectMany(Function(r) r.Lines)
Dim zipCode = allLines.First(Function(ln) ln.Text.Contains("Zip Code 1900"))
' Recognized text and bounding polygon of the matching line.
Dim zipCodeText = zipCode.Text
Dim zipPoints = zipCode.BoundingBox
or, assign the first recognized Page to a new object, so you can use its Lines
objects directly:
or, assign the first recognized Page to a new object, so you can use its Lines
objects directly:
' Work on the first recognized page only and search its lines directly.
Dim ocrFirstPage = ocrObjects.RecognitionResults(0)
Dim isZipLine As Func(Of OcrResults.Line, Boolean) = Function(ln) ln.Text.Contains("Zip Code 1900")
Dim zipCode = ocrFirstPage.Lines.First(isZipLine)
' Recognized text and bounding polygon of the matching line.
Dim zipCodeText = zipCode.Text
Dim zipPoints = zipCode.BoundingBox
<hr>
Drawing the Text sections:
Each line of recognized Text specifies the polygon that contains the string.
To draw the Polygons corresponding to the boundingBox
array of Point structures of each Line of text, one of the
Each line of recognized Text specifies the polygon that contains the string.
To draw the Polygons corresponding to the boundingBox
array of Point structures of each Line of text, one of the
Each OcrObjects.RecognitionResults
class has a ClockwiseOrientation
property that defines the rotation angle of the Text as identified by the OCR.
This angle can be used to rotate the Graphics (or to straighten the Polygon, eventually).
Each OcrObjects.RecognitionResults
class has a ClockwiseOrientation
property that defines the rotation angle of the Text as identified by the OCR.
This angle can be used to rotate the Graphics (or to straighten the Polygon, eventually).
For example, using a Windows Forms PictureBox
control as canvas and a GraphicsPath as the container of the Polygons:
For example, using a Windows Forms PictureBox
control as canvas and a GraphicsPath as the container of the Polygons:
' Deserialized OCR model shared between the loading code and the Paint handler.
Dim ocrObjects As OcrResults.Results = Nothing
' (...)
' Somewhere...
ocrObjects = New OcrResults(json).Deserialize()
' Request a repaint so the Paint handler below draws the polygons.
PictureBox1.Invalidate()
' (...)

' Draws the bounding polygons of (at most) the first five recognized lines of the
' first page onto the PictureBox, using a red pen.
Private Sub PictureBox1_Paint(sender As Object, e As PaintEventArgs) Handles PictureBox1.Paint
    ' Nothing to draw until a response has been deserialized and holds at least one page.
    If ocrObjects Is Nothing Then Return
    If ocrObjects.RecognitionResults Is Nothing OrElse ocrObjects.RecognitionResults.Count = 0 Then Return

    Dim ocr = ocrObjects.RecognitionResults(0)
    If ocr.Lines Is Nothing Then Return
    'Dim angle As Single = Convert.ToSingle(ocr.ClockwiseOrientation)

    ' Clamp to the number of available lines so short pages do not raise an
    ' out-of-range error.
    Dim polygonCount As Integer = Math.Min(5, ocr.Lines.Count)

    Using path As GraphicsPath = New GraphicsPath
        For i As Integer = 0 To polygonCount - 1
            Dim box = ocr.Lines(i).BoundingBox
            ' Skip lines that carry no bounding box.
            If box IsNot Nothing Then path.AddPolygon(box)
        Next
        e.Graphics.SmoothingMode = SmoothingMode.AntiAlias
        'e.Graphics.RotateTransform(-angle)
        e.Graphics.DrawPath(Pens.Red, path)
    End Using
End Sub
This will draw the first 5 polygons, giving this result:
This will draw the first 5 polygons, giving this result:
OcrResults
class model:
OcrResults
class model:
Imports System
Imports System.Collections.Generic
Imports System.Drawing
Imports System.Linq
Imports Newtonsoft.Json
''' <summary>
''' Serializes and deserializes the OCR JSON response to/from the nested class
''' model (Results / RecognitionResult / Line / Word) using Json.NET.
''' Bounding boxes are exposed as arrays of four Point structures through
''' BoundingBoxConverter.
''' </summary>
Public Class OcrResults
    Private ReadOnly jSettings As JsonSerializerSettings
    Private ReadOnly jsonInput As String = String.Empty

    ''' <summary>Creates a helper with no stored JSON; use Deserialize(json).</summary>
    Public Sub New()
        Me.New(String.Empty)
    End Sub

    ''' <summary>Creates a helper that stores <paramref name="json"/> for Deserialize().</summary>
    ''' <param name="json">The JSON text; Nothing is stored as an empty string.</param>
    Public Sub New(json As String)
        ' Store an empty string instead of Nothing so the parameterless Deserialize()
        ' is handed a non-null string.
        jsonInput = If(json, String.Empty)
        jSettings = New JsonSerializerSettings() With {
            .Converters = New JsonConverter() {New BoundingBoxConverter()}
        }
    End Sub

    ''' <summary>Root object of the response.</summary>
    Public Class Results
        Public Property Status As String
        Public Property RecognitionResults As List(Of RecognitionResult)
    End Class

    ''' <summary>One recognized page: metadata plus its lines of text.</summary>
    Public Class RecognitionResult
        Public Property Page As Long
        Public Property ClockwiseOrientation As Double
        Public Property Width As Long
        Public Property Height As Long
        Public Property Unit As String
        Public Property Lines As List(Of Line)
    End Class

    ''' <summary>One recognized line: its polygon, full text and single words.</summary>
    Public Class Line
        Public Property BoundingBox As Point()
        Public Property Text As String
        Public Property Words As List(Of Word)
    End Class

    ''' <summary>One recognized word within a line.</summary>
    Public Class Word
        Public Property BoundingBox As Point()
        Public Property Text As String
        ' Omitted from the serialized output when Nothing.
        <JsonProperty(NullValueHandling:=NullValueHandling.Ignore)>
        Public Property Confidence As String
    End Class

    ''' <summary>Serializes <paramref name="model"/> to a JSON string.</summary>
    ''' <remarks>
    ''' NOTE(review): no naming strategy is configured, so property names are presumably
    ''' emitted as declared (PascalCase) while the sample input uses camelCase keys -
    ''' confirm whether a camel-case contract resolver is needed for an exact round trip.
    ''' </remarks>
    Public Function Serialize(model As Results) As String
        Return JsonConvert.SerializeObject(model, jSettings)
    End Function

    ''' <summary>Deserializes the JSON text passed to the constructor.</summary>
    Public Function Deserialize() As Results
        Return Deserialize(jsonInput)
    End Function

    ''' <summary>Deserializes <paramref name="json"/> into the class model.</summary>
    Public Function Deserialize(json As String) As Results
        Return JsonConvert.DeserializeObject(Of Results)(json, jSettings)
    End Function

    ''' <summary>
    ''' Converts a JSON array of integers [x0, y0, x1, y1, x2, y2, x3, y3] to an array
    ''' of four Point structures, and back to an array of integers when writing.
    ''' </summary>
    Public Class BoundingBoxConverter
        Inherits JsonConverter

        ' Number of points in a bounding polygon and the matching number of integers.
        Private Const PointCount As Integer = 4
        Private Const CoordinateCount As Integer = PointCount * 2

        Public Overrides Function CanConvert(t As Type) As Boolean
            Return t = GetType(Point())
        End Function

        ''' <summary>Reads an integer array and returns the corresponding Point array.</summary>
        ''' <returns>
        ''' Nothing for a JSON null, <paramref name="existingValue"/> when the current
        ''' token is not an array, otherwise an array of four Points.
        ''' </returns>
        ''' <exception cref="JsonSerializationException">
        ''' The array holds fewer than eight integers.
        ''' </exception>
        Public Overrides Function ReadJson(reader As JsonReader, t As Type, existingValue As Object, serializer As JsonSerializer) As Object
            If reader.TokenType = JsonToken.Null Then Return Nothing
            If reader.TokenType <> JsonToken.StartArray Then Return existingValue

            Dim values As New List(Of Integer)()
            ' ReadAsInt32 returns Nothing on the closing bracket; also stop if the
            ' reader runs out of tokens so the loop cannot spin forever.
            Do
                Dim pointPart = reader.ReadAsInt32()
                If pointPart.HasValue Then values.Add(pointPart.Value)
            Loop Until reader.TokenType = JsonToken.EndArray OrElse reader.TokenType = JsonToken.None

            ' Fail with a descriptive error instead of an index-out-of-range exception.
            If values.Count < CoordinateCount Then
                Throw New JsonSerializationException(
                    String.Format("Invalid bounding box: expected {0} integers, found {1}.", CoordinateCount, values.Count))
            End If

            ' As before, only the first four coordinate pairs are used.
            Dim points(PointCount - 1) As Point
            For i As Integer = 0 To PointCount - 1
                points(i) = New Point(values(i * 2), values(i * 2 + 1))
            Next
            Return points
        End Function

        ''' <summary>Writes a four-element Point array as a flat array of integers.</summary>
        ''' <exception cref="JsonSerializationException">
        ''' The value is not a Point array or does not contain exactly four points.
        ''' </exception>
        Public Overrides Sub WriteJson(writer As JsonWriter, untypedValue As Object, serializer As JsonSerializer)
            If untypedValue Is Nothing Then
                writer.WriteNull()
                Return
            End If

            Dim points = TryCast(untypedValue, Point())
            ' Writing nothing here would leave the property without a value and
            ' produce invalid JSON, so reject unexpected types explicitly.
            If points Is Nothing Then
                Throw New JsonSerializationException("Unsupported value type: expected an array of [Point]")
            End If
            If points.Length <> PointCount Then
                Throw New JsonSerializationException("Invalid array of [Point]")
            End If

            Dim values As Integer() = points.SelectMany(Function(p) New Integer() {p.X, p.Y}).ToArray()
            serializer.Serialize(writer, values)
        End Sub
    End Class
End Class
这篇关于如何从 JSON 中的重复元素中获取值?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!