如何从 JSON 中的重复元素中获取值? [英] How to get the value from duplicate element in JSON?

查看:71
本文介绍了如何从 JSON 中的重复元素中获取值?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在尝试使用 Microsoft 计算机视觉和 Blue Prism(RPA 工具)从图像中提取文本.我有一个像下面这样的 JSON 响应:

I am trying to extract text from image using Microsoft Computer vision and using Blue Prism (RPA tool). I have a JSON response like below:

{
  "status": "Succeeded",
  "recognitionResults": [{
    "page": 1,
    "clockwiseOrientation": 0.76,
    "width": 1700,
    "height": 2200,
    "unit": "pixel",
    "lines": [{
      "boundingBox": [691, 55, 854, 57, 853, 78, 690, 76],
      "text": "Display Record",
      "words": [{
        "boundingBox": [695, 56, 775, 57, 776, 77, 695, 76],
        "text": "Display"
      }, {
        "boundingBox": [783, 57, 853, 58, 854, 79, 783, 78],
        "text": "Record"
      }]
    }, {
      "boundingBox": [57, 102, 397, 107, 396, 130, 56, 126],
      "text": "Customer Number . . 0012343345",
      "words": [{
        "boundingBox": [60, 103, 153, 106, 153, 127, 60, 124],
        "text": "Customer"
      }, {
        "boundingBox": [160, 106, 230, 107, 230, 129, 160, 128],
        "text": "Number"
      }, {
        "boundingBox": [242, 108, 256, 108, 255, 129, 241, 129],
        "text": "."
      }, {
        "boundingBox": [264, 108, 278, 108, 278, 130, 264, 129],
        "text": "."
      }, {
        "boundingBox": [284, 108, 397, 107, 396, 130, 284, 130],
        "text": "0012343345"
      }]
    }, {
      "boundingBox": [685, 110, 1126, 115, 1125, 140, 684, 136],
      "text": "Short Name . . . . . . SHOES AND THINGS",
      "words": [{
        "boundingBox": [685, 111, 742, 114, 742, 135, 685, 133],
        "text": "Short"
      }, {
        "boundingBox": [749, 114, 798, 116, 798, 137, 749, 135],
        "text": "Name"
      }, {
        "boundingBox": [808, 117, 823, 117, 823, 137, 808, 137],
        "text": "."
      }, {
        "boundingBox": [830, 117, 844, 117, 844, 138, 830, 138],
        "text": "."
      }, {
        "boundingBox": [851, 118, 866, 118, 866, 138, 851, 138],
        "text": "."
      }, {
        "boundingBox": [874, 118, 889, 118, 889, 139, 874, 139],
        "text": "."
      }, {
        "boundingBox": [897, 118, 912, 119, 912, 139, 897, 139],
        "text": "."
      }, {
        "boundingBox": [919, 119, 933, 119, 933, 139, 919, 139],
        "text": "."
      }, {
        "boundingBox": [940, 119, 999, 118, 999, 139, 940, 139],
        "text": "SHOES"
      }, {
        "boundingBox": [1005, 118, 1042, 118, 1042, 139, 1005, 139],
        "text": "AND"
      }, {
        "boundingBox": [1051, 118, 1121, 115, 1121, 138, 1051, 139],
        "text": "THINGS"
      }]
    }, {
      "boundingBox": [57, 132, 366, 135, 365, 159, 56, 155],
      "text": "Name . . . SHOES AND THINGS",
      "words": [{
        "boundingBox": [59, 132, 109, 134, 108, 156, 58, 155],
        "text": "Name"
      }, {
        "boundingBox": [118, 134, 133, 134, 132, 157, 117, 156],
        "text": "."
      }, {
        "boundingBox": [141, 134, 156, 135, 155, 157, 140, 157],
        "text": "."
      }, {
        "boundingBox": [163, 135, 178, 135, 178, 157, 163, 157],
        "text": "."
      }, {
        "boundingBox": [183, 135, 243, 136, 243, 158, 182, 157],
        "text": "SHOES"
      }, {
        "boundingBox": [249, 136, 287, 137, 286, 158, 249, 158],
        "text": "AND"
      }, {
        "boundingBox": [296, 137, 366, 138, 365, 157, 296, 158],
        "text": "THINGS"
      }]
    }, {
      "boundingBox": [682, 139, 930, 143, 929, 167, 682, 163],
      "text": "Legal Residence Code .",
      "words": [{
        "boundingBox": [683, 141, 743, 141, 742, 163, 682, 163],
        "text": "Legal"
      }, {
        "boundingBox": [749, 141, 855, 143, 854, 164, 748, 163],
        "text": "Residence"
      }, {
        "boundingBox": [862, 144, 911, 146, 910, 165, 861, 164],
        "text": "Code"
      }, {
        "boundingBox": [920, 147, 930, 148, 929, 166, 918, 166],
        "text": "."
      }]
    }, {
      "boundingBox": [682, 279, 1066, 284, 1065, 305, 682, 301],
      "text": "Date of Registration/ Incorporation",
      "words": [{
        "boundingBox": [683, 280, 729, 281, 728, 301, 682, 301],
        "text": "Date"
      }, {
        "boundingBox": [737, 281, 763, 282, 762, 301, 736, 301],
        "text": "of"
      }, {
        "boundingBox": [771, 282, 913, 284, 912, 303, 770, 301],
        "text": "Registration/"
      }, {
        "boundingBox": [917, 284, 1063, 285, 1061, 305, 916, 303],
        "text": "Incorporation"
      }]
    }, {
      "boundingBox": [1083, 282, 1192, 284, 1191, 307, 1082, 305],
      "text": "2/14/2016",
      "words": [{
        "boundingBox": [1094, 284, 1192, 285, 1191, 307, 1094, 305],
        "text": "2/14/2016"
      }]
    }, {
      "boundingBox": [57, 326, 454, 332, 453, 356, 56, 350],
      "text": "Email Addr SHOESANDTHINGS@YAHOO . COM",
      "words": [{
        "boundingBox": [58, 327, 119, 328, 118, 351, 57, 350],
        "text": "Email"
      }, {
        "boundingBox": [125, 328, 174, 329, 173, 352, 124, 351],
        "text": "Addr"
      }, {
        "boundingBox": [181, 329, 402, 333, 402, 354, 181, 352],
        "text": "SHOESANDTHINGS@YAHOO"
      }, {
        "boundingBox": [407, 333, 412, 334, 411, 355, 407, 355],
        "text": "."
      }, {
        "boundingBox": [416, 334, 451, 334, 451, 355, 416, 355],
        "text": "COM"
      }]
    }, {
      "boundingBox": [680, 335, 951, 338, 950, 359, 679, 356],
      "text": "Customer Risk Class . N",
      "words": [{
        "boundingBox": [681, 336, 772, 336, 772, 358, 681, 355],
        "text": "Customer"
      }, {
        "boundingBox": [781, 336, 827, 337, 827, 358, 781, 358],
        "text": "Risk"
      }, {
        "boundingBox": [837, 337, 896, 339, 896, 358, 837, 358],
        "text": "Class"
      }, {
        "boundingBox": [917, 339, 930, 340, 930, 357, 918, 358],
        "text": "."
      }, {
        "boundingBox": [936, 340, 949, 340, 950, 357, 937, 357],
        "text": "N"
      }]
    }, {
      "boundingBox": [54, 356, 236, 358, 236, 383, 53, 380],
      "text": "National ID Nbr",
      "words": [{
        "boundingBox": [57, 358, 153, 358, 153, 381, 56, 380],
        "text": "National"
      }, {
        "boundingBox": [161, 358, 185, 359, 184, 382, 160, 381],
        "text": "ID"
      }, {
        "boundingBox": [191, 359, 230, 360, 230, 383, 190, 382],
        "text": "Nbr",
        "confidence": "Low"
      }]
    }, {
      "boundingBox": [676, 362, 757, 366, 756, 387, 675, 382],
      "text": "Remarks",
      "words": [{
        "boundingBox": [680, 364, 756, 369, 756, 386, 680, 384],
        "text": "Remarks"
      }]
    }, {
      "boundingBox": [58, 386, 185, 388, 184, 408, 57, 406],
      "text": "Tax ID Cert",
      "words": [{
        "boundingBox": [59, 386, 93, 387, 93, 407, 59, 406],
        "text": "Tax"
      }, {
        "boundingBox": [104, 387, 126, 388, 126, 407, 104, 407],
        "text": "ID"
      }, {
        "boundingBox": [136, 388, 183, 389, 183, 407, 136, 407],
        "text": "Cert"
      }]
    }, {
      "boundingBox": [676, 390, 925, 396, 924, 417, 675, 412],
      "text": "Originating Branch . .",
      "words": [{
        "boundingBox": [680, 392, 805, 395, 804, 415, 680, 410],
        "text": "Originating"
      }, {
        "boundingBox": [814, 395, 881, 396, 881, 416, 813, 415],
        "text": "Branch"
      }, {
        "boundingBox": [895, 396, 907, 396, 906, 416, 895, 416],
        "text": "."
      }, {
        "boundingBox": [917, 396, 924, 396, 924, 416, 917, 416],
        "text": "."
      }]
    }, {
      "boundingBox": [952, 396, 994, 396, 994, 414, 953, 413],
      "text": "126",
      "words": [{
        "boundingBox": [960, 395, 993, 395, 993, 413, 960, 413],
        "text": "126"
      }]
    }, {
      "boundingBox": [1030, 394, 1376, 397, 1375, 420, 1029, 417],
      "text": "Date Last Tagged . . 2/04/2019",
      "words": [{
        "boundingBox": [1035, 396, 1082, 397, 1083, 417, 1035, 415],
        "text": "Date"
      }, {
        "boundingBox": [1091, 397, 1138, 397, 1139, 418, 1091, 417],
        "text": "Last"
      }, {
        "boundingBox": [1148, 397, 1217, 398, 1217, 419, 1149, 418],
        "text": "Tagged"
      }, {
        "boundingBox": [1227, 398, 1239, 398, 1240, 419, 1227, 419],
        "text": "."
      }, {
        "boundingBox": [1249, 398, 1262, 398, 1262, 420, 1249, 419],
        "text": "."
      }, {
        "boundingBox": [1281, 398, 1376, 397, 1375, 420, 1281, 420],
        "text": "2/04/2019"
      }]
    }, {
      "boundingBox": [51, 412, 214, 414, 213, 435, 50, 433],
      "text": "Tax ID Number",
      "words": [{
        "boundingBox": [57, 414, 92, 414, 92, 434, 58, 434],
        "text": "Tax"
      }, {
        "boundingBox": [103, 414, 125, 414, 125, 435, 104, 434],
        "text": "ID"
      }, {
        "boundingBox": [134, 414, 205, 415, 203, 436, 134, 435],
        "text": "Number"
      }]
    }, {
      "boundingBox": [282, 414, 470, 416, 469, 440, 281, 438],
      "text": ". 11110201201900",
      "words": [{
        "boundingBox": [295, 417, 307, 416, 306, 436, 293, 436],
        "text": "."
      }, {
        "boundingBox": [315, 416, 470, 417, 468, 440, 314, 436],
        "text": "11110201201900"
      }]
    }, {
      "boundingBox": [546, 417, 719, 420, 718, 441, 545, 438],
      "text": "Present Address",
      "words": [{
        "boundingBox": [548, 417, 628, 420, 627, 440, 547, 438],
        "text": "Present"
      }, {
        "boundingBox": [636, 420, 718, 421, 718, 440, 635, 440],
        "text": "Address"
      }]
    }, {
      "boundingBox": [1022, 420, 1493, 425, 1492, 451, 1021, 446],
      "text": "Permanent Bus. Addr (Same as Present Y Y/N)",
      "words": [{
        "boundingBox": [1025, 424, 1127, 424, 1126, 444, 1024, 443],
        "text": "Permanent"
      }, {
        "boundingBox": [1136, 424, 1185, 424, 1184, 444, 1135, 444],
        "text": "Bus."
      }, {
        "boundingBox": [1192, 424, 1236, 425, 1235, 445, 1190, 444],
        "text": "Addr"
      }, {
        "boundingBox": [1240, 425, 1294, 425, 1292, 446, 1239, 445],
        "text": "(Same"
      }, {
        "boundingBox": [1303, 425, 1328, 425, 1327, 447, 1302, 446],
        "text": "as"
      }, {
        "boundingBox": [1337, 425, 1417, 425, 1415, 449, 1335, 447],
        "text": "Present"
      }, {
        "boundingBox": [1426, 425, 1439, 425, 1437, 450, 1425, 449],
        "text": "Y"
      }, {
        "boundingBox": [1448, 425, 1492, 425, 1490, 451, 1446, 450],
        "text": "Y/N)"
      }]
    }, {
      "boundingBox": [52, 440, 232, 442, 231, 464, 51, 461],
      "text": "SSS/GSIS Number.",
      "words": [{
        "boundingBox": [58, 441, 150, 442, 149, 462, 58, 462],
        "text": "SSS/GSIS"
      }, {
        "boundingBox": [157, 442, 232, 444, 230, 464, 156, 462],
        "text": "Number."
      }]
    }, {
      "boundingBox": [299, 442, 437, 443, 436, 465, 298, 464],
      "text": "44402012019",
      "words": [{
        "boundingBox": [316, 445, 436, 444, 434, 466, 314, 462],
        "text": "44402012019"
      }]
    }, {
      "boundingBox": [545, 444, 712, 446, 711, 468, 545, 466],
      "text": "UNI1206 2ND FLR",
      "words": [{
        "boundingBox": [547, 445, 629, 446, 629, 467, 547, 466],
        "text": "UNI1206"
      }, {
        "boundingBox": [637, 446, 673, 447, 673, 468, 637, 467],
        "text": "2ND"
      }, {
        "boundingBox": [681, 447, 712, 448, 712, 469, 681, 468],
        "text": "FLR"
      }]
    }, {
      "boundingBox": [1022, 448, 1191, 449, 1190, 472, 1021, 472],
      "text": "UNI1206 2ND FLR",
      "words": [{
        "boundingBox": [1025, 450, 1107, 449, 1107, 471, 1025, 473],
        "text": "UNI1206",
        "confidence": "Low"
      }, {
        "boundingBox": [1114, 449, 1151, 450, 1152, 471, 1115, 471],
        "text": "2ND"
      }, {
        "boundingBox": [1158, 450, 1190, 451, 1191, 472, 1159, 471],
        "text": "FLR"
      }]
    }, {
      "boundingBox": [55, 466, 200, 468, 199, 488, 54, 487],
      "text": "Branch Number",
      "words": [{
        "boundingBox": [56, 467, 125, 468, 125, 489, 57, 487],
        "text": "Branch"
      }, {
        "boundingBox": [135, 468, 201, 468, 200, 488, 135, 489],
        "text": "Number"
      }]
    }, {
      "boundingBox": [332, 472, 365, 471, 364, 490, 332, 491],
      "text": "12",
      "words": [{
        "boundingBox": [337, 471, 360, 471, 361, 490, 338, 490],
        "text": "12",
        "confidence": "Low"
      }]
    }, {
      "boundingBox": [551, 471, 914, 475, 913, 499, 550, 495],
      "text": "(Unit/House No. , Street , Subd/Bgy . )",
      "words": [{
        "boundingBox": [552, 472, 674, 476, 672, 496, 551, 495],
        "text": "(Unit/House"
      }, {
        "boundingBox": [680, 476, 713, 476, 711, 496, 678, 496],
        "text": "No."
      }, {
        "boundingBox": [717, 476, 722, 476, 720, 496, 716, 496],
        "text": ","
      }, {
        "boundingBox": [726, 476, 791, 477, 789, 497, 725, 496],
        "text": "Street"
      }, {
        "boundingBox": [795, 477, 800, 477, 798, 497, 793, 497],
        "text": ","
      }, {
        "boundingBox": [804, 476, 891, 475, 889, 499, 802, 497],
        "text": "Subd/Bgy"
      }, {
        "boundingBox": [896, 475, 900, 475, 898, 499, 893, 499],
        "text": "."
      }, {
        "boundingBox": [905, 475, 914, 475, 911, 500, 902, 500],
        "text": ")"
      }]
    }, {
      "boundingBox": [1030, 477, 1388, 481, 1387, 505, 1029, 500],
      "text": "(Unit/House No. , Street, Subd/Bgy. )",
      "words": [{
        "boundingBox": [1030, 477, 1152, 479, 1152, 502, 1030, 500],
        "text": "(Unit/House"
      }, {
        "boundingBox": [1157, 479, 1189, 480, 1188, 503, 1156, 502],
        "text": "No."
      }, {
        "boundingBox": [1193, 480, 1198, 480, 1198, 503, 1193, 503],
        "text": ","
      }, {
        "boundingBox": [1202, 480, 1276, 481, 1275, 504, 1202, 503],
        "text": "Street,"
      }, {
        "boundingBox": [1280, 481, 1376, 483, 1376, 505, 1280, 504],
        "text": "Subd/Bgy."
      }, {
        "boundingBox": [1381, 483, 1387, 483, 1387, 505, 1381, 505],
        "text": ")"
      }]
    }, {
      "boundingBox": [62, 495, 197, 497, 196, 519, 61, 516],
      "text": "SAMSON ROAD",
      "words": [{
        "boundingBox": [68, 498, 135, 497, 134, 517, 68, 516],
        "text": "SAMSON"
      }, {
        "boundingBox": [147, 497, 192, 498, 191, 519, 146, 518],
        "text": "ROAD"
      }]
    }, {
      "boundingBox": [545, 501, 862, 503, 861, 525, 544, 523],
      "text": "SHOPPESVILLE PLUS GREENHILLS",
      "words": [{
        "boundingBox": [548, 502, 684, 503, 684, 524, 548, 523],
        "text": "SHOPPESVILLE"
      }, {
        "boundingBox": [693, 503, 739, 503, 739, 524, 692, 524],
        "text": "PLUS"
      }, {
        "boundingBox": [748, 503, 862, 504, 861, 525, 747, 524],
        "text": "GREENHILLS"
      }]
    }, {
      "boundingBox": [1030, 506, 1344, 509, 1343, 531, 1029, 528],
      "text": "SHOPPESVILLE PLUS GREENHILLS",
      "words": [{
        "boundingBox": [1030, 506, 1162, 509, 1162, 530, 1030, 528],
        "text": "SHOPPESVILLE",
        "confidence": "Low"
      }, {
        "boundingBox": [1170, 509, 1218, 510, 1218, 530, 1170, 530],
        "text": "PLUS"
      }, {
        "boundingBox": [1224, 510, 1340, 511, 1341, 529, 1224, 530],
        "text": "GREENHILLS"
      }]
    }, {
      "boundingBox": [54, 524, 233, 525, 232, 547, 53, 546],
      "text": "Primary Officer",
      "words": [{
        "boundingBox": [58, 526, 137, 525, 137, 548, 57, 547],
        "text": "Primary"
      }, {
        "boundingBox": [147, 525, 227, 526, 227, 548, 146, 548],
        "text": "Officer"
      }]
    }, {
      "boundingBox": [279, 527, 347, 528, 346, 549, 279, 549],
      "text": ". 126",
      "words": [{
        "boundingBox": [295, 530, 306, 528, 304, 544, 293, 546],
        "text": "."
      }, {
        "boundingBox": [314, 529, 347, 533, 346, 549, 313, 544],
        "text": "126"
      }]
    }, {
      "boundingBox": [552, 527, 702, 532, 701, 552, 551, 548],
      "text": "(District, Town",
      "words": [{
        "boundingBox": [552, 528, 655, 531, 655, 551, 552, 549],
        "text": "(District,",
        "confidence": "Low"
      }, {
        "boundingBox": [659, 531, 702, 532, 701, 552, 659, 551],
        "text": "Town"
      }]
    }, {
      "boundingBox": [1026, 532, 1185, 537, 1184, 560, 1025, 554],
      "text": "(District, Town)",
      "words": [{
        "boundingBox": [1028, 533, 1132, 537, 1133, 558, 1028, 555],
        "text": "(District,"
      }, {
        "boundingBox": [1137, 537, 1183, 539, 1184, 558, 1138, 558],
        "text": "Town)"
      }]
    }, {
      "boundingBox": [52, 551, 188, 555, 188, 577, 51, 573],
      "text": "Access Code",
      "words": [{
        "boundingBox": [57, 553, 127, 554, 127, 575, 56, 573],
        "text": "Access"
      }, {
        "boundingBox": [135, 554, 182, 555, 182, 577, 135, 575],
        "text": "Code"
      }]
    }, {
      "boundingBox": [312, 553, 368, 557, 366, 573, 310, 568],
      "text": "126",
      "words": [{
        "boundingBox": [337, 557, 367, 558, 366, 573, 336, 570],
        "text": "126"
      }]
    }, {
      "boundingBox": [548, 554, 702, 556, 701, 578, 547, 576],
      "text": "SAN JUAN CITY",
      "words": [{
        "boundingBox": [548, 555, 582, 555, 583, 577, 549, 577],
        "text": "SAN"
      }, {
        "boundingBox": [592, 555, 638, 556, 639, 577, 593, 577],
        "text": "JUAN"
      }, {
        "boundingBox": [647, 556, 696, 557, 696, 578, 648, 577],
        "text": "CITY"
      }]
    }, {
      "boundingBox": [1027, 562, 1171, 565, 1170, 586, 1026, 582],
      "text": "SAN JUAN CITY",
      "words": [{
        "boundingBox": [1027, 562, 1058, 563, 1059, 583, 1027, 582],
        "text": "SAN"
      }, {
        "boundingBox": [1070, 564, 1115, 565, 1115, 584, 1071, 583],
        "text": "JUAN"
      }, {
        "boundingBox": [1125, 565, 1170, 567, 1171, 585, 1126, 584],
        "text": "CITY"
      }]
    }, {
      "boundingBox": [56, 581, 184, 585, 183, 605, 56, 602],
      "text": "Citizenship",
      "words": [{
        "boundingBox": [59, 583, 181, 587, 181, 605, 57, 602],
        "text": "Citizenship"
      }]
    }, {
      "boundingBox": [547, 581, 617, 584, 616, 607, 546, 604],
      "text": "(city)",
      "words": [{
        "boundingBox": [549, 583, 617, 585, 616, 607, 550, 604],
        "text": "(city)",
        "confidence": "Low"
      }]
    }, {
      "boundingBox": [1026, 588, 1093, 591, 1092, 614, 1025, 611],
      "text": "(city)",
      "words": [{
        "boundingBox": [1027, 590, 1090, 591, 1092, 611, 1029, 611],
        "text": "(city)"
      }]
    }, {
      "boundingBox": [51, 606, 233, 609, 232, 631, 50, 628],
      "text": "Customer Class .",
      "words": [{
        "boundingBox": [56, 608, 149, 609, 149, 629, 57, 629],
        "text": "Customer"
      }, {
        "boundingBox": [157, 609, 217, 611, 216, 631, 157, 629],
        "text": "Class"
      }, {
        "boundingBox": [228, 611, 232, 611, 231, 631, 227, 631],
        "text": "."
      }]
    }, {
      "boundingBox": [549, 611, 674, 612, 673, 635, 548, 634],
      "text": "PHILIPPINES",
      "words": [{
        "boundingBox": [549, 611, 674, 613, 675, 635, 549, 633],
        "text": "PHILIPPINES"
      }]
    }, {
      "boundingBox": [1023, 616, 1154, 618, 1153, 640, 1022, 637],
      "text": "PHILIPPINES",
      "words": [{
        "boundingBox": [1024, 616, 1151, 619, 1152, 639, 1024, 638],
        "text": "PHILIPPINES"
      }]
    }, {
      "boundingBox": [56, 635, 234, 639, 233, 661, 55, 656],
      "text": "Market Segment .",
      "words": [{
        "boundingBox": [56, 639, 125, 637, 124, 656, 56, 657],
        "text": "Market"
      }, {
        "boundingBox": [135, 637, 214, 642, 212, 660, 134, 656],
        "text": "Segment"
      }, {
        "boundingBox": [227, 643, 234, 644, 232, 661, 225, 661],
        "text": "."
      }]
    }, {
      "boundingBox": [547, 639, 745, 644, 744, 666, 546, 661],
      "text": "(Province , Country)",
      "words": [{
        "boundingBox": [549, 640, 645, 643, 646, 664, 550, 661],
        "text": "(Province"
      }, {
        "boundingBox": [649, 643, 655, 643, 655, 664, 650, 664],
        "text": ","
      }, {
        "boundingBox": [659, 643, 745, 644, 746, 666, 659, 664],
        "text": "Country)"
      }]
    }, {
      "boundingBox": [1028, 643, 1227, 646, 1226, 669, 1027, 665],
      "text": "(Province, Country)",
      "words": [{
        "boundingBox": [1028, 644, 1130, 646, 1131, 667, 1029, 667],
        "text": "(Province,"
      }, {
        "boundingBox": [1135, 646, 1226, 647, 1227, 669, 1135, 667],
        "text": "Country)"
      }]
    }, {
      "boundingBox": [49, 662, 266, 668, 265, 690, 49, 684],
      "text": "Solicitable Code .",
      "words": [{
        "boundingBox": [56, 665, 182, 667, 181, 686, 56, 684],
        "text": "Solicitable"
      }, {
        "boundingBox": [191, 667, 238, 670, 236, 688, 190, 686],
        "text": "Code"
      }, {
        "boundingBox": [249, 671, 262, 673, 260, 690, 248, 689],
        "text": "."
      }]
    }, {
      "boundingBox": [548, 668, 726, 671, 725, 692, 547, 689],
      "text": "Zip Code 1900 -",
      "words": [{
        "boundingBox": [548, 670, 579, 672, 579, 689, 548, 687],
        "text": "Zip"
      }, {
        "boundingBox": [591, 672, 635, 673, 636, 691, 591, 689],
        "text": "Code"
      }, {
        "boundingBox": [658, 673, 702, 672, 703, 692, 658, 691],
        "text": "1900"
      }, {
        "boundingBox": [714, 672, 725, 671, 726, 692, 714, 692],
        "text": "-"
      }]
    }, {
      "boundingBox": [1020, 672, 1199, 674, 1198, 696, 1019, 694],
      "text": "Zip Code 1900 -",
      "words": [{
        "boundingBox": [1024, 673, 1058, 674, 1059, 695, 1025, 694],
        "text": "Zip"
      }, {
        "boundingBox": [1068, 674, 1115, 675, 1116, 696, 1068, 695],
        "text": "Code"
      }, {
        "boundingBox": [1136, 675, 1183, 675, 1183, 696, 1136, 696],
        "text": "1900"
      }, {
        "boundingBox": [1193, 675, 1198, 675, 1198, 696, 1193, 696],
        "text": "-"
      }]
    }, {
      "boundingBox": [46, 720, 320, 725, 319, 747, 45, 742],
      "text": "F2=View Last Maintenance",
      "words": [{
        "boundingBox": [46, 721, 124, 723, 123, 744, 46, 740],
        "text": "F2=View"
      }, {
        "boundingBox": [134, 723, 180, 724, 180, 745, 134, 744],
        "text": "Last"
      }, {
        "boundingBox": [189, 724, 314, 726, 313, 745, 188, 745],
        "text": "Maintenance"
      }]
    }, {
      "boundingBox": [354, 722, 492, 725, 491, 746, 353, 743],
      "text": "F8=Loan Data",
      "words": [{
        "boundingBox": [357, 724, 435, 725, 435, 745, 357, 743],
        "text": "F8=Loan"
      }, {
        "boundingBox": [445, 725, 492, 726, 491, 746, 445, 745],
        "text": "Data"
      }]
    }, {
      "boundingBox": [699, 726, 976, 729, 975, 751, 698, 747],
      "text": "F10=Owners/Beneficiaries",
      "words": [{
        "boundingBox": [702, 728, 971, 731, 970, 751, 702, 747],
        "text": "F10=Owners/Beneficiaries"
      }]
    }, {
      "boundingBox": [1033, 729, 1145, 731, 1145, 752, 1032, 750],
      "text": "F12=Cancel",
      "words": [{
        "boundingBox": [1035, 731, 1145, 732, 1145, 753, 1035, 750],
        "text": "F12=Cancel"
      }]
    }, {
      "boundingBox": [44, 750, 294, 749, 294, 771, 45, 772],
      "text": "F7=User Defined Fields",
      "words": [{
        "boundingBox": [46, 750, 124, 752, 124, 772, 45, 769],
        "text": "F7=User"
      }, {
        "boundingBox": [133, 752, 214, 752, 214, 772, 133, 772],
        "text": "Defined"
      }, {
        "boundingBox": [223, 752, 293, 750, 293, 769, 224, 771],
        "text": "Fields"
      }]
    }, {
      "boundingBox": [355, 749, 661, 754, 660, 775, 354, 771],
      "text": "F9=FATCA/Substantial Owners",
      "words": [{
        "boundingBox": [357, 750, 584, 754, 584, 774, 357, 771],
        "text": "F9=FATCA/Substantial"
      }, {
        "boundingBox": [590, 754, 661, 754, 661, 776, 591, 774],
        "text": "Owners"
      }]
    }, {
      "boundingBox": [695, 752, 994, 757, 993, 780, 694, 775],
      "text": "F11=Alternate Address Data",
      "words": [{
        "boundingBox": [701, 754, 849, 756, 848, 778, 700, 776],
        "text": "F11=Alternate",
        "confidence": "Low"
      }, {
        "boundingBox": [856, 756, 939, 758, 938, 779, 855, 778],
        "text": "Address"
      }, {
        "boundingBox": [945, 758, 993, 760, 993, 779, 944, 779],
        "text": "Data"
      }]
    }, {
      "boundingBox": [1035, 756, 1238, 759, 1237, 781, 1034, 777],
      "text": "F16=High Risk Type",
      "words": [{
        "boundingBox": [1035, 759, 1124, 758, 1124, 779, 1035, 779],
        "text": "F16=High"
      }, {
        "boundingBox": [1134, 759, 1181, 760, 1181, 780, 1134, 779],
        "text": "Risk"
      }, {
        "boundingBox": [1191, 760, 1238, 762, 1237, 781, 1191, 780],
        "text": "Type"
      }]
    }]
  }]
}

我想获取 text 属性的值(Zip Code 1900 -),但是还有其他同名的属性.
例如,我想提取如下:

I want to get the value of the text property (Zip Code 1900 -); however, there is another property with the same name.
E.g., I want to extract like below:

Display Record
Customer Number . . 0012343345
Short Name . . . . . . SHOES AND THINGS

下面是我的代码:

' Deserialize the OCR response into the azureOcrJsonProperty model declared below.
Dim ocrProperties As azureOcrJsonProperty = JsonConvert.DeserializeObject(Of azureOcrJsonProperty)(JsonReponse)
' Walk the public properties of the deserialized object via reflection.
For Each item As PropertyInfo In ocrProperties.GetType.GetProperties()
    If item.PropertyType.IsNested Then

        ' NOTE(review): item.GetValue(...) yields the property's value, but it is stored
        ' in a variable declared As Type - confirm this is what was intended.
        Dim TypeHolder as Type = item.GetValue(ocrProperties)
        For Each subitem As PropertyInfo In TypeHolder.GetType.GetProperties()
            ' Formatted_JSon is assigned (not appended to), so every pass overwrites the previous value.
            Formatted_JSon = subitem.Name & ": " & subitem.GetValue(TypeHolder).ToString 
        Next
    Else
        ' Non-nested property: format it as "name: value" (again overwriting Formatted_JSon).
        Formatted_JSon = item.Name & ": " & item.GetValue(ocrProperties).ToString
    End If
Next

' Root of the model the question deserializes the OCR JSON into.
' NOTE(review): in the JSON shown above, "recognitionResults" is an array ([ ... ]),
' while this property is declared as a single recognitionResults object. This is the
' mismatch reported by the quoted "Cannot deserialize the current JSON array" error.
Public Class azureOcrJsonProperty
    Public Property status As String
    Public Property recognitionResults As recognitionResults
End Class

' One entry of the "recognitionResults" array: page number, orientation, page size, unit and lines.
' NOTE(review): in the JSON shown above, "lines" is an array of objects, while this
' property is declared as a single lines object.
Public Class recognitionResults
    Public Property page As Integer
    Public Property clockwiseOrientation As decimal
    Public Property width As Integer
    Public Property height As Integer
    Public Property unit As String
    Public Property lines As lines
End Class

' One recognized line of text: its bounding box, its full text and its individual words.
' NOTE(review): in the JSON shown above, "text" holds strings such as "Display Record"
' (declared here as decimal), "boundingBox" holds integers (declared here as String()),
' and "words" is an array of objects (declared here as a single words object).
Public Class lines
    Public Property boundingBox As String()
    Public Property text As decimal
    Public Property words As words
End Class

' One recognized word inside a line: its bounding box and its text.
' NOTE(review): in the JSON shown above, "text" holds strings such as "Display"
' (declared here as decimal) and "boundingBox" holds integers (declared here as String()).
' Some words also carry a "confidence" value that has no matching property here.
Public Class words
    Public Property boundingBox As String()
    Public Property text As decimal
End Class

我收到以下错误:

无法将当前 JSON 数组(例如 [1,2,3])反序列化为类型 'Get_Customer_Info+recognitionResults',因为该类型需要一个 JSON 对象(例如 {"name":"value"})才能正确反序列化.要修复此错误,要么将 JSON 更改为 JSON 对象(例如 {"name":"value"}),要么将反序列化类型更改为数组或实现了集合接口(例如 ICollection、IList)的类型,例如可以从 JSON 数组反序列化的 List.

Cannot deserialize the current JSON array (e.g. [1,2,3]) into type 'Get_Customer_Info+recognitionResults' because the type requires a JSON object (e.g. {"name":"value"}) to deserialize correctly. To fix this error either change the JSON to a JSON object (e.g. {"name":"value"}) or change the deserialized type to an array or a type that implements a collection interface (e.g. ICollection, IList) like List that can be deserialized from a JSON array.

推荐答案

JSON 结构本身非常简单,请参阅本页底部生成的类模型.
主类 (OcrResults) 公开了 Serialize() 和 Deserialize(json As String) 这两个执行标准操作的公共方法.

The JSON structure itself is quite simple, see the resulting class model at the bottom of this page.
The main Class (OcrResults) exposes the Serialize() and Deserialize(json As String) public methods that perform the standard operations.

要反序列化 JSON,请调用 Deserialize() 方法:

To deserialize the JSON, call the Deserialize() method as:

' Option 1: create the OcrResults instance first, then pass the JSON to Deserialize().
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)

' Option 2 (an alternative to the two lines above, not an addition to them):
' pass the JSON to the constructor and call the parameterless Deserialize().
 Dim ocrObjects = New OcrResults(json).Deserialize()

生成的类对象可以修改并序列化回(如果需要)原始 JSON 结构,调用 Serialize() 方法:

The resulting class object can be modified and serialized back (if required) to the original JSON structure, calling the Serialize() method:

' Deserialize the JSON into the class model.
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)
' Placeholder: modify ocrObjects here as needed.
'(...)

' Serialize the class model back to a JSON string.
Dim newJson As String = ocrResults.Serialize(ocrObjects)

由于此 JSON 包含 OCR 过程的结果 - 其中识别文本的边界多边形以整数数组的形式给出 - 我添加了一个(稍显冗长的)JsonConverter (BoundingBoxConverter),用于将整数数组转换为 Point 结构数组(并在类模型被序列化时转换回整数数组).

Since this JSON contains the result of an OCR procedure - where the bounding polygons of the recognized text are referenced as an array of integers - I've added a (slightly verbose) JsonConverter, (BoundingBoxConverter), to convert the arrays of integers to arrays of Point structures (and back to arrays of integers when the class model is serialized).

这样,如果/当需要围绕识别出的文本部分绘制多边形时,可以省去一个步骤(通常,绘图类(例如 GraphicsPath 类)接受 Point/PointF 数组作为参数).

This eliminates one step if/when the polygons are drawn around the recognized Text parts (usually, the drawing classes, as the GraphicsPath class, accept an array of Point/PointF as argument).

反序列化对象的 Lines 属性是一个 List(Of Line).
每个 Line().Text 属性包含一段完整的已识别文本序列,而 Line().Words() 列表包含同一序列的各个单独部分.

The deserialized object's Lines property is a List(Of Line).
Each Line().Text property contains a full sequence of recognized text, while the Line().Words() Lists contain the single parts of the same sequence.

The full line can be extracted using LINQ queries in different ways.
For example, as described in the question, the lines:

The full line can be extracted using LINQ queries in different ways.
For example, as described in the question, the lines:

Display Record Customer Number . . 0012343345 Short Name . . . . . . SHOES AND THINGS

Can be extracted as:

Can be extracted as:

' Deserialize the JSON passed to the constructor.
Dim ocrObjects = New OcrResults(json).Deserialize()

' For each recognition result: skip lines until one contains "Display Record",
' then keep lines until the first one whose text starts with "Name" (that line is excluded).
' The kept lines of all recognition results are flattened into a single list.
Dim searchResult = ocrObjects.RecognitionResults.
    SelectMany(Function(r) r.Lines.
    SkipWhile(Function(ln) Not ln.Text.Contains("Display Record")).
    TakeWhile(Function(ln) Not ln.Text.StartsWith("Name"))).ToList()

The searchResult list contains all the Line objects in sequence, starting from "Display Record" to "Short Name . . . . . . SHOES AND THINGS".

The searchResult list contains all the Line objects in sequence, starting from "Display Record" to "Short Name . . . . . . SHOES AND THINGS".

To print the result:

To print the result:

' Print the Text of every line in searchResult on one output line, separated by single spaces.
Console.WriteLine(String.Join(" ", searchResult.Select(Function(ln) ln.Text)))

equivalent to:

equivalent to:

' Loop form of the String.Join(" ", ...) output: the texts are separated by a single
' space and the output ends with a line break.
' (A plain Console.Write(line.Text) would run the texts together with no separator.)
Dim isFirstLine As Boolean = True
For Each line As OcrResults.Line In searchResult
    ' Write the separator before every text except the first, so there is no trailing space.
    If Not isFirstLine Then Console.Write(" ")
    Console.Write(line.Text)
    isFirstLine = False
Next
Console.WriteLine()

Extract the first Line object where the Text property contains "Zip Code 1900":

Extract the first Line object where the Text property contains "Zip Code 1900":

' Search the lines of all recognition results for the first one whose text contains
' "Zip Code 1900". FirstOrDefault is used instead of First because OCR output may not
' contain the exact text; First would then throw an InvalidOperationException.
' NOTE(review): this assumes Line is a class (reference type), so that "no match"
' comes back as Nothing - confirm against the OcrResults class model.
Dim zipCode = ocrObjects.RecognitionResults.
    SelectMany(Function(r) r.Lines).
    FirstOrDefault(Function(ln) ln.Text.Contains("Zip Code 1900"))

' Both values are Nothing when no matching line was recognized.
Dim zipCodeText = If(zipCode IsNot Nothing, zipCode.Text, Nothing)
Dim zipPoints = If(zipCode IsNot Nothing, zipCode.BoundingBox, Nothing)

or, assign the first recognized Page to a new object, so you can use its Lines objects directly:

or, assign the first recognized Page to a new object, so you can use its Lines objects directly:

' Keep a reference to the first recognition result so its Lines can be queried directly.
Dim ocrFirstPage = ocrObjects.RecognitionResults(0)

' FirstOrDefault is used instead of First because OCR output may not contain the exact
' text; First would then throw an InvalidOperationException.
' NOTE(review): this assumes Line is a class (reference type), so that "no match"
' comes back as Nothing - confirm against the OcrResults class model.
Dim zipCode = ocrFirstPage.Lines.
    FirstOrDefault(Function(ln) ln.Text.Contains("Zip Code 1900"))

' Both values are Nothing when no matching line was recognized.
Dim zipCodeText = If(zipCode IsNot Nothing, zipCode.Text, Nothing)
Dim zipPoints = If(zipCode IsNot Nothing, zipCode.BoundingBox, Nothing)

<hr>

Drawing the Text sections:

Each line of recognized Text specifies the polygon that contains the string.
To draw the Polygons corresponding to the boundingBox array of Point structures of each Line of text, one of the

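Each line of recognized Text specifies the polygon that contains the string.
To draw the Polygons corresponding to the boundingBox array of Point structures of each Line of text, one of the Graphics.DrawPolygon() overloads (or GraphicsPath.AddPolygon(), as in the example below) can be used.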

Each OcrResults.RecognitionResult object has a ClockwiseOrientation property that defines the rotation angle of the Text as identified by the OCR.
This angle can be used to rotate the Graphics (or to straighten the Polygon, eventually).

Each OcrResults.RecognitionResult object has a ClockwiseOrientation property that defines the rotation angle of the Text as identified by the OCR.
This angle can be used to rotate the Graphics (or to straighten the Polygon, eventually).

For example, using a Windows Forms PictureBox control as canvas and a GraphicsPath as the container of the Polygons:

For example, using a Windows Forms PictureBox control as canvas and a GraphicsPath as the container of the Polygons:

' Holds the deserialized OCR response; stays Nothing until a response has been parsed.
' The Paint handler reads this, so it must be declared where the handler can see it.
Dim ocrObjects As OcrResults.Results = Nothing

' (... code omitted ...)
' Wherever the JSON response becomes available: deserialize it,
' then invalidate the PictureBox so that its Paint event is raised.
 ocrObjects = New OcrResults(json).Deserialize()
 PictureBox1.Invalidate()
 ' (... code omitted ...)

''' <summary>
''' Paint handler of PictureBox1: outlines, in red, the bounding polygons of the
''' first recognized text lines (at most five) of the first recognized page.
''' </summary>
''' <param name="sender">The control raising the Paint event.</param>
''' <param name="e">Paint event data; provides the Graphics object to draw on.</param>
Private Sub PictureBox1_Paint(sender As Object, e As PaintEventArgs) Handles PictureBox1.Paint
    ' Nothing to draw until a response has been deserialized and it contains a page.
    If ocrObjects Is Nothing Then Return
    If ocrObjects.RecognitionResults Is Nothing Then Return
    If ocrObjects.RecognitionResults.Count = 0 Then Return

    Dim ocr = ocrObjects.RecognitionResults(0)
    If ocr.Lines Is Nothing Then Return

    ' Draw at most the first 5 lines. Clamping the upper bound avoids an
    ' ArgumentOutOfRangeException when the page has fewer than 5 lines.
    Dim lastIndex As Integer = If(ocr.Lines.Count < 5, ocr.Lines.Count - 1, 4)

    'Dim angle As Single = Convert.ToSingle(ocr.ClockwiseOrientation)
    Using path As GraphicsPath = New GraphicsPath
        For i As Integer = 0 To lastIndex
            Dim box = ocr.Lines(i).BoundingBox
            ' A line deserialized from a null boundingBox has no polygon to add.
            If box Is Nothing Then Continue For
            path.AddPolygon(box)
        Next
        e.Graphics.SmoothingMode = SmoothingMode.AntiAlias
        'e.Graphics.RotateTransform(-angle)
        e.Graphics.DrawPath(Pens.Red, path)
    End Using
End Sub

This will draw the first 5 polygons, giving this result:

This will draw the first 5 polygons, giving this result:

OcrResults class model:

OcrResults class model:

Imports System
Imports System.Collections.Generic
Imports System.Drawing
Imports System.Linq
Imports Newtonsoft.Json

''' <summary>
''' Object model and JSON (de)serialization helpers for an OCR response whose root
''' object has a "status" and a "recognitionResults" array of pages, each page holding
''' lines and words with an 8-integer "boundingBox" array.
''' </summary>
Public Class OcrResults
    ' Serializer settings shared by Serialize/Deserialize; registers the bounding-box converter.
    Private jSettings As JsonSerializerSettings
    ' JSON text supplied to the constructor; used by the parameterless Deserialize().
    Private jsonInput As String = String.Empty

    ''' <summary>Creates an instance with an empty JSON input.</summary>
    Public Sub New()
        Me.New(String.Empty)
    End Sub

    ''' <summary>Creates an instance bound to the given JSON text.</summary>
    ''' <param name="json">The JSON text that the parameterless Deserialize() will parse.</param>
    Public Sub New(json As String)
        jsonInput = json
        jSettings = New JsonSerializerSettings() With {
            .Converters = New JsonConverter() {New BoundingBoxConverter()}
        }
    End Sub

    ''' <summary>Root object of the response.</summary>
    Public Class Results
        Public Property Status As String
        Public Property RecognitionResults As List(Of RecognitionResult)
    End Class

    ''' <summary>One recognized page and the text lines found on it.</summary>
    Public Class RecognitionResult
        Public Property Page As Long
        Public Property ClockwiseOrientation As Double
        Public Property Width As Long
        Public Property Height As Long
        Public Property Unit As String
        Public Property Lines As List(Of Line)
    End Class

    ''' <summary>One recognized line of text, its enclosing polygon and its words.</summary>
    Public Class Line
        Public Property BoundingBox As Point()
        Public Property Text As String
        Public Property Words As List(Of Word)
    End Class

    ''' <summary>One recognized word and its enclosing polygon.</summary>
    Public Class Word
        Public Property BoundingBox As Point()
        Public Property Text As String

        ' Omitted from the serialized output when it is Nothing.
        <JsonProperty(NullValueHandling:=NullValueHandling.Ignore)>
        Public Property Confidence As String
    End Class

    ''' <summary>Serializes a Results model to JSON using this instance's settings.</summary>
    ''' <param name="model">The model to serialize.</param>
    ''' <returns>The JSON text.</returns>
    Public Function Serialize(model As Results) As String
        Return JsonConvert.SerializeObject(model, jSettings)
    End Function

    ''' <summary>Deserializes the JSON text that was passed to the constructor.</summary>
    ''' <returns>The result of Deserialize(String) for the stored JSON text.</returns>
    Public Function Deserialize() As Results
        Return Deserialize(jsonInput)
    End Function

    ''' <summary>Deserializes the given JSON text using this instance's settings.</summary>
    ''' <param name="json">The JSON text to parse.</param>
    ''' <returns>The deserialized Results object.</returns>
    Public Function Deserialize(json As String) As Results
        Return JsonConvert.DeserializeObject(Of Results)(json, jSettings)
    End Function

    ''' <summary>
    ''' Converts between a flat JSON array of 8 integers (x0, y0, x1, y1, x2, y2, x3, y3)
    ''' and an array of 4 Point structures.
    ''' </summary>
    Public Class BoundingBoxConverter
        Inherits JsonConverter

        ' A bounding box is a quadrilateral: 4 points, 2 coordinates each.
        Private Const PointCount As Integer = 4
        Private Const CoordinateCount As Integer = PointCount * 2

        ''' <summary>This converter handles Point arrays only.</summary>
        Public Overrides Function CanConvert(t As Type) As Boolean
            Return t = GetType(Point())
        End Function

        ''' <summary>Reads a flat array of integers and groups it into 4 Points.</summary>
        ''' <returns>
        ''' Nothing for a JSON null; <paramref name="existingValue"/> when the current token
        ''' is not the start of an array; otherwise an array of 4 Points.
        ''' </returns>
        ''' <exception cref="JsonSerializationException">
        ''' The input ends before the array is closed, or the array holds fewer than 8 integers.
        ''' </exception>
        Public Overrides Function ReadJson(reader As JsonReader, t As Type, existingValue As Object, serializer As JsonSerializer) As Object
            If reader.TokenType = JsonToken.Null Then Return Nothing
            If reader.TokenType <> JsonToken.StartArray Then Return existingValue

            Dim values As New List(Of Integer)()
            While reader.TokenType <> JsonToken.EndArray
                Dim pointPart = reader.ReadAsInt32()
                If pointPart IsNot Nothing Then
                    values.Add(pointPart.Value)
                ElseIf reader.TokenType = JsonToken.None Then
                    ' No more tokens and no EndArray seen: stop instead of looping on an exhausted reader.
                    Throw New JsonSerializationException("Unexpected end of input while reading a bounding box.")
                End If
            End While

            ' Fail with a descriptive error rather than an index-out-of-range on values(n) below.
            ' Extra values beyond the first 8 are ignored, as before.
            If values.Count < CoordinateCount Then
                Throw New JsonSerializationException(
                    String.Format("A bounding box requires {0} integer values, but only {1} were found.",
                                  CoordinateCount, values.Count))
            End If

            Return New Point() {
                New Point(values(0), values(1)),
                New Point(values(2), values(3)),
                New Point(values(4), values(5)),
                New Point(values(6), values(7))
            }
        End Function

        ''' <summary>Writes an array of 4 Points as a flat array of 8 integers.</summary>
        ''' <exception cref="JsonSerializationException">
        ''' The value is not a Point array, or it does not hold exactly 4 points.
        ''' </exception>
        Public Overrides Sub WriteJson(writer As JsonWriter, untypedValue As Object, serializer As JsonSerializer)
            If untypedValue Is Nothing Then
                serializer.Serialize(writer, Nothing)
                Return
            End If

            Dim points = TryCast(untypedValue, Point())
            If points Is Nothing Then
                ' Writing nothing here would leave the property without a value and corrupt the output.
                Throw New JsonSerializationException("BoundingBoxConverter can only write an array of [Point]")
            End If

            If points.Length <> PointCount Then
                Throw New JsonSerializationException("Invalid array of [Point]")
            End If

            ' Flatten to x0, y0, x1, y1, ...
            Dim values As Integer() = points.SelectMany(Function(p) New Integer() {p.X, p.Y}).ToArray()
            serializer.Serialize(writer, values)
        End Sub
    End Class
End Class

这篇关于如何从 JSON 中的重复元素中获取值?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆