使用带正则表达式的 split 函数拆分表情符号字符串 [英] Split string of emoji characters by split function with regex

查看:0
本文介绍了使用正则表达式的拆分函数拆分表情符串的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想使用 JavaScript 的 split 函数来拆分表情符号字符串。在 StackOverflow 上有很多这样的问题,但我找不到任何完整的解决方案。所以我用我自己的方式来做:

a)将拆分函数与正则表达式一起使用。

b)按正则表达式的 Unicode 范围匹配来分割 emoji 字符:从 \uD800 到 \uDBFF,以及从 \uDC00 到 \uDFFF。

c)在此正则表达式中,排除零宽连接符(U+200D)和变体选择符(U+FE0F)字符。所以我写了如下代码:

// Sample input for the question: emoji followed by skin-tone modifiers, emoji
// glued together with invisible joiner characters, and stand-alone emoji.
var p = '👦🏼👧🏼👩🏼👧🏾👧🏿👩‍👩‍👧‍👧👭👫👨‍❤️‍💋‍👨';

并拆分:

// The asker's attempt: split on every UTF-16 surrogate pair (captured, so the
// pair itself is kept in the result array), with a negative lookahead that
// rejects positions starting with U+200D or U+FE0F.
// The backslashes of the \u escapes are required: without them
// "[uD800-uDBFF]" is a class of literal letters/digits, not a surrogate range.
var split = p.split(/(?![\u200D\uFE0F])([\uD800-\uDBFF][\uDC00-\uDFFF])/);

但结果错误:(

["", "👦", "", "🏼", "", "👧", "", "🏼", "", "👩", "", "🏼", "", "👧", "", "🏾", "", "👧", "", "🏿", "", "👩", "‍", "👩", "‍", "👧", "‍", "👧", "", "👭", "", "👫", "", "👨", "‍❤️‍", "💋", "‍", "👨", ""]

我是否正确使用了正则表达式的排除选择器?如果正确,我的想法造成的错误是什么? 预期结果需要为:["👦🏼","👧🏼","👩🏼","👧🏾","👧🏿","👩‍👩‍👧‍👧","👭","👫","👨‍❤️‍💋‍👨"]

==

我想更新一下信息。我已经为我的站点解决了这个问题:https://www.emojionline.org。你可以测试一下。我只是用了一个包含所有表情符号的字典,并使用替换功能将每个表情符号替换为 |emoji|,然后就可以按符号 | 拆分表情符号字符串。这很好用:)

推荐答案

我在 Mathias Bynens 的 emoji-regex 基础上做了一点扩展,增加了一个备选分支 [\uD800-\uDBFF][\uDC00-\uDFFF](?:[\u200D\uFE0F][\uD800-\uDBFF][\uDC00-\uDFFF]){2,}。它先匹配一个由代理对(两个 UTF-16 码元)表示的普通表情符号,后面跟 2 个或更多个序列(个数可以用限定符 {2,} 控制),每个序列由一个零宽连接符或变体选择符加上另一个代理对表情符号组成。

如果不加这个备选分支,结果为[ '👦🏼','👧🏼','👩🏼','👧🏾','👧🏿','👩‍👩‍👧','👧','👭','👫','👨‍❤️‍💋‍👨' ]

data-lang="js" data-hide="false" data-console="true" data-babel="false">
// Sample text: plain words around the emoji run that should be split into
// one array entry per displayed emoji.
var p = 'my family 👦🏼👧🏼👩🏼👧🏾👧🏿👩‍👩‍👧‍👧👭👫👨‍❤️‍💋‍👨 here';

// Top-level alternatives of the emoji pattern, tried in the listed order.
// Each alternative is its own regex literal so the pattern cannot be broken
// by line wrapping (a regex literal must not span lines), and every \u / \x
// escape keeps its backslash (without it "[uD800-uDBFF]" would be a class of
// literal letters and digits instead of a surrogate range).
var rxParts = [
  // Added alternative: one surrogate pair followed by two or more groups of
  // (U+200D or U+FE0F, then another surrogate pair).
  /[\uD800-\uDBFF][\uDC00-\uDFFF](?:[\u200D\uFE0F][\uD800-\uDBFF][\uDC00-\uDFFF]){2,}/,
  // Remaining alternatives: the emoji-regex pattern this answer extends.
  /\uD83D\uDC69(?:\u200D(?:(?:\uD83D\uDC69\u200D)?\uD83D\uDC67|(?:\uD83D\uDC69\u200D)?\uD83D\uDC66)|\uD83C[\uDFFB-\uDFFF])/,
  /\uD83D\uDC69\u200D(?:\uD83D\uDC69\u200D)?\uD83D\uDC66\u200D\uD83D\uDC66/,
  /\uD83D\uDC69\u200D(?:\uD83D\uDC69\u200D)?\uD83D\uDC67\u200D(?:\uD83D[\uDC66\uDC67])/,
  /\uD83C\uDFF3\uFE0F\u200D\uD83C\uDF08/,
  /(?:\uD83C[\uDFC3\uDFC4\uDFCA]|\uD83D[\uDC6E\uDC71\uDC73\uDC77\uDC81\uDC82\uDC86\uDC87\uDE45-\uDE47\uDE4B\uDE4D\uDE4E\uDEA3\uDEB4-\uDEB6]|\uD83E[\uDD26\uDD37-\uDD39\uDD3D\uDD3E\uDDD6-\uDDDD])(?:\uD83C[\uDFFB-\uDFFF])\u200D[\u2640\u2642]\uFE0F/,
  /\uD83D\uDC69(?:\uD83C[\uDFFB-\uDFFF])\u200D(?:\uD83C[\uDF3E\uDF73\uDF93\uDFA4\uDFA8\uDFEB\uDFED]|\uD83D[\uDCBB\uDCBC\uDD27\uDD2C\uDE80\uDE92])/,
  /(?:\uD83C[\uDFC3\uDFC4\uDFCA]|\uD83D[\uDC6E\uDC6F\uDC71\uDC73\uDC77\uDC81\uDC82\uDC86\uDC87\uDE45-\uDE47\uDE4B\uDE4D\uDE4E\uDEA3\uDEB4-\uDEB6]|\uD83E[\uDD26\uDD37-\uDD39\uDD3C-\uDD3E\uDDD6-\uDDDF])\u200D[\u2640\u2642]\uFE0F/,
  /\uD83C\uDDFD\uD83C\uDDF0/,
  /\uD83C\uDDF6\uD83C\uDDE6/,
  /\uD83C\uDDF4\uD83C\uDDF2/,
  /\uD83C\uDDE9(?:\uD83C[\uDDEA\uDDEC\uDDEF\uDDF0\uDDF2\uDDF4\uDDFF])/,
  /\uD83C\uDDF7(?:\uD83C[\uDDEA\uDDF4\uDDF8\uDDFA\uDDFC])/,
  /\uD83C\uDDE8(?:\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDEE\uDDF0-\uDDF5\uDDF7\uDDFA-\uDDFF])/,
  /(?:\u26F9|\uD83C[\uDFCB\uDFCC]|\uD83D\uDD75)(?:\uFE0F\u200D[\u2640\u2642]|(?:\uD83C[\uDFFB-\uDFFF])\u200D[\u2640\u2642])\uFE0F/,
  /(?:\uD83D\uDC41\uFE0F\u200D\uD83D\uDDE8|\uD83D\uDC69(?:\uD83C[\uDFFB-\uDFFF])\u200D[\u2695\u2696\u2708]|\uD83D\uDC69\u200D[\u2695\u2696\u2708]|\uD83D\uDC68(?:(?:\uD83C[\uDFFB-\uDFFF])\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708]))\uFE0F/,
  /\uD83C\uDDF2(?:\uD83C[\uDDE6\uDDE8-\uDDED\uDDF0-\uDDFF])/,
  /\uD83D\uDC69\u200D(?:\uD83C[\uDF3E\uDF73\uDF93\uDFA4\uDFA8\uDFEB\uDFED]|\uD83D[\uDCBB\uDCBC\uDD27\uDD2C\uDE80\uDE92]|\u2764\uFE0F\u200D(?:\uD83D\uDC8B\u200D(?:\uD83D[\uDC68\uDC69])|\uD83D[\uDC68\uDC69]))/,
  /\uD83C\uDDF1(?:\uD83C[\uDDE6-\uDDE8\uDDEE\uDDF0\uDDF7-\uDDFB\uDDFE])/,
  /\uD83C\uDDEF(?:\uD83C[\uDDEA\uDDF2\uDDF4\uDDF5])/,
  /\uD83C\uDDED(?:\uD83C[\uDDF0\uDDF2\uDDF3\uDDF7\uDDF9\uDDFA])/,
  /\uD83C\uDDEB(?:\uD83C[\uDDEE-\uDDF0\uDDF2\uDDF4\uDDF7])/,
  /[#*0-9]\uFE0F\u20E3/,
  /\uD83C\uDDE7(?:\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEF\uDDF1-\uDDF4\uDDF6-\uDDF9\uDDFB\uDDFC\uDDFE\uDDFF])/,
  /\uD83C\uDDE6(?:\uD83C[\uDDE8-\uDDEC\uDDEE\uDDF1\uDDF2\uDDF4\uDDF6-\uDDFA\uDDFC\uDDFD\uDDFF])/,
  /\uD83C\uDDFF(?:\uD83C[\uDDE6\uDDF2\uDDFC])/,
  /\uD83C\uDDF5(?:\uD83C[\uDDE6\uDDEA-\uDDED\uDDF0-\uDDF3\uDDF7-\uDDF9\uDDFC\uDDFE])/,
  /\uD83C\uDDFB(?:\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDEE\uDDF3\uDDFA])/,
  /\uD83C\uDDF3(?:\uD83C[\uDDE6\uDDE8\uDDEA-\uDDEC\uDDEE\uDDF1\uDDF4\uDDF5\uDDF7\uDDFA\uDDFF])/,
  /\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62(?:\uDB40\uDC77\uDB40\uDC6C\uDB40\uDC73|\uDB40\uDC73\uDB40\uDC63\uDB40\uDC74|\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67)\uDB40\uDC7F/,
  /\uD83D\uDC68(?:\u200D(?:\u2764\uFE0F\u200D(?:\uD83D\uDC8B\u200D)?\uD83D\uDC68|(?:(?:\uD83D[\uDC68\uDC69])\u200D)?\uD83D\uDC66\u200D\uD83D\uDC66|(?:(?:\uD83D[\uDC68\uDC69])\u200D)?\uD83D\uDC67\u200D(?:\uD83D[\uDC66\uDC67])|\uD83C[\uDF3E\uDF73\uDF93\uDFA4\uDFA8\uDFEB\uDFED]|\uD83D[\uDCBB\uDCBC\uDD27\uDD2C\uDE80\uDE92])|(?:\uD83C[\uDFFB-\uDFFF])\u200D(?:\uD83C[\uDF3E\uDF73\uDF93\uDFA4\uDFA8\uDFEB\uDFED]|\uD83D[\uDCBB\uDCBC\uDD27\uDD2C\uDE80\uDE92]))/,
  /\uD83C\uDDF8(?:\uD83C[\uDDE6-\uDDEA\uDDEC-\uDDF4\uDDF7-\uDDF9\uDDFB\uDDFD-\uDDFF])/,
  /\uD83C\uDDF0(?:\uD83C[\uDDEA\uDDEC-\uDDEE\uDDF2\uDDF3\uDDF5\uDDF7\uDDFC\uDDFE\uDDFF])/,
  /\uD83C\uDDFE(?:\uD83C[\uDDEA\uDDF9])/,
  /\uD83C\uDDEE(?:\uD83C[\uDDE8-\uDDEA\uDDF1-\uDDF4\uDDF6-\uDDF9])/,
  /\uD83C\uDDF9(?:\uD83C[\uDDE6\uDDE8\uDDE9\uDDEB-\uDDED\uDDEF-\uDDF4\uDDF7\uDDF9\uDDFB\uDDFC\uDDFF])/,
  /\uD83C\uDDEC(?:\uD83C[\uDDE6\uDDE7\uDDE9-\uDDEE\uDDF1-\uDDF3\uDDF5-\uDDFA\uDDFC\uDDFE])/,
  /\uD83C\uDDFA(?:\uD83C[\uDDE6\uDDEC\uDDF2\uDDF3\uDDF8\uDDFE\uDDFF])/,
  /\uD83C\uDDEA(?:\uD83C[\uDDE6\uDDE8\uDDEA\uDDEC\uDDED\uDDF7-\uDDFA])/,
  /\uD83C\uDDFC(?:\uD83C[\uDDEB\uDDF8])/,
  /(?:\u26F9|\uD83C[\uDFCB\uDFCC]|\uD83D\uDD75)(?:\uD83C[\uDFFB-\uDFFF])/,
  /(?:\uD83C[\uDFC3\uDFC4\uDFCA]|\uD83D[\uDC6E\uDC71\uDC73\uDC77\uDC81\uDC82\uDC86\uDC87\uDE45-\uDE47\uDE4B\uDE4D\uDE4E\uDEA3\uDEB4-\uDEB6]|\uD83E[\uDD26\uDD37-\uDD39\uDD3D\uDD3E\uDDD6-\uDDDD])(?:\uD83C[\uDFFB-\uDFFF])/,
  /(?:[\u261D\u270A-\u270D]|\uD83C[\uDF85\uDFC2\uDFC7]|\uD83D[\uDC42\uDC43\uDC46-\uDC50\uDC66\uDC67\uDC70\uDC72\uDC74-\uDC76\uDC78\uDC7C\uDC83\uDC85\uDCAA\uDD74\uDD7A\uDD90\uDD95\uDD96\uDE4C\uDE4F\uDEC0\uDECC]|\uD83E[\uDD18-\uDD1C\uDD1E\uDD1F\uDD30-\uDD36\uDDD1-\uDDD5])(?:\uD83C[\uDFFB-\uDFFF])/,
  /\uD83D\uDC68(?:\u200D(?:(?:(?:\uD83D[\uDC68\uDC69])\u200D)?\uD83D\uDC67|(?:(?:\uD83D[\uDC68\uDC69])\u200D)?\uD83D\uDC66)|\uD83C[\uDFFB-\uDFFF])/,
  /(?:[\u261D\u26F9\u270A-\u270D]|\uD83C[\uDF85\uDFC2-\uDFC4\uDFC7\uDFCA-\uDFCC]|\uD83D[\uDC42\uDC43\uDC46-\uDC50\uDC66-\uDC69\uDC6E\uDC70-\uDC78\uDC7C\uDC81-\uDC83\uDC85-\uDC87\uDCAA\uDD74\uDD75\uDD7A\uDD90\uDD95\uDD96\uDE45-\uDE47\uDE4B-\uDE4F\uDEA3\uDEB4-\uDEB6\uDEC0\uDECC]|\uD83E[\uDD18-\uDD1C\uDD1E\uDD1F\uDD26\uDD30-\uDD39\uDD3D\uDD3E\uDDD1-\uDDDD])(?:\uD83C[\uDFFB-\uDFFF])?/,
  /(?:[\u231A\u231B\u23E9-\u23EC\u23F0\u23F3\u25FD\u25FE\u2614\u2615\u2648-\u2653\u267F\u2693\u26A1\u26AA\u26AB\u26BD\u26BE\u26C4\u26C5\u26CE\u26D4\u26EA\u26F2\u26F3\u26F5\u26FA\u26FD\u2705\u270A\u270B\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B1B\u2B1C\u2B50\u2B55]|\uD83C[\uDC04\uDCCF\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE1A\uDE2F\uDE32-\uDE36\uDE38-\uDE3A\uDE50\uDE51\uDF00-\uDF20\uDF2D-\uDF35\uDF37-\uDF7C\uDF7E-\uDF93\uDFA0-\uDFCA\uDFCF-\uDFD3\uDFE0-\uDFF0\uDFF4\uDFF8-\uDFFF]|\uD83D[\uDC00-\uDC3E\uDC40\uDC42-\uDCFC\uDCFF-\uDD3D\uDD4B-\uDD4E\uDD50-\uDD67\uDD7A\uDD95\uDD96\uDDA4\uDDFB-\uDE4F\uDE80-\uDEC5\uDECC\uDED0-\uDED2\uDEEB\uDEEC\uDEF4-\uDEF8]|\uD83E[\uDD10-\uDD3A\uDD3C-\uDD3E\uDD40-\uDD45\uDD47-\uDD4C\uDD50-\uDD6B\uDD80-\uDD97\uDDC0\uDDD0-\uDDE6])/,
  /(?:[#*0-9\xA9\xAE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u2660\u2663\u2665\u2666\u2668\u267B\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299]|\uD83C[\uDC04\uDCCF\uDD70\uDD71\uDD7E\uDD7F\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE02\uDE1A\uDE2F\uDE32-\uDE3A\uDE50\uDE51\uDF00-\uDF21\uDF24-\uDF93\uDF96\uDF97\uDF99-\uDF9B\uDF9E-\uDFF0\uDFF3-\uDFF5\uDFF7-\uDFFF]|\uD83D[\uDC00-\uDCFD\uDCFF-\uDD3D\uDD49-\uDD4E\uDD50-\uDD67\uDD6F\uDD70\uDD73-\uDD7A\uDD87\uDD8A-\uDD8D\uDD90\uDD95\uDD96\uDDA4\uDDA5\uDDA8\uDDB1\uDDB2\uDDBC\uDDC2-\uDDC4\uDDD1-\uDDD3\uDDDC-\uDDDE\uDDE1\uDDE3\uDDE8\uDDEF\uDDF3\uDDFA-\uDE4F\uDE80-\uDEC5\uDECB-\uDED2\uDEE0-\uDEE5\uDEE9\uDEEB\uDEEC\uDEF0\uDEF3-\uDEF8]|\uD83E[\uDD10-\uDD3A\uDD3C-\uDD3E\uDD40-\uDD45\uDD47-\uDD4C\uDD50-\uDD6B\uDD80-\uDD97\uDDC0\uDDD0-\uDDE6])\uFE0F/
];

// One capturing group around all alternatives: String.prototype.split keeps
// captured text in its result, so each matched emoji becomes an array entry.
var rx = new RegExp('(' + rxParts.map(function (part) {
  return part.source;
}).join('|') + ')');

// Adjacent emoji leave empty strings between the matches; drop them.
var res = p.split(rx).filter(Boolean);

// Show the result when a DOM is available; the array is coerced to a
// comma-separated string. The guard lets the snippet also run outside a browser.
if (typeof document !== 'undefined') {
  document.body.innerHTML = res;
}

这篇关于使用正则表达式的拆分函数拆分表情符串的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆