在javascript中获取gmail邮件正文时删除html格式 [英] Remove html formatting when getting Body of a gmail message in javascript

查看:121
本文介绍了在javascript中获取gmail邮件正文时删除html格式的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想在我的 Google Apps 脚本中去除 HTML 格式。我目前正在搜索电子邮件,并将结果输出到 Google 电子表格中。我想知道是否有办法替换文本。我知道正则表达式,但我认为它不适用于 getBody 函数。



我真的很感激一些反馈或在这个问题上的一些帮助。



代码:

  function Search(){

var sheet = SpreadsheetApp.getActiveSheet();
var row = 2;

//清除现有搜索结果
sheet.getRange(2,1,sheet.getMaxRows() - 1,4).clearContent();

//应该搜索哪个Gmail标签?
var label = sheet.getRange(F3)。getValue();

//获取正则表达式搜索模式
var pattern = sheet.getRange(F4)。getValue();

//检索指定标签的所有线程
var threads = GmailApp.search(in:+ label);

for(var i = 0; i< threads.length; i ++){

var messages = threads [i] .getMessages();

for(var m = 0; m var msg = messages [m] .getBody();

//邮件内容是否与搜索模式匹配?
if(msg.search(pattern)!== -1){


//打印邮件主题
sheet.getRange(row,3)。的setValue(消息[M] .getBody());


解决方案

替换为:

  //打印邮件主题
sheet.getRange(row,3).setValue(messages [m] .getBody());

有了这个:

  //打印邮件主题
sheet.getRange(row,3).setValue(getTextFromHtml(messages [m] .getBody()));

getTextFromHtml() 函数改编自此回答,并增加了对一些基本格式(编号列表、项目符号列表、段落分隔)的处理。

p>

  /**
   * Converts an HTML string to plain text.
   *
   * Parses the markup with Xml.parse, takes the parsed document's element and
   * passes it to getTextFromNode for rendering.
   *
   * @param {string} html - HTML markup to convert (e.g. a message body).
   * @returns {string} The text produced by getTextFromNode.
   */
  function getTextFromHtml(html) {
    // NOTE(review): the second argument presumably enables lenient parsing of
    // non-well-formed HTML — confirm against the Xml service documentation.
    var doc = Xml.parse(html, true);
    var root = doc.getElement();
    return getTextFromNode(root);
  }

// List marker state shared by the 'ul', 'ol' and 'li' cases below:
// 0 means "bullet list", a positive number is the next ordinal to print for a
// numbered list. Starts at 0 so an <li> found outside any list gets a bullet.
// Because this is a single shared counter, a list nested inside another list
// overwrites the outer list's state.
var _itemNum = 0;

/**
 * Recursively renders a parsed node as plain text.
 *
 * Text nodes contribute their XML string. Element nodes contribute the
 * concatenated text of their child nodes, wrapped in a prefix/suffix chosen
 * from the element's local name:
 *   - br, p   : newline after the content
 *   - ul, ol  : newline after the content; selects bullet or numbered markers
 *   - li      : newline plus ' - ' or ' N. ' before the content
 * Any other kind of node yields an empty string.
 *
 * @param {Object} x - Node whose toString() is 'XmlText' or 'XmlElement'.
 * @returns {string} Plain-text rendering of the node and its descendants.
 */
function getTextFromNode(x) {
  switch (x.toString()) {
    case 'XmlText':
      return x.toXmlString();
    case 'XmlElement':
      var name = x.getName().getLocalName();
      Logger.log(name);
      var pre = '';
      var post = '';
      switch (name) {
        case 'br':
        case 'p':
          post = '\n';
          break;
        case 'ul':
          post = '\n';
          // Must reset the shared counter (not a stray global) so the
          // following <li> children are rendered as bullets.
          _itemNum = 0;
          break;
        case 'ol':
          post = '\n';
          _itemNum = 1;
          break;
        case 'li':
          pre = '\n' + (_itemNum === 0 ? ' - ' : (' ' + _itemNum++ + '. '));
          break;
        default:
          break;
      }
      // The list state above is set before the children are rendered here.
      return pre + x.getNodes().map(getTextFromNode).join('') + post;
    default:
      return '';
  }
}


I would like to remove the HTML formatting in my Google Apps Script. I am currently searching the email and printing the results to a Google spreadsheet. I would like to know if there is a way to replace text. I am aware of regex, but I don't think it works with the getBody function.

I would really appreciate some feedback or some help on this matter.

Code:

function Search() {

var sheet   = SpreadsheetApp.getActiveSheet();
var row     = 2;

// Clear existing search results
sheet.getRange(2, 1, sheet.getMaxRows() - 1, 4).clearContent();

// Which Gmail Label should be searched?
var label   = sheet.getRange("F3").getValue();

// Get the Regular Expression Search Pattern
var pattern = sheet.getRange("F4").getValue();

// Retrieve all threads of the specified label
var threads = GmailApp.search("in:" + label);

for (var i = 0; i < threads.length; i++) {

var messages = threads[i].getMessages();

for (var m = 0; m < messages.length; m++) {
 var msg = messages[m].getBody();

// Does the message content match the search pattern?
if (msg.search(pattern) !== -1) {


 // Print the message subject
 sheet.getRange(row,3).setValue(messages[m].getBody());

解决方案

Replace this:

// Print the message subject
sheet.getRange(row,3).setValue(messages[m].getBody());

With this:

// Print the message subject
sheet.getRange(row,3).setValue(getTextFromHtml(messages[m].getBody()));

The getTextFromHtml() function has been adapted from this answer, with the addition of handling for some basic formatting (numbered & bullet lists, paragraph breaks).

/**
 * Converts an HTML string to plain text.
 *
 * Parses the markup with Xml.parse, takes the parsed document's element and
 * passes it to getTextFromNode for rendering.
 *
 * @param {string} html - HTML markup to convert (e.g. a message body).
 * @returns {string} The text produced by getTextFromNode.
 */
function getTextFromHtml(html) {
  // NOTE(review): the second argument presumably enables lenient parsing of
  // non-well-formed HTML — confirm against the Xml service documentation.
  var doc = Xml.parse(html, true);
  var root = doc.getElement();
  return getTextFromNode(root);
}

// List marker state shared by the 'ul', 'ol' and 'li' cases below:
// 0 means "bullet list", a positive number is the next ordinal to print for a
// numbered list. Starts at 0 so an <li> found outside any list gets a bullet.
// Because this is a single shared counter, a list nested inside another list
// overwrites the outer list's state.
var _itemNum = 0;

/**
 * Recursively renders a parsed node as plain text.
 *
 * Text nodes contribute their XML string. Element nodes contribute the
 * concatenated text of their child nodes, wrapped in a prefix/suffix chosen
 * from the element's local name:
 *   - br, p   : newline after the content
 *   - ul, ol  : newline after the content; selects bullet or numbered markers
 *   - li      : newline plus ' - ' or ' N. ' before the content
 * Any other kind of node yields an empty string.
 *
 * @param {Object} x - Node whose toString() is 'XmlText' or 'XmlElement'.
 * @returns {string} Plain-text rendering of the node and its descendants.
 */
function getTextFromNode(x) {
  switch (x.toString()) {
    case 'XmlText':
      return x.toXmlString();
    case 'XmlElement':
      var name = x.getName().getLocalName();
      Logger.log(name);
      var pre = '';
      var post = '';
      switch (name) {
        case 'br':
        case 'p':
          post = '\n';
          break;
        case 'ul':
          post = '\n';
          // Must reset the shared counter (not a stray global) so the
          // following <li> children are rendered as bullets.
          _itemNum = 0;
          break;
        case 'ol':
          post = '\n';
          _itemNum = 1;
          break;
        case 'li':
          pre = '\n' + (_itemNum === 0 ? ' - ' : (' ' + _itemNum++ + '. '));
          break;
        default:
          break;
      }
      // The list state above is set before the children are rendered here.
      return pre + x.getNodes().map(getTextFromNode).join('') + post;
    default:
      return '';
  }
}

这篇关于在javascript中获取gmail邮件正文时删除html格式的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆