如何使用Open XML电子表格“uncollapse”电子表格中的单元格? [英] How do I have Open XML spreadsheet "uncollapse" cells in a spreadsheet?

查看:109
本文介绍了如何使用Open XML电子表格“uncollapse”电子表格中的单元格?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在服务器端用 C# 处理 xlsx Excel 文件。假设电子表格中总共有 15 列(单元格)。在各行的单元格中,有些值是缺失的。第一行是标题行,会正确地包含全部 15 个单元格。但在数据行中,某些单元格可能为空值,因此 Open XML 返回的是一组“锯齿状”的单元格集合:第 1 行有完整的 15 个单元格,而第 2 行可能只有 13 个单元格,因为其中两个值为空。这让我很困惑!我该如何正确地映射这些数据?现在所有内容基本上都被向左挤压,导致单元格的值对不上。我遗漏了什么?用 Open XML 的术语来说,这些单元格似乎被“折叠”了。

WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();

澄清:这是另一种提出这个问题的方法。如果我想将Excel文件中的内容放入DataTable中,该怎么办?我希望所有的数据列正确排列。我如何才能实现这一点?



这个问题比我在这里更好:阅读Excel Open XML正在忽略空白单元格

解决方案

要实现您的需求,一种方法是先找出所有行中最大的列索引,然后用空白单元格填充所有缺失的单元格。这样就能让所有列保持正确对齐。



以下是一个快速的代码段,以找出最大的列索引:

int? biggestColumnIndex = 0;
foreach (Row row in rows)
{
    if (row.Descendants<Cell>().Any())
    {
        // 判断这一行的列索引是否比之前的行更大
        int? columnIndex = GetColumnIndexFromName(((Cell)(row.LastChild)).CellReference);
        biggestColumnIndex = columnIndex.HasValue && columnIndex > biggestColumnIndex ? columnIndex : biggestColumnIndex;
    }
}

///< summary>
///只给列名(没有行索引),它将返回基于零的列索引。
///注意:此方法仅处理长度最多为两个(即A到Z和AA到ZZ)的列。
///需要时可以实现三个长度。
///< / summary>
///< param name =columnName>列名称(即A或AB)< / param>
///< returns>如果转换成功,则基于零的索引;否则为空< / returns>
public static int? GetColumnIndexFromName(string columnName)
{
int? columnIndex = null;

string [] colLetters = Regex.Split(columnName,([A-Z] +));
colLetters = colLetters.Where(s =>!string.IsNullOrEmpty(s))ToArray();

if(colLetters.Count()< = 2)
{
int index = 0;
foreach(col collet中的string col)
{
列表< char> col1 = colLetters.ElementAt(index).ToCharArray()。ToList();
int? indexValue = Letters.IndexOf(col1.ElementAt(index));

if(indexValue!= -1)
{
//两位数字列的第一个字母需要一些额外的计算
if(index == 0& ;& colLetters.Count()== 2)
{
columnIndex = columnIndex == null? (indexValue + 1)* 26:columnIndex +((indexValue + 1)* 26);
}
else
{
columnIndex = columnIndex == null? indexValue:columnIndex + indexValue;
}
}

index ++;
}
}

return columnIndex;
}

在得到最大的列索引之后,调用 InsertCellsForCellRange 方法,用空白单元格填充所有缺失的单元格。然后再读取数据,各列就应该都能对齐了。(所有辅助方法都列在 InsertCellsForCellRange 方法之后)

  /// <总结> 
///如果需要矩形范围的单元格,则插入单元格
///< / summary>
///< param name =startCellReference>矩形的左上方单元格< / param>
///< param name =endCellReference>矩形的右下单元格< / param>
///< param name =worksheetPart>插入单元格的工作表部分< / param>
public static void InsertCellsForCellRange(string startCellReference,string endCellReference,WorksheetPart worksheetPart)
{
uint startRow = GetRowIndex(startCellReference);
uint endRow = GetRowIndex(endCellReference);
string startColumn = GetColumnName(startCellReference);
string endColumn = GetColumnName(endCellReference);

//如果需要,可以逐行插入单元格
(uint currentRow = startRow; currentRow< = endRow; currentRow ++)
{
string currentCell = startColumn + currentRow.ToString();
string endCell = IncrementCellReference(endColumn + currentRow.ToString(),CellReferencePartEnum.Column);

//检查以确保范围内的所有单元格都存在;如果没有创建它们
while(!currentCell.Equals(endCell))
{
if(GetCell(worksheetPart,currentCell)== null)
{
InsertCell GetColumnName(currentCell),GetRowIndex(currentCell),worksheetPart);
}

//将引用移动到范围内的下一个单元格
currentCell = IncrementCellReference(currentCell,CellReferencePartEnum.Column);
}
}
}

///< summary>
///给定单元名称,解析指定的单元格以获取行索引。
///< / summary>
///< param name =cellReference>单元格的地址(即B2)< / param>
///< returns>行索引(即.2)< / returns>
public static uint GetRowIndex(string cellReference)
{
//创建一个正则表达式以匹配单元名称的行索引部分。
Regex regex = new Regex(@\d +);
匹配match = regex.Match(cellReference);

return uint.Parse(match.Value);
}



///< summary>
///给定一个单元名称,解析指定的单元格以获取列名。
///< / summary>
///< param name =cellReference>单元格的地址(即B2)< / param>
///< returns>列名称(即B)< / returns>
public static string GetColumnName(string cellReference)
{
//创建一个正则表达式以匹配单元格名称的列名称部分。
Regex regex = new Regex([A-Za-z] +);
匹配match = regex.Match(cellReference);

return match.Value;
}

///< summary>
///增加给定单元格的引用。这个引用来自CellReference属性
///在单元格上。
///< / summary>
///< param name =reference>引用字符串< / param>
///< param name =cellRefPart>表示要增加什么< / param>
///< returns>< / returns>
public static string IncrementCellReference(string reference,CellReferencePartEnum cellRefPart)
{
string newReference = reference;

if(cellRefPart!= CellReferencePartEnum.None&&String !IsNullOrEmpty(reference))
{
string [] parts = Regex.Split(reference,( [AZ] +));

if(cellRefPart == CellReferencePartEnum.Column || cellRefPart == CellReferencePartEnum.Both)
{
列表< char> col = parts [1] .ToCharArray()。ToList();
bool needsIncrement = true;
int index = col.Count - 1;

do
{
//递增最后一个字母
col [index] = Letters [Letters.IndexOf(col [index])+ 1];

//如果是最后一个字母,那么我们需要将其转换为'A'
if(col [index] == Letters [Letters.Count - 1])
{
col [index] = Letters [0];
}
else
{
needsIncrement = false;
}

} while(needsIncrement&& --index> = 0);

//如果为true,那么我们需要在混合中添加另一个字母。初始值为ZZ
if(needsIncrement)
{
col.Add(Letters [0]);
}

parts [1] = new String(col.ToArray());
}

if(cellRefPart == CellReferencePartEnum.Row || cellRefPart == CellReferencePartEnum.Both)
{
//增加行号。没有这个组件,引用是无效的,所以我们假设它将始终存在。
parts [2] =(int.Parse(parts [2])+ 1).ToString();
}

newReference = parts [1] + parts [2];
}

return newReference;
}

///< summary>
///返回与工作表上的特定地址相对应的单元格对象
///< / summary>
///< param name =workSheetPart> WorkSheet搜索单元格地址< / param>
///< param name =cellAddress>单元格地址(即B2)< / param>
///< returns>单元格对象< / returns>
public static Cell GetCell(WorksheetPart workSheetPart,string cellAddress)
{
return workSheetPart.Worksheet.Descendants< Cell>()
.Where(c => cellAddress.Equals c.CellReference))
.SingleOrDefault();
}

///< summary>
///在指定的colName和rowIndex上插入一个新单元格。如果单元格
///已经存在,则返回现有单元格。
///< / summary>
///< param name =colName>列名< / param>
///< param name =rowIndex>行索引< / param>
///< param name =worksheetPart> Worksheet Part< / param>
///< returns>插入单元格< / returns>
public static Cell InsertCell(string colName,uint rowIndex,WorksheetPart worksheetPart)
{
return InsertCell(colName,rowIndex,worksheetPart,null);
}

///< summary>
///在指定的colName和rowIndex上插入一个新单元格。如果单元格
///已经存在,那么现有的单元格将向右移动。
///< / summary>
///< param name =colName>列名< / param>
///< param name =rowIndex>行索引< / param>
///< param name =worksheetPart> Worksheet Part< / param>
///< param name =cell>< / param>
///< returns>插入单元格< / returns>
public static Cell InsertCell(string colName,uint rowIndex,WorksheetPart worksheetPart,Cell insertCell)
{
工作表工作表= worksheetPart.Worksheet;
SheetData sheetData = worksheet.GetFirstChild< SheetData>();
string insertReference = colName + rowIndex;

//如果工作表不包含具有指定行索引的行,请插入一个。
行行;
if(sheetData.Elements< Row>()。Where(r => r.RowIndex == rowIndex).Count()!= 0)
{
row = sheetData.Elements< ; Row>()。其中​​(r => r.RowIndex == rowIndex).First();
}
else
{
row = new Row(){RowIndex = rowIndex};
sheetData.Append(row);
}

Cell retCell = row.Elements< Cell>()。FirstOrDefault(c => c.CellReference.Value == colName + rowIndex);
//如果retCell不为空,并且我们没有插入一个新的单元格,那么只需跳过所有内容并返回单元格
if(retCell!= null)
{
//注意:如果条件不合并,因为当外部的if为真时,我们想跳过父级else。
//如果retCell不为null,并且我们正在插入一个新的单元格,然后将所有现有的单元格移动到右侧。
if(insertCell!= null)
{
//获取行中具有相等或更高列值的所有单元格。
//将要插入的单元格添加到临时列表中,并重新索引所有单元格。
列表<单元格> cell = row.Descendants< Cell>()。其中​​(c => String.Compare(c.CellReference.Value,insertReference)> = 0).ToList();
cells.Insert(0,insertCell);
string cellReference = insertReference;

foreach(单元格中的单元格单元格)
{
//更新行单元格的引用。
cell.CellReference = new StringValue(cellReference);
IncrementCellReference(cellReference,CellReferencePartEnum.Column);
}

//实际上将新单元格插入行
retCell = row.InsertBefore(insertCell,retCell); //在这一点上,retCell仍然指向具有insertReference

}
// Else retCell为空的行,这意味着在指定的位置没有单元格存在,所以我们需要在这个空间放一个新的单元格。
//如果一个单元格被传入这个方法,那么它将被插入。如果没有,将插入一个新的。
else
{
//根据CellReference,单元格必须按顺序排列。确定插入新单元格的位置。
//顺序顺序不能是字符串比较顺序,必须为Excel顺序(A,B,...AA,BB等)
单元格RefCell =空值;
foreach(row.Elements< Cell>())中的单元格单元
{
string cellColumn = Regex.Replace(cell.CellReference.Value,@\d,) ;
if(colName.Length< = cellColumn.Length&& string.Compare(cell.CellReference.Value,insertReference,true)> 0)
{
refCell = cell;
break;
}
}

//提供插入单元格参数,否则创建一个新单元格
retCell = insertCell? new Cell(){CellReference = insertReference};
row.InsertBefore(retCell,refCell);
}

return retCell;
}

//其他缺失的部分

public enum CellReferencePartEnum
{
    None,
    Column,
    Row,
    Both
}

private static List<char> Letters = new List<char>() { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' ' };


I'm working with an xlsx Excel file on the server side in C#. In a spreadsheet, say there are 15 columns (cells) total. In the rows of cells, some values are missing. The first row, which is my header, will properly have the 15 cells. But in my data rows, some cells might have empty values, so Open XML has a "jagged" set of cell values. Row 1 will have the full 15 cells, while Row 2 might have 13 cells since two of the values are empty. What! How do I map this data properly? It basically shifts everything to the left and my cell values are wrong. What am I missing? It seems like they are being "collapsed" in Open XML terminology.

// Locate the first sheet listed in the workbook and resolve it to its worksheet part.
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
Sheets sheetCollection = workbookPart.Workbook.GetFirstChild<Sheets>();
IEnumerable<Sheet> sheets = sheetCollection.Elements<Sheet>();

// A Sheet entry only carries a relationship id; the worksheet content lives in the part with that id.
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);

// Enumerate the rows stored under the worksheet's SheetData element.
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();

CLARIFICATION: Here's another way to ask this question. What if I want to take the contents on an Excel file and put it into a DataTable. I want all the columns of data to line up correctly. How could I accomplish this?

This question is asked better than I here: reading Excel Open XML is ignoring blank cells

解决方案

One way you can accomplish what you want is to figure out the largest column index in all your rows and then fill in all the empty cell values with blanks. This will keep all your columns lined up properly.

Here is a quick snippet to figure out the largest column index:

// Find the largest zero based column index used by any row.
int? biggestColumnIndex = 0;
foreach (Row row in rows)
{
    // Take the last Cell child explicitly: casting row.LastChild fails when the row ends with a non-cell element.
    Cell lastCell = row.Elements<Cell>().LastOrDefault();
    if (lastCell == null || lastCell.CellReference == null)
    {
        continue;
    }

    // CellReference holds a full address such as "B2"; GetColumnIndexFromName expects only the column
    // letters, so strip the row number first.
    string lastColumnName = GetColumnName(lastCell.CellReference.Value);
    int? columnIndex = GetColumnIndexFromName(lastColumnName);

    // Keep the index only if this row reaches further right than every previous row.
    if (columnIndex.HasValue && columnIndex > biggestColumnIndex)
    {
        biggestColumnIndex = columnIndex;
    }
}

        /// <summary>
        /// Converts a column name to its zero based column index (A = 0, Z = 25, AA = 26, AB = 27, ...).
        /// </summary>
        /// <remarks>
        /// Column names of any length are supported. Letters are read case-insensitively, and anything after
        /// the leading letters (such as the row number of a full cell reference like "AB12") is ignored.
        /// </remarks>
        /// <param name="columnName">Column Name (ie. A or AB)</param>
        /// <returns>Zero based index if the conversion was successful; otherwise null</returns>
        public static int? GetColumnIndexFromName(string columnName)
        {
            if (string.IsNullOrEmpty(columnName))
            {
                return null;
            }

            // Column names are bijective base-26 numbers: each letter contributes a digit from 1 (A) to 26 (Z).
            int oneBasedIndex = 0;
            int lettersRead = 0;

            foreach (char character in columnName)
            {
                int digit;
                if (character >= 'A' && character <= 'Z')
                {
                    digit = character - 'A' + 1;
                }
                else if (character >= 'a' && character <= 'z')
                {
                    digit = character - 'a' + 1;
                }
                else
                {
                    // First non-letter ends the column part (e.g. the row number in "AB12").
                    break;
                }

                // Refuse names so long that the index would not fit in an int.
                if (oneBasedIndex > (int.MaxValue - 26) / 26)
                {
                    return null;
                }

                oneBasedIndex = (oneBasedIndex * 26) + digit;
                lettersRead++;
            }

            if (lettersRead == 0)
            {
                return null;
            }

            return oneBasedIndex - 1;
        }

Then call the InsertCellsForCellRange method after you have the biggest column index to fill in all the empty cells with blank cells. Then read in your data and they should all line up. (All helper methods are below the InsertCellsForCellRange method)

/// <summary>
/// Makes sure every cell of a rectangular range exists, inserting a cell wherever one is missing.
/// </summary>
/// <param name="startCellReference">Upper left cell of the rectangle</param>
/// <param name="endCellReference">Lower right cell of the rectangle</param>
/// <param name="worksheetPart">Worksheet part to insert cells</param>
public static void InsertCellsForCellRange(string startCellReference, string endCellReference, WorksheetPart worksheetPart)
{
    uint firstRow = GetRowIndex(startCellReference);
    uint lastRow = GetRowIndex(endCellReference);
    string firstColumn = GetColumnName(startCellReference);
    string lastColumn = GetColumnName(endCellReference);

    // Walk the rectangle one row at a time, left to right.
    for (uint rowNumber = firstRow; rowNumber <= lastRow; rowNumber++)
    {
        string cursor = firstColumn + rowNumber.ToString();

        // The walk stops at the reference one column past the right edge of the rectangle.
        string stopReference = IncrementCellReference(lastColumn + rowNumber.ToString(), CellReferencePartEnum.Column);

        for (; !cursor.Equals(stopReference); cursor = IncrementCellReference(cursor, CellReferencePartEnum.Column))
        {
            // Existing cells are left untouched.
            if (GetCell(worksheetPart, cursor) != null)
            {
                continue;
            }

            InsertCell(GetColumnName(cursor), GetRowIndex(cursor), worksheetPart);
        }
    }
}

        /// <summary>
        /// Extracts the row number from a cell reference.
        /// </summary>
        /// <param name="cellReference">Address of the cell (ie. B2)</param>
        /// <returns>Row Index (ie. 2)</returns>
        /// <exception cref="FormatException">The reference contains no digits.</exception>
        public static uint GetRowIndex(string cellReference)
        {
            // The first run of digits in the reference is taken as the row number.
            string rowDigits = Regex.Match(cellReference, @"\d+").Value;

            return uint.Parse(rowDigits);
        }



    /// <summary>
    /// Extracts the column letters from a cell reference.
    /// </summary>
    /// <param name="cellReference">Address of the cell (ie. B2)</param>
    /// <returns>Column Name (ie. B); an empty string when the reference contains no letters</returns>
    public static string GetColumnName(string cellReference)
    {
        // The first run of ASCII letters (either case) in the reference is taken as the column name.
        Match columnMatch = Regex.Match(cellReference, "[A-Za-z]+");

        return columnMatch.Value;
    }

        /// <summary>
        /// Increments the reference of a given cell.  This reference comes from the CellReference property
        /// on a Cell.
        /// </summary>
        /// <remarks>
        /// The column is advanced like an odometer (A to B, Z to AA, AZ to BA, ZZ to AAA). Column letters are
        /// accepted in either case and are returned in upper case.
        /// </remarks>
        /// <param name="reference">Reference string: column letters optionally followed by a row number (ie. B2)</param>
        /// <param name="cellRefPart">Indicates what is to be incremented</param>
        /// <returns>
        /// The incremented reference. The input is returned unchanged when it is null or empty, or when
        /// <paramref name="cellRefPart"/> is None.
        /// </returns>
        /// <exception cref="ArgumentException">The reference is not column letters followed by optional digits.</exception>
        /// <exception cref="FormatException">The row is to be incremented but the reference has no row number.</exception>
        public static string IncrementCellReference(string reference, CellReferencePartEnum cellRefPart)
        {
            if (cellRefPart == CellReferencePartEnum.None || String.IsNullOrEmpty(reference))
            {
                return reference;
            }

            // Accept lower case letters too, matching what GetColumnName recognises as a column name.
            Match parsed = Regex.Match(reference, @"^([A-Za-z]+)([0-9]*)\z");
            if (!parsed.Success)
            {
                throw new ArgumentException("'" + reference + "' is not a valid cell reference.", "reference");
            }

            string column = parsed.Groups[1].Value.ToUpperInvariant();
            string row = parsed.Groups[2].Value;

            if (cellRefPart == CellReferencePartEnum.Column || cellRefPart == CellReferencePartEnum.Both)
            {
                char[] letters = column.ToCharArray();
                int position = letters.Length - 1;

                // Every trailing 'Z' rolls over to 'A' and carries into the letter on its left.
                while (position >= 0 && letters[position] == 'Z')
                {
                    letters[position] = 'A';
                    position--;
                }

                if (position >= 0)
                {
                    letters[position]++;
                    column = new String(letters);
                }
                else
                {
                    // All letters were 'Z' (e.g. "ZZ"), so the name grows by one letter.
                    column = "A" + new String(letters);
                }
            }

            if (cellRefPart == CellReferencePartEnum.Row || cellRefPart == CellReferencePartEnum.Both)
            {
                // int.Parse throws FormatException on an empty row part, which signals a reference without a row.
                int nextRow = int.Parse(row, System.Globalization.CultureInfo.InvariantCulture) + 1;
                row = nextRow.ToString(System.Globalization.CultureInfo.InvariantCulture);
            }

            return column + row;
        }

        /// <summary>
        /// Looks up the cell that has the given address anywhere on the worksheet.
        /// </summary>
        /// <param name="workSheetPart">WorkSheet to search for cell address</param>
        /// <param name="cellAddress">Cell Address (ie. B2)</param>
        /// <returns>The matching cell, or null when no cell has that address</returns>
        /// <exception cref="InvalidOperationException">More than one cell has the requested address.</exception>
        public static Cell GetCell(WorksheetPart workSheetPart, string cellAddress)
        {
            IEnumerable<Cell> allCells = workSheetPart.Worksheet.Descendants<Cell>();

            return allCells.SingleOrDefault(candidate => cellAddress.Equals(candidate.CellReference));
        }

        /// <summary>
        /// Ensures a cell exists at the specified colName and rowIndex. When a cell is
        /// already present at that address it is returned as-is; otherwise an empty cell is created.
        /// </summary>
        /// <param name="colName">Column Name</param>
        /// <param name="rowIndex">Row Index</param>
        /// <param name="worksheetPart">Worksheet Part</param>
        /// <returns>The existing or newly inserted cell</returns>
        public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart)
        {
            // Passing no cell tells the four-argument overload to reuse an existing cell or create a blank one.
            Cell cellAtAddress = InsertCell(colName, rowIndex, worksheetPart, insertCell: null);

            return cellAtAddress;
        }

        /// <summary>
        /// Inserts a cell at the specified colName and rowIndex, creating the row when the
        /// worksheet does not contain it yet.
        /// </summary>
        /// <remarks>
        /// <para>If a cell already exists at the address and <paramref name="insertCell"/> is null, the existing
        /// cell is returned untouched.</para>
        /// <para>If a cell already exists and <paramref name="insertCell"/> is supplied, every cell of the row at
        /// or to the right of the address is shifted one column to the right and the supplied cell takes the
        /// address.</para>
        /// <para>If no cell exists at the address, the supplied cell (or a new empty cell) is placed there, in
        /// front of the first cell whose column comes later in spreadsheet column order.</para>
        /// </remarks>
        /// <param name="colName">Column Name</param>
        /// <param name="rowIndex">Row Index</param>
        /// <param name="worksheetPart">Worksheet Part</param>
        /// <param name="insertCell">Cell to insert, or null to reuse the existing cell or create an empty one</param>
        /// <returns>Inserted Cell, or the existing cell when nothing had to be inserted</returns>
        public static Cell InsertCell(string colName, uint rowIndex, WorksheetPart worksheetPart, Cell insertCell)
        {
            Worksheet worksheet = worksheetPart.Worksheet;
            SheetData sheetData = worksheet.GetFirstChild<SheetData>();
            string insertReference = colName + rowIndex;

            // If the worksheet does not contain a row with the specified row index, insert one.
            Row row = sheetData.Elements<Row>().FirstOrDefault(r => r.RowIndex == rowIndex);
            if (row == null)
            {
                row = new Row() { RowIndex = rowIndex };

                // Keep rows in ascending order: place the new row before the first row with a larger index.
                // InsertBefore appends when the reference element is null.
                Row followingRow = sheetData.Elements<Row>()
                                            .FirstOrDefault(r => r.RowIndex != null && r.RowIndex.Value > rowIndex);
                sheetData.InsertBefore(row, followingRow);
            }

            Cell existingCell = row.Elements<Cell>()
                                   .FirstOrDefault(c => c.CellReference != null && c.CellReference.Value == insertReference);

            if (existingCell != null)
            {
                // Nothing to insert: hand back the cell that is already at the address.
                if (insertCell == null)
                {
                    return existingCell;
                }

                // Collect the cells at or to the right of the insertion column before changing any reference.
                List<Cell> cellsToShift = row.Elements<Cell>()
                                             .Where(c => c.CellReference != null
                                                         && CompareColumnNames(GetColumnName(c.CellReference.Value), colName) >= 0)
                                             .ToList();

                // Move each of them one column to the right, keeping the incremented reference.
                foreach (Cell cell in cellsToShift)
                {
                    string shiftedReference = IncrementCellReference(cell.CellReference.Value, CellReferencePartEnum.Column);
                    cell.CellReference = new StringValue(shiftedReference);
                }

                // The new cell takes over the freed address and sits in front of the cell that used to own it.
                insertCell.CellReference = new StringValue(insertReference);
                return row.InsertBefore(insertCell, existingCell);
            }

            // No cell exists at the address. Cells must stay in column order, so find the first cell whose
            // column comes after the requested one; null means the new cell goes at the end of the row.
            Cell refCell = row.Elements<Cell>()
                              .FirstOrDefault(c => c.CellReference != null
                                                   && CompareColumnNames(GetColumnName(c.CellReference.Value), colName) > 0);

            Cell retCell;
            if (insertCell != null)
            {
                // Make the supplied cell's address agree with the position it is inserted at.
                insertCell.CellReference = new StringValue(insertReference);
                retCell = insertCell;
            }
            else
            {
                retCell = new Cell() { CellReference = insertReference };
            }

            row.InsertBefore(retCell, refCell);

            return retCell;
        }

        /// <summary>
        /// Compares two column names in spreadsheet column order (A, B, ... Z, AA, AB, ...), ignoring case.
        /// </summary>
        /// <param name="left">First column name</param>
        /// <param name="right">Second column name</param>
        /// <returns>Negative when left comes first, zero when equal, positive when left comes later</returns>
        private static int CompareColumnNames(string left, string right)
        {
            // A shorter name always comes first ("Z" before "AA"); plain string order would get this wrong.
            if (left.Length != right.Length)
            {
                return left.Length.CompareTo(right.Length);
            }

            return string.Compare(left, right, StringComparison.OrdinalIgnoreCase);
        }

//Other missing pieces

/// <summary>
/// Selects which part of a cell reference IncrementCellReference should advance.
/// </summary>
public enum CellReferencePartEnum
{
    /// <summary>Nothing is incremented; the reference is returned as given.</summary>
    None = 0,

    /// <summary>Only the column letters are incremented.</summary>
    Column = 1,

    /// <summary>Only the row number is incremented.</summary>
    Row = 2,

    /// <summary>Both the column letters and the row number are incremented.</summary>
    Both = 3
}

 // Lookup table of the column letters 'A'..'Z' at indexes 0..25, followed by a single space at index 26.
 // The trailing space is a sentinel one position past 'Z': code that advances a letter by index can
 // detect a roll-over by comparing against the last entry instead of running off the end of the list.
 private static List<char> Letters = new List<char>("ABCDEFGHIJKLMNOPQRSTUVWXYZ ");

这篇关于如何使用Open XML电子表格“uncollapse”电子表格中的单元格?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆