C# 2.0 解析 Excel 电子表格的最快方法
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/457636/
Warning: these are provided under the CC BY-SA 4.0 license. You are free to use/share it, but you must attribute it to the original authors (not me):
StackOverFlow
C# 2.0 Fastest way to parse Excel spreadsheet
提问by leora
Possible Duplicate:
Reading Excel files from C#
可能的重复:
从 C# 读取 Excel 文件
What is the fastest way to read large sets of data from Excel in C#? Example code would be great.
用 C# 从 Excel 中读取大量数据的最快方法是什么?如果能提供示例代码就更好了。
回答by GvS
In our desktop environment, I have reached the best mix between performance, flexibility and stability by using Excel via COM.
在我们的桌面环境中,我通过 COM 使用 Excel 实现了性能、灵活性和稳定性之间的最佳组合。
Access to Excel is always via the same thread.
始终通过同一线程访问 Excel。
I use late-binding (in VB.Net) to make my app version independent.
我使用后期绑定(在 VB.Net 中)使我的应用程序版本独立。
The rest of the application is developed in C#, only this part and some other small parts are in VB, because they are easier in VB.Net.
其余的应用程序是用C#开发的,只有这部分和其他一些小部分在VB中,因为它们在VB.Net中更容易。
' Reads the block of cells around A1 of one worksheet into a 2-dimensional array,
' using late-bound calls (every Excel object is typed As Object).
' fileName and WorkSheetNr come from the surrounding code, which is not shown here.
' NOTE(review): late binding on Object presumably requires Option Strict Off - confirm.
Dim workBook As Object = GetObject(fileName)
Dim workSheet As Object = workBook.WorkSheets.Item(WorkSheetNr)
' Start at cell (1, 1) and ask Excel for the region surrounding it.
Dim range As Object = workSheet.Cells.Item(1, 1)
Dim range2 As Object = range.CurrentRegion
' Copy the region's first row/column numbers into typed Integer locals before using them.
Dim rrow As Integer = range2.Row ' For XL97, first convert to integer. XL97 will generate an error '
Dim rcolumn As Integer = range2.Column
' Rebuild the region explicitly from its top-left cell and a bottom-right cell
' offset by (row count - 1, column count - 1).
Dim top As Object = workSheet.Cells.Item(rrow, rcolumn)
Dim bottom As Object = top.Offset(range2.Rows.Count - 1, range2.Columns.Count - 1)
range = workSheet.Range(top, bottom)
' Reading Value on the whole range is the statement that moves the data from Excel to .Net.
Dim values As Object(,)
values = range.Value
Here you have a 2-dimensional array containing the values from Excel. The last statement gets the data from Excel to .Net.
这里有一个包含 Excel 值的二维数组。最后一条语句将数据从 Excel 获取到 .Net。
Because of the limits on the size of an Excel sheet, these arrays cannot get very large, so memory should not be a problem.
由于 Excel 工作表的大小限制,这些不能变得非常大,因此内存应该不是问题。
We have done some performance tests on multiple systems. It is optimized to make as few (out-of-process) COM calls as possible.
我们已经在多个系统上进行了一些性能测试。它经过优化以创建尽可能少的(进程外)COM 调用。
This approach is the one that has given us the best performance, especially since the data is directly in an array, and access to this data is faster than going through a dataset.
这种方式给了我们最好的性能,特别是因为数据直接在一个数组中,并且访问这些数据比通过数据集更快。
The slow part of this solution is starting Excel. But if you need to process multiple files, one right after another, the cost of starting Excel is paid only once.
此解决方案中较慢的部分是启动 Excel。但如果您需要连续处理多个文件,启动 Excel 的开销只需承担一次。
Also I would not use this solution in a server environment.
此外,我不会在服务器环境中使用此解决方案。
回答by Vedigoundan G
/// <summary>
/// Header captions that <c>ExcelParser</c> looks for when locating the header row
/// of a worksheet, plus the number of such captions a row must contain.
/// </summary>
/// <remarks>
/// The fields are read-only so that no code can reassign them at run time and
/// silently change which row is recognised as the header.
/// </remarks>
public class ExcelHeaderValues
{
    /// <summary>Caption of the CUSIP column.</summary>
    public static readonly string CUSIP = "CUSIP";

    /// <summary>Caption of the ORIG column.</summary>
    public static readonly string ORIG = "ORIG";

    /// <summary>Caption of the PRICE column.</summary>
    public static readonly string PRICE = "PRICE";

    /// <summary>Number of recognised captions a row must contain to count as the header row.</summary>
    public static readonly int COLUMNCOUNT = 3;
}
/// <summary>
/// Reads every eligible worksheet of a workbook through OLE DB, locates the row that
/// carries the CUSIP / ORIG / PRICE captions and collects one pipe-delimited line per
/// qualifying data row below it.
/// </summary>
/// <remarks>
/// Sheets are read with <c>HDR=No</c>, so the caption row is found by scanning cell
/// text rather than by column names.
/// </remarks>
public class ExcelParser
{
    // Ordinals of the table-name and table-type columns in the schema table returned
    // by GetOleDbSchemaTable(OleDbSchemaGuid.Tables, ...).
    private const int SchemaTableNameOrdinal = 2;
    private const int SchemaTableTypeOrdinal = 3;

    // A CUSIP cell is accepted only when it is exactly this many characters long.
    private const int CusipLength = 9;

    // The ORIG value is multiplied by this factor before it is written to the output.
    private const double OrigScale = 1000000;

    private readonly ArrayList collOutput = new ArrayList();
    private readonly string path = string.Empty;

    /// <summary>
    /// Creates a parser with an empty workbook path.
    /// </summary>
    public ExcelParser()
    {
    }

    /// <summary>
    /// Creates a parser for the workbook at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">File name of the workbook to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    public ExcelParser(string path)
    {
        if (path == null)
        {
            throw new ArgumentNullException("path");
        }

        this.path = path;
    }

    /// <summary>
    /// Lines collected by <see cref="Extract"/>. Each entry is a string of the form
    /// <c>CUSIP|ORIG*1000000|PRICE|||||</c>. Repeated calls to <see cref="Extract"/>
    /// append to this list.
    /// </summary>
    public ArrayList Output
    {
        get { return collOutput; }
    }

    /// <summary>
    /// Opens the workbook, walks its worksheets and appends one output line per
    /// qualifying data row to <see cref="Output"/>.
    /// </summary>
    /// <exception cref="FormatException">
    /// A qualifying row has an ORIG cell that <see cref="Convert.ToDouble(string)"/> cannot parse.
    /// </exception>
    /// <exception cref="Exception">
    /// Filling a worksheet failed; the original error is available as InnerException.
    /// </exception>
    public void Extract()
    {
        // The connection is scoped to this call so it is always closed, even on failure.
        using (OleDbConnection connection = new OleDbConnection(GetConnectionString()))
        {
            connection.Open();
            DataTable schemaTable = connection.GetOleDbSchemaTable(
                OleDbSchemaGuid.Tables,
                new object[] { null, null, null, "TABLE" });

            foreach (DataRow schemaRow in schemaTable.Rows)
            {
                string sheetName = schemaRow[SchemaTableNameOrdinal].ToString().ToUpperInvariant();
                string tableType = schemaRow[SchemaTableTypeOrdinal].ToString();
                if (!IsCandidateSheet(sheetName, tableType))
                {
                    continue;
                }

                using (DataSet sheetData = LoadSheet(connection, sheetName))
                {
                    ExtractSheet(sheetData.Tables[0]);
                }
            }
        }
    }

    /// <summary>
    /// Decides from the (upper-cased) schema name and type whether a table is a
    /// worksheet that should be read.
    /// </summary>
    private static bool IsCandidateSheet(string sheetName, string tableType)
    {
        if (tableType != "TABLE")
        {
            return false;
        }

        // Names containing "$_" / "$'_" and the print ranges are skipped.
        if (sheetName.Contains("$_") || sheetName.Contains("$'_"))
        {
            return false;
        }

        if (sheetName.Contains("PRINT_TITLES") || sheetName.Contains("PRINT_AREA"))
        {
            return false;
        }

        if (sheetName == "DLOFFERLOOKUP")
        {
            return false;
        }

        string trimmed = sheetName.Trim();
        return trimmed != "DISCLAIMER$" && trimmed != "SUMMARY$" && trimmed != "FORMULAS$";
    }

    /// <summary>
    /// Loads all rows of one worksheet into a new <see cref="DataSet"/> that the
    /// caller must dispose.
    /// </summary>
    private static DataSet LoadSheet(OleDbConnection connection, string sheetName)
    {
        DataSet sheetData = new DataSet();
        try
        {
            using (OleDbCommand command = new OleDbCommand("SELECT * FROM [" + sheetName + "]", connection))
            using (OleDbDataAdapter adapter = new OleDbDataAdapter(command))
            {
                adapter.Fill(sheetData, sheetName);
            }

            return sheetData;
        }
        catch (Exception ex)
        {
            sheetData.Dispose();

            // Same exception type and message callers saw before, but the root
            // cause and its stack trace are kept as the inner exception.
            throw new Exception(ex.Message, ex);
        }
    }

    /// <summary>
    /// Finds the header row of one worksheet and appends an output line for each
    /// qualifying row below it.
    /// </summary>
    private void ExtractSheet(DataTable sheet)
    {
        string cusipColumn;
        string origColumn;
        string priceColumn;
        int headerRowIndex = FindHeaderRow(sheet, out cusipColumn, out origColumn, out priceColumn);

        // No header row, or a header row that repeats one caption and therefore
        // lacks another: nothing can qualify in this sheet.
        if (headerRowIndex < 0 || cusipColumn == null || origColumn == null || priceColumn == null)
        {
            return;
        }

        // The header row itself can never qualify, so start on the row after it.
        for (int rowIndex = headerRowIndex + 1; rowIndex < sheet.Rows.Count; rowIndex++)
        {
            DataRow row = sheet.Rows[rowIndex];
            string cusip = row[cusipColumn].ToString().Trim();
            string orig = row[origColumn].ToString().Trim();
            string price = row[priceColumn].ToString().Trim().ToUpperInvariant();

            if (cusip.Length != CusipLength || orig.Length == 0 || price.Length == 0 || price.Contains("SOLD"))
            {
                continue;
            }

            collOutput.Add(cusip + "|" + (Convert.ToDouble(orig) * OrigScale) + "|" + price + "|||||");
        }
    }

    /// <summary>
    /// Returns the index of the first row that contains exactly
    /// <c>ExcelHeaderValues.COLUMNCOUNT</c> recognised captions, or -1 when there is
    /// none. The out parameters receive the names of the columns holding each caption
    /// (null when that caption is absent from the header row).
    /// </summary>
    private static int FindHeaderRow(DataTable sheet, out string cusipColumn, out string origColumn, out string priceColumn)
    {
        cusipColumn = null;
        origColumn = null;
        priceColumn = null;

        for (int rowIndex = 0; rowIndex < sheet.Rows.Count; rowIndex++)
        {
            DataRow row = sheet.Rows[rowIndex];
            int matched = 0;
            cusipColumn = null;
            origColumn = null;
            priceColumn = null;

            foreach (DataColumn column in sheet.Columns)
            {
                string caption = row[column].ToString().Trim();

                // Ordinal, case-insensitive comparison so the match does not depend
                // on the current culture's casing rules.
                if (string.Equals(caption, ExcelHeaderValues.CUSIP, StringComparison.OrdinalIgnoreCase))
                {
                    cusipColumn = column.ColumnName;
                    matched++;
                }
                else if (string.Equals(caption, ExcelHeaderValues.ORIG, StringComparison.OrdinalIgnoreCase))
                {
                    origColumn = column.ColumnName;
                    matched++;
                }
                else if (string.Equals(caption, ExcelHeaderValues.PRICE, StringComparison.OrdinalIgnoreCase))
                {
                    priceColumn = column.ColumnName;
                    matched++;
                }
            }

            if (matched > 0 && matched == ExcelHeaderValues.COLUMNCOUNT)
            {
                return rowIndex;
            }
        }

        cusipColumn = null;
        origColumn = null;
        priceColumn = null;
        return -1;
    }

    /// <summary>
    /// Builds the OLE DB connection string for the workbook, choosing provider and
    /// extended properties from the file name's suffix. Anything that is not
    /// .xlsx, .xlsm or .xls is opened with the "HTML Import" properties.
    /// </summary>
    private string GetConnectionString()
    {
        if (HasSuffix(".xlsx"))
        {
            return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0;HDR=No;IMEX=1;'";
        }

        if (HasSuffix(".xlsm"))
        {
            return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0 Macro;HDR=No;IMEX=1;'";
        }

        if (HasSuffix(".xls"))
        {
            return "provider=Microsoft.Jet.OLEDB.4.0;Data Source='" + path + "';Extended Properties='Excel 8.0;HDR=No;IMEX=1;'";
        }

        return "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='HTML Import;IMEX=1;HDR=No;'";
    }

    /// <summary>
    /// True when the workbook path ends with <paramref name="suffix"/>, ignoring case.
    /// Matching the end of the path avoids false hits on directory names.
    /// </summary>
    private bool HasSuffix(string suffix)
    {
        return path.EndsWith(suffix, StringComparison.OrdinalIgnoreCase);
    }
}