oracle 如何使用c#将csv数据导入Oracle
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/5235777/
Warning: these are provided under cc-by-sa 4.0 license. You are free to use/share it, But you must attribute it to the original authors (not me):
StackOverFlow
how to import csv data into Oracle using c#
提问by xoops
How to import csv data into Oracle using c#. Where data to be imported 3GB in size and number of rows 7512263. I've managed to import csv data into Oracle, but the time it takes about 1 hour. How to speed up the time it takes to import csv data into oracle. Thank you. This is my code:
如何使用 c# 将 csv 数据导入 Oracle。其中要导入的数据大小为 3GB,行数为 7512263。我已经设法将 csv 数据导入 Oracle,但时间大约需要 1 小时。如何加快将csv数据导入oracle所需的时间。谢谢你。这是我的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Threading;
using System.Text.RegularExpressions;
using System.IO;
using FileHelpers;
using System.Data.OracleClient;
namespace sqlloader
{
class Program
{
static void Main(string[] args)
{
int jum;
int i;
bool isFirstLine = false;
FileHelperEngine engine = new FileHelperEngine(typeof(XL_XDR));
//Connect To Database
string constr = "Data Source=(DESCRIPTION=(ADDRESS_LIST="
+ "(ADDRESS=(PROTOCOL=TCP)(HOST= pt-9a84825594af )(PORT=1521 )))"
+ "(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=o11g)));"
+ "User Id=xl;Password=rahasia;";
OracleConnection con = new OracleConnection(constr);
con.Open();
// To Read Use:
XL_XDR[] res = engine.ReadFile("DataOut.csv") as XL_XDR[];
jum = CountLinesInFile("DataOut.csv");
FileInfo f2 = new FileInfo("DataOut.csv");
long s2 = f2.Length;
int jmlRecord = jum - 1;
for (i = 0; i < jum; i++)
{
ShowPercentProgress("Processing...", i, jum);
Thread.Sleep(100);
if (isFirstLine == false)
{
isFirstLine = true;
}
else
{
string sql = "INSERT INTO XL_XDR (XDR_ID, XDR_TYPE, SESSION_START_TIME, SESSION_END_TIME, SESSION_LAST_UPDATE_TIME, " +
"SESSION_FLAG, VERSION, CONNECTION_ROW_COUNT, ERROR_CODE, METHOD, HOST_LEN, HOST, URL_LEN, URL, CONNECTION_START_TIME, " +
"CONNECTION_LAST_UPDATE_TIME, CONNECTION_FLAG, CONNECTION_ID, TOTAL_EVENT_COUNT, TUNNEL_PAIR_ID, RESPONSIVENESS_TYPE, " +
"CLIENT_PORT, PAYLOAD_TYPE, VIRTUAL_TYPE, VID_CLIENT, VID_SERVER, CLIENT_ADDR, SERVER_ADDR, CLIENT_TUNNEL_ADDR, " +
"SERVER_TUNNEL_ADDR, ERROR_CODE_2, IPID, C2S_PKTS, C2S_OCTETS, S2C_PKTS, S2C_OCTETS, NUM_SUCC_TRANS, CONNECT_TIME, " +
"TOTAL_RESP, TIMEOUTS, RETRIES, RAI, TCP_SYNS, TCP_SYN_ACKS, TCP_SYN_RESETS, TCP_SYN_FINS, EVENT_TYPE, FLAGS, TIME_STAMP, " +
"EVENT_ID, EVENT_CODE) VALUES (" +
"'" + res[i].XDR_ID + "', '" + res[i].XDR_TYPE + "', '" + res[i].SESSION_START_TIME + "', '" + res[i].SESSION_END_TIME + "', " +
"'" + res[i].SESSION_LAST_UPDATE_TIME + "', '" + res[i].SESSION_FLAG + "', '" + res[i].VERSION + "', '" + res[i].CONNECTION_ROW_COUNT + "', " +
"'" + res[i].ERROR_CODE + "', '" + res[i].METHOD + "', '" + res[i].HOST_LEN + "', '" + res[i].HOST + "', " +
"'" + res[i].URL_LEN + "', '" + res[i].URL + "', '" + res[i].CONNECTION_START_TIME + "', '" + res[i].CONNECTION_LAST_UPDATE_TIME + "', " +
"'" + res[i].CONNECTION_FLAG + "', '" + res[i].CONNECTION_ID + "', '" + res[i].TOTAL_EVENT_COUNT + "', '" + res[i].TUNNEL_PAIR_ID + "', " +
"'" + res[i].RESPONSIVENESS_TYPE + "', '" + res[i].CLIENT_PORT + "', '" + res[i].PAYLOAD_TYPE + "', '" + res[i].VIRTUAL_TYPE + "', " +
"'" + res[i].VID_CLIENT + "', '" + res[i].VID_SERVER + "', '" + res[i].CLIENT_ADDR + "', '" + res[i].SERVER_ADDR + "', " +
"'" + res[i].CLIENT_TUNNEL_ADDR + "', '" + res[i].SERVER_TUNNEL_ADDR + "', '" + res[i].ERROR_CODE_2 + "', '" + res[i].IPID + "', " +
"'" + res[i].C2S_PKTS + "', '" + res[i].C2S_OCTETS + "', '" + res[i].S2C_PKTS + "', '" + res[i].S2C_OCTETS + "', " +
"'" + res[i].NUM_SUCC_TRANS + "', '" + res[i].CONNECT_TIME + "', '" + res[i].TOTAL_RESP + "', '" + res[i].TIMEOUTS + "', " +
"'" + res[i].RETRIES + "', '" + res[i].RAI + "', '" + res[i].TCP_SYNS + "', '" + res[i].TCP_SYN_ACKS + "', " +
"'" + res[i].TCP_SYN_RESETS + "', '" + res[i].TCP_SYN_FINS + "', '" + res[i].EVENT_TYPE + "', '" + res[i].FLAGS + "', " +
"'" + res[i].TIME_STAMP + "', '" + res[i].EVENT_ID + "', '" + res[i].EVENT_CODE + "')";
OracleCommand command = new OracleCommand(sql, con);
command.ExecuteNonQuery();
}
}
Console.WriteLine("Successfully Inserted");
Console.WriteLine();
Console.WriteLine("Number of Row Data: " + jmlRecord.ToString());
Console.WriteLine();
Console.WriteLine("The size of {0} is {1} bytes.", f2.Name, f2.Length);
con.Close();
}
static void ShowPercentProgress(string message, int currElementIndex, int totalElementCount)
{
if (currElementIndex < 0 || currElementIndex >= totalElementCount)
{
throw new InvalidOperationException("currElement out of range");
}
int percent = (100 * (currElementIndex + 1)) / totalElementCount;
Console.Write("\r{0}{1}% complete", message, percent);
if (currElementIndex == totalElementCount - 1)
{
Console.WriteLine(Environment.NewLine);
}
}
static int CountLinesInFile(string f)
{
int count = 0;
using (StreamReader r = new StreamReader(f))
{
string line;
while ((line = r.ReadLine()) != null)
{
count++;
}
}
return count;
}
}
[DelimitedRecord(",")]
public class XL_XDR
{
public string XDR_ID;
public string XDR_TYPE;
public string SESSION_START_TIME;
public string SESSION_END_TIME;
public string SESSION_LAST_UPDATE_TIME;
public string SESSION_FLAG;
public string VERSION;
public string CONNECTION_ROW_COUNT;
public string ERROR_CODE;
public string METHOD;
public string HOST_LEN;
public string HOST;
public string URL_LEN;
public string URL;
public string CONNECTION_START_TIME;
public string CONNECTION_LAST_UPDATE_TIME;
public string CONNECTION_FLAG;
public string CONNECTION_ID;
public string TOTAL_EVENT_COUNT;
public string TUNNEL_PAIR_ID;
public string RESPONSIVENESS_TYPE;
public string CLIENT_PORT;
public string PAYLOAD_TYPE;
public string VIRTUAL_TYPE;
public string VID_CLIENT;
public string VID_SERVER;
public string CLIENT_ADDR;
public string SERVER_ADDR;
public string CLIENT_TUNNEL_ADDR;
public string SERVER_TUNNEL_ADDR;
public string ERROR_CODE_2;
public string IPID;
public string C2S_PKTS;
public string C2S_OCTETS;
public string S2C_PKTS;
public string S2C_OCTETS;
public string NUM_SUCC_TRANS;
public string CONNECT_TIME;
public string TOTAL_RESP;
public string TIMEOUTS;
public string RETRIES;
public string RAI;
public string TCP_SYNS;
public string TCP_SYN_ACKS;
public string TCP_SYN_RESETS;
public string TCP_SYN_FINS;
public string EVENT_TYPE;
public string FLAGS;
public string TIME_STAMP;
public string EVENT_ID;
public string EVENT_CODE;
}
}
回答by Brosto
I've had luck using the bulk binding method in ODB.NET. Do a search for "ArrayBindCount" on this page.
我很幸运在 ODB.NET 中使用了批量绑定方法。在此页面上搜索“ArrayBindCount”。
http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx
http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx
This will allow you to insert everything at once, instead of thousands of tiny inserts. The hours you're currently seeing will turn to minutes.
这将允许您一次插入所有内容,而不是数以千计的微小插入。您当前看到的小时数将变为分钟。
回答by Justin Cave
While strongly seconding Vincent's suggestion to use bind variables (which is likely going to be a huge performance gain as well as preventing the DBA from throttling you for crushing the shared pool) and Brosto's suggestion to do bulk binds, I would tend to question why you would write this sort of thing in C# in the first place. It would be much more efficient to do something like use external tables so that your application causes a file to be placed on the database server's file system and the parsing and loading of the data is done by the external table definition (or even having your application invoke SQL*Loader). That lets you leverage the code Oracle has already optimized for this sort of processing.
虽然强烈支持 Vincent 的使用绑定变量的建议(这可能会带来巨大的性能提升,并防止 DBA 因破坏共享池而限制您)和 Brosto 的进行批量绑定的建议,但我倾向于质疑为什么您首先会在 C# 中编写这种东西。执行诸如使用外部表之类的操作会更有效,以便您的应用程序将文件放置在数据库服务器的文件系统上,并且数据的解析和加载由外部表定义完成(甚至让您的应用程序调用 SQL*Loader)。这使您可以利用 Oracle 已经针对此类处理优化的代码。
Additionally, since you are counting the number of lines in the file once at the outset in order to display a progress bar, you can optimize that by getting an approximate row count from more easily obtained information. You know roughly how many bytes per line in the file and the size of a file is a relatively easy file attribute to retrieve. That should allow you to estimate the number of lines in the file relatively accurately which should be more than enough for a progress bar.
此外,由于您在开始时计算文件中的行数以显示进度条,因此您可以通过从更容易获得的信息中获取近似行数来优化它。您大致知道文件中每行有多少字节,文件的大小是一个相对容易检索的文件属性。这应该允许您相对准确地估计文件中的行数,这对于进度条来说应该绰绰有余。
回答by Alexandre Nascimento
Use a bulk insert to load fast:
使用批量插入快速加载:
Insert Bulk Data From CSV File To Database Table Using SQLBulkCopy Class
使用 SQLBulkCopy 类将 CSV 文件中的批量数据插入到数据库表中
Or
或者
Use a SQL*Loader utility:
使用 SQL*Loader 实用程序:
Loading CSV file data into a Table Variable (Index-by table) in Oracle
回答by Davide Piras
just removing the call to CountLinesInFile
would probably help because in that method you are reading all file line by line and as you say the file is biiig...
只需删除对 的调用CountLinesInFile
可能会有所帮助,因为在该方法中您正在逐行读取所有文件,并且正如您所说的文件是 biiig ...