需要通过 PHP 将大型 CSV 文件导入多个 MySQL 表的省时方法 [英] Need Time-Efficient Method of Importing Large CSV File Via PHP Into Multiple MySQL Tables

查看:29
本文介绍了需要通过 PHP 将大型 CSV 文件导入多个 MySQL 表的省时方法的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

好的,我这里遇到了一些严重的问题.我是这个网站的新手,也是通过 PHP 导入 CSV 数据的新手,但我对编程并不陌生.

目前,我正在建立客户关系经理.我需要创建一个脚本来导入一个文件,该文件将用线索填充数据库.这里的主要问题是潜在客户数据由该公司的公司和员工组成.此外,还从主表中分离出一些其他表,例如帐单信息.

我有一个工作脚本,允许用户将导入的数据映射到特定的行和列.

function mapData($file) {//打开文本文件$fd = fopen($file, "r");//如果找不到文件,则返回 FALSE如果(!$fd){返回假;}//获取前两行$first = 0;$data = 数组();while(!feof($fd)) {if($first == 0) {$cols = fgetcsv($fd, 4096);$data['cols'] = array();if(is_array($cols) && count($cols)) {foreach($cols 作为 $col) {如果(!$col){继续;}$data['cols'][] = $col;}}if(empty($data['cols'])) {返回数组();}$第一个++;继续;}别的 {$data['first'] = fgetcsv($fd, 4096);休息;}}fclose($fd);//返回数据返回 $data;}

上述脚本仅在 CodeIgniter 将文件移动到工作目录后激活.我已经知道文件名是什么了.该文件进入并返回列列表和第一行.任何空列都将被忽略.

此后,进程传递到映射脚本.一旦映射完成并按下导入",这段代码就会加载.

function importLeads($file, $map) {//打开文本文件if(!file_exists($file)) {返回假;}错误报告(E_ALL);设置时间限制(240);ini_set("memory_limit", "512M");$fd = fopen($file, "r");//如果找不到文件,则返回 FALSE如果(!$fd){返回假;}//遍历文件的每一行$真=假;$first = 0;while(!feof($fd)) {if($first == 0) {$cols = fgetcsv($fd);$第一个++;继续;}//获取每一行的列$row = fgetcsv($fd);//遍历列$group = array();$lead_status = array();$lead_type = array();$lead_source = array();$user = 数组();$user_cstm = array();$user_prof = array();$acct = 数组();$acct_cstm = array();$acct_prof = array();$acct_group = array();如果(!$行){继续;}foreach($row as $num => $val) {如果(空($map[$num])){继续;}$val = str_replace('"', "&#34;", $val);$val = str_replace("'", "&#39;", $val);开关($map[$num]){//公司账户案例公司名称":$acct['company_name'] = $val;休息;案例lead_type":$lead_type['name'] = $val;休息;案例lead_source":$lead_source['name'] = $val;休息;案例lead_source_description":$lead_source['name'] = $val;休息;案例运动":$campaign['name'] = $val;休息;案例mcn":$acct['mcn'] = $val;休息;案例usdot":$acct['usdot'] = $val;休息;案例sic_codes":$acct_cstm['sic_codes'] = $val;休息;案例naics_codes":$acct_cstm['naics_codes'] = $val;休息;案例agent_assigned":$acct_cstm['agent_assigned'] = $val;休息;案例group_assigned":$group['name'] = $val;休息;案例评级":$acct_cstm['rating'] = $val;休息;案例main_phone":$acct['phone'] = $val;休息;案例billing_phone":$acct_cstm['billing_phone'] = $val;休息;案例company_fax":$acct['fax'] = $val;休息;案例company_email":$acct['email2'] = $val;休息;//公司地址案例primary_address":$acct['address'] = $val;休息;案例primary_address2":$acct['address2'] = $val;休息;案例primary_city":$acct['city'] = $val;休息;案例primary_state":$acct['state'] = $val;休息;案例primary_zip":$acct['zip'] = $val;休息;案例primary_country":$acct['country'] = $val;休息;案例billing_address":$billing['address'] = $val;休息;案例billing_address2":$billing['address2'] = $val;休息;案例billing_city":$billing['city'] = $val;休息;案例billing_state":$billing['state'] = $val;休息;案例billing_zip":$billing['zip'] = $val;休息;案例billing_country":$billing['country'] = $val;休息;案例company_website":$acct_cstm['网站'] = $val;休息;案例company_revenue":$acct_cstm['收入'] = $val;休息;案例company_about":$acct_prof['aboutus'] = $val;休息;//杂项.公司资料案例bols_per_mo":$acct_cstm['approx_bols_per_mo'] = $val;休息;案例no_employees":$acct_cstm['no_employees'] = $val;休息;案例no_drivers":$acct_prof['drivers'] = $val;休息;案例no_trucks":$acct_prof['power_units'] = $val;休息;案例no_trailers":$acct_cstm['no_trailers'] = $acct_prof['trailers'] = $val;休息;案例no_parcels_day":$acct_cstm['no_parcels_day'] = $val;休息;案例no_shipping_locations":$acct_cstm['no_shipping_locations'] = $val;休息;案例approves_inbound":$acct_cstm['approves_inbound'] = $val;休息;案例what_erp_used":$acct_cstm['what_erp_used'] = $val;休息;案例鸟狗":$acct_cstm['birddog_referral'] = $val;休息;案例status_notes":$acct_cstm['status_notes'] = $val;休息;案例注释":$acct_cstm['notes'] = $val;休息;案例internal_notes":$acct_cstm['notes_internal'] = $val;休息;//用户数据案例称呼":$user_cstm['salutation'] = $val;休息;案例名字":$user['first_name'] = $billing['first_name'] = $val;休息;案例姓氏":$user['last_name'] = $billing['last_name'] = $val;休息;案例user_title":$user_prof['title'] = $val;休息;案例user_about":$user_prof['about'] = $val;休息;案例user_email":$user['email'] = $val;休息;案例home_phone":$user_prof['phone'] = $val;休息;案例手机":$user_cstm['mobile_phone'] = $val;休息;案例direct_phone":$user_cstm['direct_phone'] = $val;休息;案例user_fax":$user_prof['fax'] = $val;休息;案例user_locale":$user['location'] = $val;休息;案例user_website":$user_prof['website_url'] = $val;休息;案例user_facebook":$user_prof['fb_url'] = $val;休息;案例user_twitter":$user_prof['twitter_url'] = $val;休息;案例user_linkedin":$user_prof['linkedin_url'] = $val;休息;}}if(empty($acct['company_name']) || empty($user['first_name']) || empty($user['last_name'])) {继续;}$this->db = $this->load->database('crm_db', TRUE);if(isset($lead_type['name']) && ($name = $lead_type['name'])) {$count = $this->db->count_all("lead_types");$check = $this->db->get_where("lead_types", array("name" => $name));if($check->num_rows() <1) {$this->db->insert("lead_types", array("name" => $name, "order" => $count));$ltyp​​e = $this->db->insert_id();$acct_cstm['lead_type'] = $acct['account_type'] = $user['company_type'] = $ltyp​​e;}}if(isset($lead_source['name']) && ($name = $lead_source['name'])) {$count = $this->db->count_all("lead_sources");$check = $this->db->get_where("lead_sources", array("name" => $name));if($check->num_rows() <1) {$this->db->insert("lead_sources", array("name" => $name, "order" => $count));$acct_cstm['lead_source'] = $this->db->insert_id();}}if(isset($campaign['name']) && ($name = $campaign['name'])) {$check = $this->db->get_where("campaigns", array("name" => $name));if($check->num_rows() <1) {$campaign['id'] = $accounts_cstm['campaign'] = $this->Secure_m->generate_sugar_id();$campaign['date_entered'] = time();$campaign['date_modified'] = time();$campaign['modified_user_id'] = $this->session->userdata('id');$campaign['created_by'] = $this->session->userdata('id');$this->db->insert("campaigns", $campaign);}}if(isset($group['name']) && ($name = $group['name'])) {$order = $this->db->count_all("groups");$check = $this->db->get_where("groups", array("name" => $name));if($check->num_rows() <1) {$this->db->insert("groups", array("name" => $name, "order" => $order));$acct_group['id'] = $this->db->insert_id();}}$mem = 新标准类;$uid = 0;if(is_array($user) && count($user)) {$where = "";if(!empty($user['phone'])) {$where .= "prof.phone = '{$user['phone']}' OR";$where .= "cstm.mobile_phone = '{$user['phone']}' 或 ";$where .= "cstm.direct_phone = '{$user['phone']}'";}if(!empty($user['mobile_phone'])) {如果($哪里){$where .=或";}$where .= "prof.phone = '{$user['mobile_phone']}' 或 ";$where .= "cstm.mobile_phone = '{$user['mobile_phone']}' 或 ";$where .= "cstm.direct_phone = '{$user['mobile_phone']}'";}if(!empty($user['direct_phone'])) {如果($哪里){$where .=或";}$where .= "prof.phone = '{$user['direct_phone']}' OR";$where .= "cstm.mobile_phone = '{$user['direct_phone']}' 或 ";$where .= "cstm.direct_phone = '{$user['direct_phone']}'";}$query = $this->db->query($this->Account_m->userQuery($where));$mem = reset($query->result());if($where && !empty($mem->id)) {$uid = $mem->id;$new = 数组();foreach($user as $k => $v) {if(!empty($mem->$k)) {$new[$k] = $mem->$k;未设置($user[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("leads", $user, array("id" => $uid));$user = $new;}别的 {$user['uxtime'] = time();$user['isclient'] = 0;$user['flag'] = 0;$user['activation_code'] = $this->Secure_m->generate_activate_id();$uid = $this->Secure_m->generate_activate_id(10);$query = $this->db->get_where("leads", array("id" => $uid), 1);$data = reset($query->result());while(!empty($data->id)) {$uid = $this->Secure_m->generate_activate_id(10);$query = $this->db->get_where("leads", array("id" => $uid), 1);$data = reset($query->result());}$user['id'] = $uid;$this->db->insert("leads", $user);}}if($uid && is_array($user_prof) && count($user_prof)) {if(!empty($mem->uid)) {$new = 数组();foreach($user_prof as $k => $v) {if(!empty($mem->$k)) {$new[$k] = $mem->$k;未设置($user_prof[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("mprofiles", $user_prof, array("uid" => $uid));$user_prof = $new;}别的 {$user_prof['uid'] = $uid;$user_prof['flag'] = 0;$this->db->insert("ldetails", $user_prof);}}if($uid && is_array($user_cstm) && count($user_cstm)) {$query = $this->db->get_where("leads_cstm", array("crm_id" => $cid), 1);$data = reset($query->result());if(!empty($data->crm_id)) {$new = 数组();foreach($user_cstm as $k => $v) {if(!empty($mem->$k)) {$new[$k] = $mem->$k;未设置($user_cstm[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("leads_cstm", $acct_prof, array("fa_user_id" => $cid));$user_cstm = $new;}别的 {$user_cstm['crm_id'] = $uid;$user_cstm['date_entered'] = time();$user_cstm['date_modified'] = time();$user_cstm['created_by'] = $this->session->userdata('id');$user_cstm['modified_user_id'] = $this->session->userdata('id');$this->db->insert("leads_cstm", $user_cstm);}}$cmp = 新标准类;$cid = 0;if(is_array($acct) && count($acct)) {$acct['uid'] = $uid;$acct['main_contact'] = "{$user['first_name']} {$user['last_name']}";if(!empty($user['email'])) {$acct['email'] = $user['email'];}$acct['isprospect'] = 0;$acct['flag'] = 0;if(!empty($acct['mcn'])) {$where .= "fms.mcn = '{$acct['mcn']}'";}if(!empty($acct['phone'])) {如果($哪里){$where .=或";}$where .= "fms.phone = '{$acct['phone']}' 或 ";$where .= "crm.billing_phone = '{$acct['phone']}'";}if(!empty($acct['billing_phone'])) {如果($哪里){$where .=或";}$where .= "fms.phone = '{$acct['billing_phone']}' 或 ";$where .= "crm.billing_phone = '{$acct['billing_phone']}'";}if(!empty($acct['company_name'])) {如果($哪里){$where .=或";}$where .= "fms.company_name = '{$acct['company_name']}'";}$query = $this->db->query($this->Account_m->acctQuery($where));$cmp = reset($query->result());if($where && !empty($cmp->id)) {$cid = $cmp->id;$new = 数组();foreach($acct as $k => $v) {if(!empty($cmp->$k)) {$new[$k] = $cmp->$k;未设置($acct[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("accounts", $billing, array("cid" => $cid));$acct = $new;}别的 {$cid = $this->Secure_m->generate_activate_id(10);$query = $this->db->get_where("leads", array("id" => $uid), 1);$data = reset($query->result());while(!empty($data->id)) {$cid = $this->Secure_m->generate_activate_id(10);$query = $this->db->get_where("accounts", array("id" => $cid), 1);$data = reset($query->result());}$acct['id'] = $cid;$this->db->insert("accounts", $acct);}}if($cid && is_array($acct_group) && count($acct_group)) {$grp = $this->db->get_where("accounts_groups", array("cid" => $cid, "gid" => $acct_group['id']));如果(空($ cmp-> id)){$acct_group['cid'] = $cid;$this->db->insert("accounts_groups", $acct_group);}}if($cid && is_array($acct_prof) && count($acct_prof)) {if(!empty($cmp->id)) {$new = 数组();foreach($acct_prof as $k => $v) {if(!empty($cmp->$k)) {$new[$k] = $cmp->$k;未设置($acct_prof[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("cprofiles", $acct_prof, array("cid" => $cid));$acct_prof = $new;}别的 {$acct_prof['cid'] = $cid;$acct_prof['flag'] = 0;$this->db->insert("details", $acct_prof);}}if($cid && is_array($billing) && count($billing)) {$bill = $this->db->get_where("accounts_billing", array("cid" => $cid));if(!empty($bill->id)) {$new = 数组();foreach($acct_prof as $k => $v) {if(!empty($cmp->$k)) {$new[$k] = $cmp->$k;未设置($acct_prof[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("accounts_billing", $billing, array("cid" => $cid));}别的 {$billing['cid'] = $cid;$billing['flag'] = 0;$this->db->insert("accounts_billing", $billing);}}if($cid && $uid) {$this->db->update("leads", array("cid" => $cid), array("id" => $uid));}if($cid && is_array($acct_cstm) && count($acct_cstm)) {$query = $this->db->get_where("accounts_cstm", array("crm_id" => $cid), 1);$data = reset($query->result());if(!empty($data->crm_id)) {$new = 数组();foreach($acct_cstm as $k => $v) {if(!empty($cmp->$k)) {$new[$k] = $cmp->$k;未设置($acct_cstm[$k]);}别的 {$new[$k] = $v;}}//$this->db->update("accounts_cstm", $acct_cstm, array("crm_id" => $cid));$acct_cstm = $new;}别的 {$acct_cstm['crm_id'] = $cid;$acct_cstm['date_entered'] = time();$acct_cstm['date_modified'] = time();$acct_cstm['created_by'] = $this->session->userdata('id');$acct_cstm['modified_user_id'] = $this->session->userdata('id');如果(空($acct_cstm['评级'])){$acct_cstm['评级'] = 1;}$this->db->insert("accounts_cstm", $acct_cstm);}}$真=真;}fclose($fd);返回 $true;}

现在,据我所知,该脚本运行良好.实际代码本身没有任何问题.问题是在大约 400-500 行之后,脚本就会停止.我没有收到错误,但没有进一步处理代码.

我知道这一点,因为在此之后我有代码应该通过 AJAX 返回重定向页面.但是,在 importLeads 函数中的循环之后没有任何内容加载.

我不知道如何让这个脚本更有效率...我肯定它超时了,但我不知道如何让它更有效地运行.我需要这个脚本来分别处理上面的所有信息.我有各种单独的表,它们都链接在一起,这个导入脚本必须以不同的方式设置所有内容.

我已经和我的客户讨论过这个项目.当我将它放到大约 400 行时,这个脚本会起作用.他有很多这样的 CSV 文件,大约有 75,000 行.我要导入的是一个较小的,只有大约 1,200 行.

我曾尝试寻找替代方法,例如 MySQL 的导入脚本,但我不能这样做,因为该脚本必须将数据导入到单独的表中,并且必须首先检查现有数据.我还应该使用导入的信息更新所有空字段,但这会使情况变得更糟.

如果有人知道更有效的方法,将不胜感激.我试图尽可能详细.值得注意的是,我会提到我正在使用 CodeIgniter,但如果有一种更有效的方法不使用 CodeIgniter,我会使用它(不过我仍然可以将它放入 CI 模型中).

解决方案

我编写了 PHP 脚本来批量加载 Stack Overflow 数据转储发布的数据.我导入了数百万行,而且不需要那么长时间.

这里有一些提示:

  • 不要依赖自动提交.为每一行启动和提交事务的开销是巨大的.使用显式事务,并在每 1000 行(或更多行)后提交.

  • 使用准备好的语句.由于您基本上都在执行相同的插入数千次,因此您可以在开始循环之前准备每个插入,然后在循环期间执行,将值作为参数传递.我不知道如何用 CodeIgniter 的数据库库来做到这一点,你必须弄清楚.

  • 调整 MySQL 以进行导入. 增加缓存缓冲区等.有关详细信息,请参阅INSERT 语句的速度.p>

  • 使用 LOAD DATA INFILE. 如果可能.它比使用 INSERT 逐行加载数据快 20 倍.我理解您是否不能,因为您需要获取最后一个插入 id 等等.但在大多数情况下,即使您读取 CSV 文件,重新排列它并将其写入多个临时 CSV 文件,数据加载仍然比使用 INSERT 快.

  • 离线进行.不要在网络请求期间运行长时间运行的任务.PHP 请求的时间限制将终止作业,如果不是今天,那么下周二当作业延长 10% 时.相反,让 Web 请求将作业排入队列,然后将控制权返回给用户.您应该将数据导入作为服务器进程运行,并定期让用户一瞥进度.例如,一种廉价的方法是让您的导入脚本输出.".到临时文件,然后用户可以请求查看临时文件并继续在浏览器中重新加载.如果您想变得更有趣,请使用 Ajax.

Okay, I'm having some serious issues here. I'm new to this site, and new to dealing with importing CSV data via PHP, but I'm not new to programming.

Currently, I'm working on building a Customer Relationship Manager. I need to create a script to import a file that will populate the database with leads. The main issue here is that the Lead data consists of Companies and Employees of said Company. Also, a few other tables are split off, such as billing information, from the main tables.

I have a working script that will allow users to map the imported data to specific rows and columns.

function mapData($file) {
    // Open the Text File
    $fd = fopen($file, "r");

    // Return FALSE if file not found
    if(!$fd) {
        return FALSE;
    }

    // Get the First Two Lines
    $first = 0;
    $data = array();
    while(!feof($fd)) {
        if($first == 0) {
            $cols = fgetcsv($fd, 4096);
            $data['cols'] = array();
            if(is_array($cols) && count($cols)) {
                foreach($cols as $col) {
                    if(!$col) {
                        continue;
                    }
                    $data['cols'][] = $col;
                }
            }
            if(empty($data['cols'])) {
                return array();
            }
            $first++;
            continue;
        }
        else {
            $data['first'] = fgetcsv($fd, 4096);
            break;
        }
    }
    fclose($fd);

    // Return Data
    return $data;
}

The above script only activates after CodeIgniter moves the file to a working directory. I already know what the file name is by this point. The file goes in and returns the list of columns and the first row. Any empty columns are ignored.

After this, process passes to a mapping script. Once mapping is done and "Import" is pressed, this piece of code loads.

function importLeads($file, $map) {
    // Open the Text File
    if(!file_exists($file)) {
        return false;
    }
    error_reporting(E_ALL);
    set_time_limit(240);
    ini_set("memory_limit", "512M");
    $fd = fopen($file, "r");

    // Return FALSE if file not found
    if(!$fd) {
        return FALSE;
    }

    // Traverse Each Line of the File
    $true = false;
    $first = 0;
    while(!feof($fd)) {
        if($first == 0) {
            $cols = fgetcsv($fd);
            $first++;
            continue;
        }

        // Get the columns of each line
        $row = fgetcsv($fd);

        // Traverse columns
        $group = array();
        $lead_status = array();
        $lead_type = array();
        $lead_source = array();
        $user = array();
        $user_cstm = array();
        $user_prof = array();
        $acct = array();
        $acct_cstm = array();
        $acct_prof = array();
        $acct_group = array();
        if(!$row) {
            continue;
        }
        foreach($row as $num => $val) {
            if(empty($map[$num])) {
                continue;
            }
            $val = str_replace('"', "&#34;", $val);
            $val = str_replace("'", "&#39;", $val);
            switch($map[$num]) {
            // Company Account
            case "company_name":
                $acct['company_name'] = $val;
                break;
            case "lead_type":
                $lead_type['name'] = $val;
                break;
            case "lead_source":
                $lead_source['name'] = $val;
                break;
            case "lead_source_description":
                $lead_source['name'] = $val;
                break;
            case "campaign":
                $campaign['name'] = $val;
                break;
            case "mcn":
                $acct['mcn'] = $val;
                break;
            case "usdot":
                $acct['usdot'] = $val;
                break;
            case "sic_codes":
                $acct_cstm['sic_codes'] = $val;
                break;
            case "naics_codes":
                $acct_cstm['naics_codes'] = $val;
                break;
            case "agent_assigned":
                $acct_cstm['agent_assigned'] = $val;
                break;
            case "group_assigned":
                $group['name'] = $val;
                break;
            case "rating":
                $acct_cstm['rating'] = $val;
                break;
            case "main_phone":
                $acct['phone'] = $val;
                break;
            case "billing_phone":
                $acct_cstm['billing_phone'] = $val;
                break;
            case "company_fax":
                $acct['fax'] = $val;
                break;
            case "company_email":
                $acct['email2'] = $val;
                break;

            // Company Location
            case "primary_address":
                $acct['address'] = $val;
                break;
            case "primary_address2":
                $acct['address2'] = $val;
                break;
            case "primary_city":
                $acct['city'] = $val;
                break;
            case "primary_state":
                $acct['state'] = $val;
                break;
            case "primary_zip":
                $acct['zip'] = $val;
                break;
            case "primary_country":
                $acct['country'] = $val;
                break;
            case "billing_address":
                $billing['address'] = $val;
                break;
            case "billing_address2":
                $billing['address2'] = $val;
                break;
            case "billing_city":
                $billing['city'] = $val;
                break;
            case "billing_state":
                $billing['state'] = $val;
                break;
            case "billing_zip":
                $billing['zip'] = $val;
                break;
            case "billing_country":
                $billing['country'] = $val;
                break;
            case "company_website":
                $acct_cstm['website'] = $val;
                break;
            case "company_revenue":
                $acct_cstm['revenue'] = $val;
                break;
            case "company_about":
                $acct_prof['aboutus'] = $val;
                break;

            // Misc. Company Data
            case "bols_per_mo":
                $acct_cstm['approx_bols_per_mo'] = $val;
                break;
            case "no_employees":
                $acct_cstm['no_employees'] = $val;
                break;
            case "no_drivers":
                $acct_prof['drivers'] = $val;
                break;
            case "no_trucks":
                $acct_prof['power_units'] = $val;
                break;
            case "no_trailers":
                $acct_cstm['no_trailers'] = $acct_prof['trailers'] = $val;
                break;
            case "no_parcels_day":
                $acct_cstm['no_parcels_day'] = $val;
                break;
            case "no_shipping_locations":
                $acct_cstm['no_shipping_locations'] = $val;
                break;
            case "approves_inbound":
                $acct_cstm['approves_inbound'] = $val;
                break;
            case "what_erp_used":
                $acct_cstm['what_erp_used'] = $val;
                break;
            case "birddog":
                $acct_cstm['birddog_referral'] = $val;
                break;
            case "status_notes":
                $acct_cstm['status_notes'] = $val;
                break;
            case "notes":
                $acct_cstm['notes'] = $val;
                break;
            case "internal_notes":
                $acct_cstm['notes_internal'] = $val;
                break;

            // User Data
            case "salutation":
                $user_cstm['salutation'] = $val;
                break;
            case "first_name":
                $user['first_name'] = $billing['first_name'] = $val;
                break;
            case "last_name":
                $user['last_name'] = $billing['last_name'] = $val;
                break;
            case "user_title":
                $user_prof['title'] = $val;
                break;
            case "user_about":
                $user_prof['about'] = $val;
                break;
            case "user_email":
                $user['email'] = $val;
                break;
            case "home_phone":
                $user_prof['phone'] = $val;
                break;
            case "mobile_phone":
                $user_cstm['mobile_phone'] = $val;
                break;
            case "direct_phone":
                $user_cstm['direct_phone'] = $val;
                break;
            case "user_fax":
                $user_prof['fax'] = $val;
                break;
            case "user_locale":
                $user['location'] = $val;
                break;
            case "user_website":
                $user_prof['website_url'] = $val;
                break;
            case "user_facebook":
                $user_prof['fb_url'] = $val;
                break;
            case "user_twitter":
                $user_prof['twitter_url'] = $val;
                break;
            case "user_linkedin":
                $user_prof['linkedin_url'] = $val;
                break;
            }
        }
        if(empty($acct['company_name']) || empty($user['first_name']) || empty($user['last_name'])) {
            continue;
        }
        $this->db = $this->load->database('crm_db', TRUE);
        if(isset($lead_type['name']) && ($name = $lead_type['name'])) {
            $count = $this->db->count_all("lead_types");
            $check = $this->db->get_where("lead_types", array("name" => $name));
            if($check->num_rows() < 1) {
                $this->db->insert("lead_types", array("name" => $name, "order" => $count));
                $ltype = $this->db->insert_id();
                $acct_cstm['lead_type'] = $acct['account_type'] = $user['company_type'] = $ltype;
            }
        }
        if(isset($lead_source['name']) && ($name = $lead_source['name'])) {
            $count = $this->db->count_all("lead_sources");
            $check = $this->db->get_where("lead_sources", array("name" => $name));
            if($check->num_rows() < 1) {
                $this->db->insert("lead_sources", array("name" => $name, "order" => $count));
                $acct_cstm['lead_source'] = $this->db->insert_id();
            }
        }
        if(isset($campaign['name']) && ($name = $campaign['name'])) {
            $check = $this->db->get_where("campaigns", array("name" => $name));
            if($check->num_rows() < 1) {
                $campaign['id'] = $accounts_cstm['campaign'] = $this->Secure_m->generate_sugar_id();
                $campaign['date_entered'] = time();
                $campaign['date_modified'] = time();
                $campaign['modified_user_id'] = $this->session->userdata('id');
                $campaign['created_by'] = $this->session->userdata('id');
                $this->db->insert("campaigns", $campaign);
            }
        }
        if(isset($group['name']) && ($name = $group['name'])) {
            $order = $this->db->count_all("groups");
            $check = $this->db->get_where("groups", array("name" => $name));
            if($check->num_rows() < 1) {
                $this->db->insert("groups", array("name" => $name, "order" => $order));
                $acct_group['id'] = $this->db->insert_id();
            }
        }
        $mem = new stdclass;
        $uid = 0;
        if(is_array($user) && count($user)) {
            $where = "";
            if(!empty($user['phone'])) {
                $where .= "prof.phone = '{$user['phone']}' OR ";
                $where .= "cstm.mobile_phone = '{$user['phone']}' OR ";
                $where .= "cstm.direct_phone = '{$user['phone']}'";
            }
            if(!empty($user['mobile_phone'])) {
                if($where) {
                    $where .= " OR ";
                }
                $where .= "prof.phone = '{$user['mobile_phone']}' OR ";
                $where .= "cstm.mobile_phone = '{$user['mobile_phone']}' OR ";
                $where .= "cstm.direct_phone = '{$user['mobile_phone']}'";
            }
            if(!empty($user['direct_phone'])) {
                if($where) {
                    $where .= " OR ";
                }
                $where .= "prof.phone = '{$user['direct_phone']}' OR ";
                $where .= "cstm.mobile_phone = '{$user['direct_phone']}' OR ";
                $where .= "cstm.direct_phone = '{$user['direct_phone']}'";
            }
            $query = $this->db->query($this->Account_m->userQuery($where));
            $mem = reset($query->result());
            if($where && !empty($mem->id)) {
                $uid = $mem->id;
                $new = array();
                foreach($user as $k => $v) {
                    if(!empty($mem->$k)) {
                        $new[$k] = $mem->$k;
                        unset($user[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("leads", $user, array("id" => $uid));
                $user = $new;
            }
            else {
                $user['uxtime'] = time();
                $user['isclient'] = 0;
                $user['flag'] = 0;
                $user['activation_code'] = $this->Secure_m->generate_activate_id();
                $uid = $this->Secure_m->generate_activate_id(10);
                $query = $this->db->get_where("leads", array("id" => $uid), 1);
                $data = reset($query->result());
                while(!empty($data->id)) {
                    $uid = $this->Secure_m->generate_activate_id(10);
                    $query = $this->db->get_where("leads", array("id" => $uid), 1);
                    $data = reset($query->result());
                }
                $user['id'] = $uid;
                $this->db->insert("leads", $user);
            }
        }
        if($uid && is_array($user_prof) && count($user_prof)) {
            if(!empty($mem->uid)) {
                $new = array();
                foreach($user_prof as $k => $v) {
                    if(!empty($mem->$k)) {
                        $new[$k] = $mem->$k;
                        unset($user_prof[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("mprofiles", $user_prof, array("uid" => $uid));
                $user_prof = $new;
            }
            else {
                $user_prof['uid'] = $uid;
                $user_prof['flag'] = 0;
                $this->db->insert("ldetails", $user_prof);
            }
        }
        if($uid && is_array($user_cstm) && count($user_cstm)) {
            $query = $this->db->get_where("leads_cstm", array("crm_id" => $cid), 1);
            $data = reset($query->result());
            if(!empty($data->crm_id)) {
                $new = array();
                foreach($user_cstm as $k => $v) {
                    if(!empty($mem->$k)) {
                        $new[$k] = $mem->$k;
                        unset($user_cstm[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("leads_cstm", $acct_prof, array("fa_user_id" => $cid));
                $user_cstm = $new;
            }
            else {
                $user_cstm['crm_id'] = $uid;
                $user_cstm['date_entered'] = time();
                $user_cstm['date_modified'] = time();
                $user_cstm['created_by'] = $this->session->userdata('id');
                $user_cstm['modified_user_id'] = $this->session->userdata('id');
                $this->db->insert("leads_cstm", $user_cstm);
            }
        }
        $cmp = new stdclass;
        $cid = 0;
        if(is_array($acct) && count($acct)) {
            $acct['uid'] = $uid;
            $acct['main_contact'] = "{$user['first_name']} {$user['last_name']}";
            if(!empty($user['email'])) {
                $acct['email'] = $user['email'];
            }
            $acct['isprospect'] = 0;
            $acct['flag'] = 0;
            if(!empty($acct['mcn'])) {
                $where .= "fms.mcn = '{$acct['mcn']}'";
            }
            if(!empty($acct['phone'])) {
                if($where) {
                    $where .= " OR ";
                }
                $where .= "fms.phone = '{$acct['phone']}' OR ";
                $where .= "crm.billing_phone = '{$acct['phone']}'";
            }
            if(!empty($acct['billing_phone'])) {
                if($where) {
                    $where .= " OR ";
                }
                $where .= "fms.phone = '{$acct['billing_phone']}' OR ";
                $where .= "crm.billing_phone = '{$acct['billing_phone']}'";
            }
            if(!empty($acct['company_name'])) {
                if($where) {
                    $where .= " OR ";
                }
                $where .= "fms.company_name = '{$acct['company_name']}'";
            }
            $query = $this->db->query($this->Account_m->acctQuery($where));
            $cmp = reset($query->result());
            if($where && !empty($cmp->id)) {
                $cid = $cmp->id;
                $new = array();
                foreach($acct as $k => $v) {
                    if(!empty($cmp->$k)) {
                        $new[$k] = $cmp->$k;
                        unset($acct[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("accounts", $billing, array("cid" => $cid));
                $acct = $new;
            }
            else {
                $cid = $this->Secure_m->generate_activate_id(10);
                $query = $this->db->get_where("leads", array("id" => $uid), 1);
                $data = reset($query->result());
                while(!empty($data->id)) {
                    $cid = $this->Secure_m->generate_activate_id(10);
                    $query = $this->db->get_where("accounts", array("id" => $cid), 1);
                    $data = reset($query->result());
                }
                $acct['id'] = $cid;
                $this->db->insert("accounts", $acct);
            }
        }
        if($cid && is_array($acct_group) && count($acct_group)) {
            $grp = $this->db->get_where("accounts_groups", array("cid" => $cid, "gid" => $acct_group['id']));
            if(empty($cmp->id)) {
                $acct_group['cid'] = $cid;
                $this->db->insert("accounts_groups", $acct_group);
            }
        }
        if($cid && is_array($acct_prof) && count($acct_prof)) {
            if(!empty($cmp->id)) {
                $new = array();
                foreach($acct_prof as $k => $v) {
                    if(!empty($cmp->$k)) {
                        $new[$k] = $cmp->$k;
                        unset($acct_prof[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("cprofiles", $acct_prof, array("cid" => $cid));
                $acct_prof = $new;
            }
            else {
                $acct_prof['cid'] = $cid;
                $acct_prof['flag'] = 0;
                $this->db->insert("adetails", $acct_prof);
            }
        }
        if($cid && is_array($billing) && count($billing)) {
            $bill = $this->db->get_where("accounts_billing", array("cid" => $cid));
            if(!empty($bill->id)) {
                $new = array();
                foreach($acct_prof as $k => $v) {
                    if(!empty($cmp->$k)) {
                        $new[$k] = $cmp->$k;
                        unset($acct_prof[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("accounts_billing", $billing, array("cid" => $cid));
            }
            else {
                $billing['cid'] = $cid;
                $billing['flag'] = 0;
                $this->db->insert("accounts_billing", $billing);
            }
        }
        if($cid && $uid) {
            $this->db->update("leads", array("cid" => $cid), array("id" => $uid));
        }
        if($cid && is_array($acct_cstm) && count($acct_cstm)) {
            $query = $this->db->get_where("accounts_cstm", array("crm_id" => $cid), 1);
            $data = reset($query->result());
            if(!empty($data->crm_id)) {
                $new = array();
                foreach($acct_cstm as $k => $v) {
                    if(!empty($cmp->$k)) {
                        $new[$k] = $cmp->$k;
                        unset($acct_cstm[$k]);
                    }
                    else {
                        $new[$k] = $v;
                    }
                }
                //$this->db->update("accounts_cstm", $acct_cstm, array("crm_id" => $cid));
                $acct_cstm = $new;
            }
            else {
                $acct_cstm['crm_id'] = $cid;
                $acct_cstm['date_entered'] = time();
                $acct_cstm['date_modified'] = time();
                $acct_cstm['created_by'] = $this->session->userdata('id');
                $acct_cstm['modified_user_id'] = $this->session->userdata('id');
                if(empty($acct_cstm['rating'])) {
                    $acct_cstm['rating'] = 1;
                }
                $this->db->insert("accounts_cstm", $acct_cstm);
            }
        }
        $true = TRUE;
    }
    fclose($fd);

    return $true;
}

Now, as far as I can see, the script works perfectly fine. There's nothing wrong with the actual code itself. The problem is that after around 400-500 rows, the script just stops. I don't receive an error, but no further code is processed.

I know this because I have code after this that is supposed to return a redirect page through AJAX. Nothing after my loop in the importLeads function ever loads, though.

I'm not sure how to make this script more efficient... I'm positive it is timing out, but I don't know how to make it run more efficiently. I NEED this script to process all the information above separately. I have a variety of separate tables that all link together, and this import script has to set everything up in different ways.

I've talked with my client about this project. This script works when I drop it to around 400 rows. He has some a lot of these CSV files that are around 75,000 rows. The one I am importing is a smaller one, only about 1,200 rows.

I've tried looking into alternate methods, such as MySQL's import script, but I can't do that because this script must import data into separate tables, and it must check for existing data first. I'm also supposed to have all empty fields update with imported information, but that will make this even worse.

If anyone knows of a more efficient method it would be much appreciated. I tried to be as detailed as I could. Of note, I will mention that I'm using CodeIgniter, but if there's a more efficient way that doesn't use CodeIgniter I'll take it (I can still put it into a CI model, though).

解决方案

I have written PHP scripts to bulk-load the data published by Stack Overflow data dump. I import millions of rows and it doesn't take that long.

Here are some tips:

  • Don't rely on autocommit. The overhead of starting and committing a transaction for every row is enormous. Use explicit transactions, and commit after every 1000 rows (or more).

  • Use prepared statements. Since you are basically doing the same inserts thousands of times, you can prepare each insert before you start looping, and then execute during the loop, passing values as parameters. I don't know how to do this with CodeIgniter's database library, you'll have to figure it out.

  • Tune MySQL for import. Increase cache buffers and so on. See Speed of INSERT Statements for more information.

  • Use LOAD DATA INFILE. If possible. It's literally 20x faster than using INSERT to load data row by row. I understand if you can't because you need to get the last insert id and so on. But in most cases, even if you read the CSV file, rearrange it and write it out to multiple temp CSV files, the data load is still faster than using INSERT.

  • Do it offline. Don't run long-running tasks during a web request. The time limit of a PHP request will terminate the job, if not today then next Tuesday when the job is 10% longer. Instead, make the web request queue the job, and then return control to the user. You should run the data import as a server process, and periodically allow the user to glimpse the rate of progress. For instance, a cheap way to do this is for your import script to output "." to a temp file, and then the user can request to view the temp file and keep reloading in their browser. If you want to get fancy, do something with Ajax.

这篇关于需要通过 PHP 将大型 CSV 文件导入多个 MySQL 表的省时方法的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆