<?php
/*******************************************************************************
 *
 * LEIDEN OPEN VARIATION DATABASE (LOVD)
 *
 * Created     : 2007-02-27 (included in LOVD 2008-05-26)
 * Modified    : 2013-02-04
 * For LOVD    : 2.0-35
 *
 * Access      : Curators and up.
 * Purpose     : Download variant data of selected databases in common Central
 *               Repository format, in dbSNP format or in Mutalyzer format for
 *               the batch Name Checker.
 *
 * Copyright   : 2004-2013 Leiden University Medical Center; http://www.LUMC.nl/
 * Programmers : Ing. Ivo F.A.C. Fokkema <I.F.A.C.Fokkema@LUMC.nl>
 *               Ir. Gerard C.P. Schaafsma <G.C.P.Schaafsma@LUMC.nl>
 *               Ing. Ivar C Lugtenburg <I.C.Lugtenburg@LUMC.nl>
 * Last edited : Ing. Ivar C Lugtenburg <I.C.Lugtenburg@LUMC.nl>
 *
 *
 * This file is part of LOVD.
 *
 * LOVD is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LOVD is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LOVD; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *************/

// Path prefix used by every require in this script; must be defined before the first include.
define('ROOT_PATH', './');
// Loaded before anything else; the code below relies on names that this file does not define itself.
require ROOT_PATH . 'inc-init.php';

// 2011-01-26; 2.0-30; mail address to send a dbSNP export file to
// (replaced by the user's own address further down when the export is mailed to the user instead).
$sDbSNPReportMail = 'lrg@ncbi.nlm.nih.gov';

// 2009-03-10; 2.0-17; WikiProfessional export format.
// Prints one tab-separated table holding the variants of every gene that is
// flagged for wiki indexing, and then ends the script.
if (!empty($_GET['format']) && $_GET['format'] == 'wiki') {
    // No output at all unless the request comes from the configured IP range or HAS_AUTH is set.
    if (!(lovd_validateIP($_SETT['wikiprofessional_iprange'], $_SERVER['REMOTE_ADDR']) || HAS_AUTH)) {
        exit;
    }
    // Visitor from the CMBI.RU.NL range or authorized user who found this URL.
    // We will assume the user wants all genes, or all accessible genes at least.

    // We need to know which cols are available to us...
    require ROOT_PATH . 'class/currdb.php';

    // Text output...
    header('Content-type: text/plain; charset=ISO-8859-1');

    // 2009-04-29; 2.0-18; Large databases need more time; make sure we have at least a minute.
    if ((int) ini_get('max_execution_time') < 60) {
        set_time_limit(60);
    }

    $aGenes = array();
    $qGenes = mysql_query('SELECT symbol FROM ' . TABLE_DBS . ' WHERE allow_index_wiki = 1 ORDER BY symbol');
    if (!mysql_num_rows($qGenes)) {
        die('No genes available for indexing.');
    }

    // $aTable: every output column, in output order. $aCols: the public variant columns per gene.
    $aCols = array();
    $aTable = array('Gene/Symbol', 'allele_', 'pathogenic_');

    while ($aGeneRow = mysql_fetch_row($qGenes)) {
        $sGene = $aGeneRow[0];
        // The symbol is the part of the gene ID in front of the first underscore.
        $sSymbol = substr($sGene, 0, strpos($sGene . '_', '_'));

        // Outside of the trusted IP range, only genes this user curates are exported.
        if (!(lovd_validateIP($_SETT['wikiprofessional_iprange'], $_SERVER['REMOTE_ADDR']) || lovd_isCurator($sGene))) {
            // Skipping, access denied.
            continue;
        }

        // Store for later use.
        $aGenes[$sGene] = $sSymbol;

        // Variant columns.
        $aCols[$sGene] = array();
        $qCols = mysql_query('SELECT colid FROM ' . TABLEPREFIX . '_' . $sGene . '_columns WHERE public = 1 ORDER BY col_order, colid');
        while ($aColRow = mysql_fetch_row($qCols)) {
            $sColID = $aColRow[0];
            if (!in_array($sColID, $aTable)) {
                // First time we see this column in any gene.
                $aTable[] = $sColID;
            }
            // This gene's columns.
            $aCols[$sGene][] = $sColID;
        }
    }

    // Add patient columns.
    $qCols = mysql_query('SELECT colid FROM ' . TABLE_PATIENTS_COLS . ' WHERE public = 1 ORDER BY col_order, colid');
    while ($aColRow = mysql_fetch_row($qCols)) {
        if (!in_array($aColRow[0], $aTable)) {
            $aTable[] = $aColRow[0];
        }
    }

    // Additional columns, always at the end of the table.
    $aFixedColumns = array(
        'status_',
        'submitter_',
        'variant_created_by_',
        'variant_created_date_',
        'variant_edited_by_',
        'variant_edited_date_',
        'patient_created_by_',
        'patient_created_date_',
        'patient_edited_by_',
        'patient_edited_date_',
        'ID_variantid_',
        'ID_patientid_',
        'ID_allele_',
        'ID_pathogenic_',
        'ID_status_',
        'ID_submitterid_',
        'URL',
    );
    foreach ($aFixedColumns as $sFixedColumn) {
        $aTable[] = $sFixedColumn;
    }

    // Print header information; no line ending here, each data row starts with one.
    $aHeader = array();
    foreach ($aTable as $sField) {
        $aHeader[] = '"{{ ' . $sField . ' }}"';
    }
    print(implode("\t", $aHeader));

    // Source fields that are repeated under an ID_* name.
    $aIDFields = array(
        'ID_allele_' => 'allele',
        'ID_pathogenic_' => 'pathogenic',
        'ID_status_' => 'status',
        'ID_variantid_' => 'variantid',
        'ID_patientid_' => 'patientid',
        'ID_submitterid_' => 'submitterid',
    );

    foreach ($aGenes as $sGene => $sSymbol) {
        // 2009-07-21; 2.0-21; Do not sort on Variant/Exon anymore; sort column contains Exon information.
        $sQ = 'SELECT v.*, p.*, p2v.allele, p2v.status, ' .
              'p2v.created_by AS ID_variant_created_by_, p2v.created_date AS variant_created_date_, ' .
              'p2v.edited_by AS ID_variant_edited_by_, p2v.edited_date AS variant_edited_date_, ' .
              'stat.status_text AS status_, path.pathogenic, path.pathogenic_text AS pathogenic_, ' .
              'uvc.name AS variant_created_by_, uve.name AS variant_edited_by_, ' .
              'upc.name AS patient_created_by_, upe.name AS patient_edited_by_, ' .
              'CONCAT(s.firstname, " ", s.lastname) AS submitter_' .
              ' FROM ' . TABLEPREFIX . '_' . $sGene . '_variants AS v' .
              ' LEFT JOIN ' . TABLE_PAT2VAR . ' AS p2v ON (p2v.symbol = "' . $sGene . '" AND v.variantid = p2v.variantid)' .
              ' LEFT JOIN ' . TABLE_PATIENTS . ' AS p USING (patientid)' .
              ' LEFT OUTER JOIN ' . TABLE_VAR_STATUS . ' AS stat ON (p2v.status = stat.status)' .
              ' LEFT OUTER JOIN ' . TABLE_PATHOGENIC . ' AS path ON (p2v.pathogenic = path.pathogenic)' .
              ' LEFT OUTER JOIN ' . TABLE_USERS . ' AS uvc ON (p2v.created_by = uvc.userid)' .
              ' LEFT OUTER JOIN ' . TABLE_USERS . ' AS uve ON (p2v.edited_by = uve.userid)' .
              ' LEFT OUTER JOIN ' . TABLE_USERS . ' AS upc ON (p.created_by = upc.userid)' .
              ' LEFT OUTER JOIN ' . TABLE_USERS . ' AS upe ON (p.edited_by = upe.userid)' .
              ' LEFT OUTER JOIN ' . TABLE_SUBS . ' AS s ON (p.submitterid = s.submitterid)' .
              ' WHERE p2v.status >= ' . STATUS_MARKED .
              ' GROUP BY v.variantid, p.patientid, p2v.allele' .
              ' ORDER BY v.sort, v.`Variant/DNA`, p.patientid';

        $q = mysql_query($sQ);
        if (!$q) {
            lovd_dbFout('WikiExport', $sQ, mysql_error());
        }

        while ($zData = mysql_fetch_assoc($q)) {
            print("\r\n");

            $zData['Gene/Symbol'] = $sSymbol;
            $zData['allele_'] = $_SETT['var_allele'][$zData['allele']];
            $zData['patient_created_date_'] = $zData['created_date'];
            $zData['patient_edited_date_'] = $zData['edited_date'];

            // Without a known creator/editor, fall back to the submitter's name.
            // The editor is only filled in when an edit date shows there was an edit.
            foreach (array('variant', 'patient') as $sEntity) {
                if (!$zData[$sEntity . '_created_by_']) {
                    $zData[$sEntity . '_created_by_'] = $zData['submitter_'];
                }
                if (!$zData[$sEntity . '_edited_by_'] && $zData[$sEntity . '_edited_date_']) {
                    $zData[$sEntity . '_edited_by_'] = $zData['submitter_'];
                }
            }

            foreach ($aIDFields as $sIDField => $sSourceField) {
                $zData[$sIDField] = $zData[$sSourceField];
            }
            $zData['URL'] = $_CONF['location_url'] . 'variants.php?select_db=' . $sGene . '&action=view&view=' . $zData['patientid'] . '%2C' . $zData['variantid'] . '%2C' . $zData['allele'];

            // Quote data.
            lovd_magicQuote($zData);

            $aLine = array();
            foreach ($aTable as $sField) {
                // Variant columns that are not public columns of this gene are left empty.
                if (substr($sField, 0, 8) == 'Variant/' && !in_array($sField, $aCols[$sGene])) {
                    $zData[$sField] = '';
                }

                // Real line endings become their literal escape sequences, to keep one record per line.
                $zData[$sField] = str_replace(array("\r\n", "\r", "\n"), array('\r\n', '\r', '\n'), $zData[$sField]);
                $aLine[] = '"' . $zData[$sField] . '"';
            }
            print(implode("\t", $aLine));
        }
    }
    exit;
}





// Require curator clearance.
lovd_requireAUTH(LEVEL_CURATOR);

if (isset($_GET['all_genes'])) {
    // Just so that it will start downloading if all other options are specified.
    // Note that this is a placeholder value, not a gene symbol.
    $_GET['genes'] = array(1);
}

if (isset($_GET['varioml']) && !empty($_GET['display'])) {
    // VarioML format, genes given.
    // Only a single gene symbol (a string) can be put in the API path. Anything else - no gene,
    // or the placeholder array set for all_genes - means "no gene in the path". The symbol is
    // URL-encoded because it comes straight from the request.
    $sGenePath = '';
    if (!empty($_GET['genes']) && is_string($_GET['genes'])) {
        $sGenePath = '/' . rawurlencode($_GET['genes']);
    }
    header('Location: ' . PROTOCOL . $_SERVER['HTTP_HOST'] . rtrim(dirname($_SERVER['PHP_SELF']), '/') . '/api/export.php' . $sGenePath . (!empty($_GET['display']) && $_GET['display'] == 'dl'? '?download' : '') . lovd_showSID());
    exit;

} elseif (empty($_GET['genes']) || !is_array($_GET['genes']) || empty($_GET['filter_status']) || !isset($_GET['filter_unsure']) || empty($_GET['display'])) {
    // Not all options were given yet: show the form to choose genes, filters and output type, then stop.
    require ROOT_PATH . 'inc-lib-form.php';
    require ROOT_PATH . 'inc-top.php';
    lovd_printHeader('export_data', 'LOVD - Export variant data');

    if (empty($_GET['genes']) || !is_array($_GET['genes'])) {
        $_GET['genes'] = array();
    }

    // Fetch genes.
    $qGenes = mysql_query('SELECT symbol, gene FROM ' . TABLE_DBS . ' ORDER BY symbol');
    $nGenes = mysql_num_rows($qGenes);
    $aGenes = array();
    while ($zGenes = mysql_fetch_assoc($qGenes)) {
        // 2008-07-22; 2.0-09; Curators only see the genes they have access to.
        if ($_AUTH['level'] == LEVEL_CURATOR && !lovd_isCurator($zGenes['symbol'])) {
            $nGenes --;
            continue;
        }

        if (isset($_GET['all_genes'])) {
            $_GET['genes'][] = $zGenes['symbol'];
        }
        $aGenes[$zGenes['symbol']] = $zGenes['symbol'] . ' (' . lovd_shortenString($zGenes['gene'], 50) . ')';
    }

    $aFilterStatus = array(STATUS_MARKED => 'Only public variants', STATUS_NEW => 'All variants');
    $aFilterUnsure = array('Don\'t filter unsure variants', 'Filter unsure variants', 'Show unsure variants only');
    $aDisplay      = array('screen' => 'Display directly on screen', 'dl' => 'Download as separate file', 'dbSNP' => 'Send directly to dbSNP by email', 'dbSNP_to_me' => 'Send to my email address (dbSNP format)');

    // 2010-12-21; 2.0-30; added options 'send to dbSNP' and 'Download as Mutalyzer etc...'
    // Check if the Mutalyzer module is active
    if ($_MODULES->isLoaded('mutalyzer')) {
        $aDisplay['mutalyzer'] = 'Download as Mutalyzer Name Checker batch file';
    }

    // Table.
    // PHP_SELF includes any extra path info from the request URL, so it must be escaped before
    // it is printed into the action attribute.
    print('      <FORM action="' . htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'ISO-8859-1') . '" method="get">' . "\n" .
          (isset($_GET['varioml'])?
          '        <INPUT type="hidden" name="varioml" value="">' : '') .
          '        <TABLE border="0" cellpadding="0" cellspacing="1" width="950">');

    // Array which will make up the form table.
    $aForm = array(
                    0  => array('GET', '', '', '40%', '60%'),
                    1  => array('<B>Select the gene(s) to download variant data from</B>', 'select', 'genes', ($nGenes > 5? 5 : $nGenes), $aGenes, false, true, true),
                    2  => 'skip',
                    3  => array('', 'print', '<B>Filter options</B>'),
                    4  => array('On status', 'select', 'filter_status', 1, $aFilterStatus, false, false, false),
                    5  => array('Unsure variants', 'select', 'filter_unsure', 1, $aFilterUnsure, false, false, false),
                    6  => array('', 'print', '<SPAN class="form_note">Unsure in this case means variants where the DNA field contains a questionmark at the beginning or the end of the variant field, i.e. "c.?34C>G" or "c.34C>G?", or variants without a proper DB-ID value.<BR><I>Please note that when sending variants to dbSNP, unsure variants will <B>always</B> be filtered.</I><BR>The \'Send to my email address\' option will send the exact same file to you as \'Send directly to dbSNP by email\' would send to dbSNP.</SPAN>'),
                    7  => 'skip',
                    8  => array('Retrieve results', 'select', 'display', 1, $aDisplay, false, false, false),
                    9  => 'skip',
                    10 => array('', 'submit', 'Download variant data'),
                  );

    if (isset($_GET['varioml'])) {
        // This is for VarioML download, so adapt the form!
        // Genes is single-select!
        $aForm[1][3] = 1; // Size = 1.
        $aForm[1][5] = 'All genes'; // Select option.
        $aForm[1][6] = false; // Single select.
        $aForm[1][7] = false; // No select_all link.
        unset($aForm[3], $aForm[4], $aForm[5], $aForm[6], $aForm[7]); // Get rid of filter options.
        unset($aForm[8][4]['dbSNP'], $aForm[8][4]['dbSNP_to_me'], $aForm[8][4]['mutalyzer']);
    }

    $_MODULES->processForm('ExportDataChooseGenes', $aForm);
    lovd_viewForm($aForm);

    print('</TABLE></FORM><BR>' . "\n\n");
    require ROOT_PATH . 'inc-bot.php';
    exit;
}





// We need to know which cols are available to us...
require ROOT_PATH . 'class/currdb.php';

// Timestamp used in download/attachment file names, the dbSNP submission name and the mail subject.
$sDate = date('Y-m-d_H.i.s');
if ($_GET['display'] == 'dbSNP_to_me') {
    // Same file as the dbSNP export; only the recipient differs.
    $_GET['mail_to_me'] = true;
    $_GET['display'] = 'dbSNP';
}

// Per output type: send the HTTP headers and column header line, or prepare the dbSNP email.
if ($_GET['display'] == 'dl') {
    // Download to external file...
    header('Content-type: text/plain; charset=ISO-8859-1');
    header('Content-Disposition: attachment; filename="LOVD_export_' . $sDate . '.txt"');
    // 2008-08-27; 2.0-11; Fix IE download issue when using SSL.
    header('Pragma: public');
    // The data rows carry six fields, the last one being the PubMed references,
    // so the header must name six columns as well (same as the on-screen output).
    print("Gene\tDNA\tDBID\tDbSNP\tOMIM\tPubMed\n");

} elseif ($_GET['display'] == 'mutalyzer') {
    // 2010-12-09; 2.0-30 Download to external file to use for Mutalyzer Name Checker
    header('Content-type: text/plain; charset=ISO-8859-1');
    header('Content-Disposition: attachment; filename="LOVD_Name_Check_' . $sDate . '.txt"');
    header('Pragma: public');
    // Per gene symbol: array(mRNA reference sequence, genomic reference sequence).
    // Only genes that have both set are in here.
    $aRefSeqsGen = array();
    $aSkipped = array();
    $sQ = 'SELECT symbol, refseq_mrna, refseq_genomic FROM ' . TABLE_DBS . ' WHERE refseq_mrna != "" and refseq_genomic != ""';
    $q = mysql_query($sQ);
    while ($r = mysql_fetch_row($q)) {
        $aRefSeqsGen[$r[0]] = array($r[1], $r[2]);
    }
    $aSett = $GLOBALS['_MODULES']->getSettings('mutalyzer');
    // FIXME; Seems to me a foreach through $aRefSeqsGen is more logical.
    if (!empty($aSett['accessionid']) && is_array($aSett['accessionid'])) {
        foreach ($aSett['accessionid'] as $sSymbol => $aMutalyzerSettings) {
            // Add the Mutalyzer transcript versions (e.g. ACTA1_v001) to the NG_ id.
            // Genes without reference sequences are skipped here; appending to a missing entry
            // would create one that holds nothing but the version, which would then be
            // exported as a reference without an accession number.
            if (isset($aRefSeqsGen[$sSymbol]) && is_array($aMutalyzerSettings) && array_key_exists('MutVer', $aMutalyzerSettings)) {
                $aRefSeqsGen[$sSymbol][1] .= '(' . trim($aMutalyzerSettings['MutVer']) . ')';
            }
        }
    }

} elseif ($_GET['display'] == 'dbSNP') {
    // Enforce filtering.
    $_GET['filter_unsure'] = 1;

    // 2010-10-20; 2.0-30; Send in a MIME email
    $_SETT['mime_boundary'] = md5('PHP_MIME');

    // mRNA reference sequence per gene symbol, for the genes that have one.
    $aRefSeqs = array();
    $bDoubles = 0;
    $q = mysql_query('SELECT symbol, refseq_mrna FROM ' . TABLE_DBS . ' WHERE refseq_mrna != ""');
    while ($r = mysql_fetch_row($q)) {
        $aRefSeqs[$r[0]] = $r[1];
    }

    // The attachment is named after the gene only when exactly one gene was requested.
    // With all_genes, $_GET['genes'] holds a placeholder and not a gene symbol, so it is not used.
    // The value comes from the request and ends up in MIME part headers, hence the character filter.
    $sFileGene = '';
    if (!isset($_GET['all_genes']) && count($_GET['genes']) == 1) {
        $sOnlyGene = reset($_GET['genes']);
        if (is_string($sOnlyGene)) {
            $sOnlyGene = preg_replace('/[^A-Za-z0-9_.-]/', '', $sOnlyGene);
            if ($sOnlyGene !== '') {
                $sFileGene = $sOnlyGene . '_';
            }
        }
    }

    // User can send data.
    $_SETT['mime_headers']  = 'MIME-version: 1.0' . "\n" .
                              'Content-Type: multipart/mixed; boundary="' . $_SETT['mime_boundary'] . '"' . "\n" .
                              'Content-Transfer-Encoding: 7bit' . "\n" .
                              'X-Priority: 3' . "\n" .
                              'X-MSMail-Priority: Normal' . "\n" .
                              'X-Mailer: PHP/' . phpversion() . "\n" .
                              'From: "LOVDv.' . $_SETT['system']['tree'] . ' (' . $_CONF['location_name'] . ')" <' . $_CONF['email_address'] . '>' . "\n" .
                              'Reply-To: ' . $_AUTH['email'] . "\n" .
                              'Cc: ' . $_AUTH['email'] . "\n\n";

    // First MIME part (the message text), followed by the headers of the attachment part.
    // The &nbsp; entities are turned back into spaces right before the mail is sent;
    // they keep the text wrapping from breaking these header lines.
    $sEmail = "\n" .
          'This is a MIME Encoded Message' . "\n\n" .
          '--' . $_SETT['mime_boundary'] . "\n" .
          'Content-Type: text/plain; charset=ISO-8859-1' . "\n" .// this part resembles $_SETT['headers'] pretty much
          'Content-Transfer-Encoding: 7bit' . "\n\n" .
          'Dear dbSNP,' . "\n\n" .
          'Please find attached an export of variants from the LOVD database at:' . "\n" .
          $_CONF['location_url'] . "\n\n" .
          'Regards,' . "\n" .
          $_AUTH['name'] . ' (' . $_SETT['user_levels'][$_AUTH['level']] . ")\n\n" .
          '--' . $_SETT['mime_boundary'] . "\n" .
          'Content-Type:&nbsp;text/plain;&nbsp;name="LOVD_' . $sFileGene . 'dbSNP_export_' . $sDate . '.txt"' . "\n" .
          'Content-Transfer-Encoding: base64' . "\n" .
          'Content-Disposition:&nbsp;attachment;&nbsp;filename="LOVD_' . $sFileGene . 'dbSNP_export_' . $sDate . '.txt"' . "\n\n";

    // Start of the attached file: submission details and the column header line.
    $sOutputFile  = '#Submission Name: ' . str_replace(' ', '_', $_AUTH['name']) . '_' . $sDate . "\n";
    $sOutputFile .= '#Submitter Name: ' . $_AUTH['name'] . "\n" . '#Submitter email: ' . $_AUTH['email'] . "\n";
    $sOutputFile .= '#Curator(s)' . "\t" . '#Linkout' . "\t" . '#HGVS Name' . "\t" . 'Local ID' . "\t" . 'Germline/Somatic' . "\t" . 'dbSNP' . "\t" . 'OMIM allelic variant ID' . "\t" . 'Functional consequence' . "\t" . 'PubMed ID(s)' . "\t" . 'Alternate designations' . "\n";

} else {
    // Text output...
    header('Content-type: text/plain; charset=ISO-8859-1');
    print("Gene\tDNA\tDBID\tDbSNP\tOMIM\tPubMed\n");
}



// Texts for the 'Functional consequence' column of the dbSNP file, indexed by the
// first character of the variant's pathogenicity code.
$_SETT['var_effect'] = array(
                            5 => 'unknown functional consequence',
                            9 => 'has functional consequence',
                            7 => 'probably has functional consequence',
                            3 => 'probably no functional consequence',
                            1 => 'no known functional consequence',
                            );
$aPrev  = array('DNA' => '', 'DBID' => ''); // Initialize to prevent notices in checks.
$aDiffDBID = array();// to hold entries with same HGVS description but different DBID number
$aDiffDNA = array();// to hold entries with different HGVS description but same DBID number
$aGenesNoRef = array();// to hold those genes without a reference sequence
$aCurators = array();// to hold curator(s) name and email for each gene
$aCustomLinksColumns = array();
$aLinkOutURLs = array(); // Stores the linkout URLs (per gene) needed for the dbSNP output file.

// 2010-12-21; 2.0-30; Find out which variant columns have an active custom link to which database(s).
$qLinks = mysql_query('SELECT DISTINCT c2l.colid FROM ' . TABLE_COLS2LINKS . ' AS c2l LEFT JOIN ' . TABLE_LINKS . ' AS l USING (linkid) WHERE c2l.colid LIKE "Variant/%" AND l.active = 1 AND l.linkname IN ("PubMed", "OMIM", "dbSNP")');
while ($r = mysql_fetch_row($qLinks)) {
    $aCustomLinksColumns[] = $r[0];
}

// Select the requested genes. The symbols come straight from the request, so each one is
// escaped before it is put into the query; values that are not plain scalars are ignored.
$sGeneFilter = '';
if (!isset($_GET['all_genes'])) {
    $aRequestedGenes = array();
    foreach ($_GET['genes'] as $sRequestedGene) {
        if (is_scalar($sRequestedGene)) {
            $aRequestedGenes[] = mysql_real_escape_string((string) $sRequestedGene);
        }
    }
    // Without any usable symbol this results in IN (""), which matches no gene.
    $sGeneFilter = ' WHERE symbol in ("' . implode('", "', $aRequestedGenes) . '")';
}
$qGenes = mysql_query('SELECT symbol FROM ' . TABLE_DBS . $sGeneFilter . ' ORDER BY symbol');
if (!mysql_num_rows($qGenes)) {
    die('No genes found!');
}

// Main export loop: per selected gene, build the variant query for the chosen output type and
// filters, then print each variant (screen/download/Mutalyzer) or collect it in $sOutputFile (dbSNP).
// For dbSNP, a variant is only written once the next, different variant has been read, so that
// consecutive rows describing the same variant can be merged first. Conflicts between DNA
// description and DB-ID are collected in $aDiffDBID and $aDiffDNA.
// NOTE(review): $aPrev is not reset when the next gene starts, so the first variant of a gene is
// compared against the last variant of the previous gene - confirm this is intended.
while (list($sGene) = mysql_fetch_row($qGenes)) {
    // The symbol is the part of the gene ID in front of the first underscore.
    $sSymb = substr($sGene, 0, strpos($sGene . '_', '_'));

    if (!lovd_isCurator($sGene)) {
        print('Access denied to ' . $sGene . ' database!' . "\n");
        continue;
    }

    if ($_GET['display'] == 'dbSNP') {
        // 2010-10-27; 2.0-30; warning when no reference sequence available
        if (!array_key_exists($sGene, $aRefSeqs)) {
            $aGenesNoRef[] = $sGene;
        }

        // 2011-02-03; 2.0-30; select the curator names and email addresses
        // The entry is always created, also for a gene without curators, because it is read
        // unconditionally when the gene's variants are written to the file.
        $aGeneCurators = array();
        $qCurators = mysql_query('SELECT CONCAT(u.name, " <", u.email, ">") FROM ' . TABLE_USERS . ' AS u LEFT JOIN ' . TABLE_CURATES . ' USING (userid) WHERE symbol = "' . $sGene . '"');
        while (list($sCurator) = mysql_fetch_row($qCurators)) {
            $aGeneCurators[] = $sCurator;
        }
        $aCurators[$sGene] = implode(', ', $aGeneCurators);

        // Find out which linkout URL we can use for this gene.
        // Check if www.LOVD.nl/GENE points to us. If so, we can use www.LOVD.nl/DB-ID as linkout. If not, use a full URL.
        // Added @ to block errors when gene is not found.
        $aOutput = @lovd_php_file('http://www.LOVD.nl/' . $sSymb . '?getURL');
        if (str_replace('https://', 'http://', strtolower(trim($aOutput[0]))) == str_replace('https://', 'http://', strtolower($_CONF['location_url']))) {
            $aLinkOutURLs[$sGene] = 'http://www.lovd.nl/';
        } else {
            $aLinkOutURLs[$sGene] = $_CONF['location_url'] . 'variants.php?select_db=' . $sGene . '&action=search_unique&search_Variant%2FDBID=';
        }
    }

    // We're just going to assume that Variant/DNA and Variant/DBID are here,
    // because they're HGVS recommended and therefore they're hard to remove.
    // But to order on Variant/Exon and to read out Variant/DNA_remark or
    // Variant/Remarks, we do need a CURRDB set.
    $CurrDB = new CurrDB(true, $sGene);
    if ($_GET['display'] == 'mutalyzer') {
        // 2010-12-09; 2.0-30 Mutalyzer Name Checker batch file only needs variants
        $sQ = 'SELECT DISTINCT v.`Variant/DNA` AS DNA';
    } else {
        // The genetic origin column is optional; only select it when this gene has it.
        $q = mysql_query('DESCRIBE ' . TABLEPREFIX . '_' . $sGene . '_variants');
        $bCellType = false;
        while ($a = mysql_fetch_assoc($q)) {
            if ($a['Field'] == 'Variant/Genetic_origin') {
                $bCellType = true;
            }
        }
        $sQ = 'SELECT DISTINCT v.`Variant/DNA` AS DNA, v.`Variant/DBID` AS DBID, v.`Variant/RNA` AS RNA, v.`Variant/Protein` AS Protein' . ($bCellType? ', v.`Variant/Genetic_origin` AS celltype ' : '') . ', LEFT(p2v.pathogenic, 1) AS func_conseq';
        // 2010-12-21; 2.0-30; Add columns which have an active custom link for PubMed, OMIM or dbSNP
        $aAddedColumns = array();
        foreach ($aCustomLinksColumns as $sColumn) {
            if ($CurrDB->colExists($sColumn)) {
                $aAddedColumns[] = $sColumn;
            }
        }
        if ($aAddedColumns) {
            $sQ .= ', CONCAT(v.`' . implode('`, v.`', $aAddedColumns) . '`) AS Reference';
        } else {
            // No such column in this gene; CONCAT() over an empty column list would be invalid SQL.
            $sQ .= ', "" AS Reference';
        }
    }

    // The status filter comes from the request; only its integer value is put into the query.
    $nFilterStatus = (int) $_GET['filter_status'];
    $sJoinP2V = ' LEFT JOIN ' . TABLEPREFIX . '_patients2variants AS p2v ON (p2v.symbol = "' . $sGene . '" AND v.variantid = p2v.variantid)';

    $sQ .= ' FROM ' . TABLEPREFIX . '_' . $sGene . '_variants AS v ';
    if ($nFilterStatus > 1 || in_array($_GET['display'], array('dbSNP', 'dbSNP_to_me'))) {
        $sQ .= $sJoinP2V . ' WHERE p2v.status >= "' . $nFilterStatus . '"';
    } elseif ($_GET['display'] != 'mutalyzer') {
        // No status filter, but the select list reads p2v.pathogenic, so the table must be joined anyway.
        $sQ .= $sJoinP2V . ' WHERE 1=1';
    } else {
        $sQ .= ' WHERE 1=1';
    }

    if ($_GET['display'] == 'dbSNP' && $bCellType) {
        $sQ .= ' AND `Variant/Genetic_origin` != "in vitro (cloned)"';
    }

    // Filter unsure variants?
    if ($_GET['filter_unsure'] == 1) {
        $sQ .= ' AND v.`Variant/DNA` NOT LIKE "%?" AND v.`Variant/DNA` NOT LIKE "_.(%?)" AND v.`Variant/DNA` NOT LIKE "?%" AND v.`Variant/DBID` NOT LIKE "' . $sSymb . '_00000%" AND v.`Variant/DBID` != ""';
        // 2011-01-26; 2.0-30; for dbSNP also omit variants with square brackets []
        if ($_GET['display'] == 'dbSNP') {
            $sQ .= ' AND v.`Variant/DNA` NOT LIKE "%[%"';
        }
    } elseif ($_GET['filter_unsure'] == 2) {
        $sQ .= ' AND (v.`Variant/DNA` LIKE "%?" OR v.`Variant/DNA` LIKE "_.(%?)" OR v.`Variant/DNA` LIKE "?%" OR v.`Variant/DBID` LIKE "' . $sSymb . '_00000%" OR v.`Variant/DBID` = "")';
    }

    // 2009-07-21; 2.0-21; Do not sort on Variant/Exon anymore; sort column contains Exon information.
    $sQ .= ' ORDER BY v.sort, v.`Variant/DNA`, v.`Variant/DBID`';
    $qVariants = mysql_query($sQ);
    if (!$qVariants) {
        // Report a failed query instead of silently exporting nothing for this gene.
        lovd_dbFout('Export', $sQ, mysql_error());
    }

    while ($zData = mysql_fetch_assoc($qVariants)) {
        // Add gene to $zData to access e.g. curator list when printing data from $aPrev.
        $zData['gene'] = $sGene;

        if ($_GET['display'] != 'mutalyzer') {
            // Isolate IDs.
            $sIDs = $zData['DBID'] . $zData['Reference'];
            if (substr_count($sIDs, '{dbSNP')) {
                $zData['dbsnp'] = preg_replace('/^.*(\{dbSNP[0-9]+\s*\}).*$/', "$1", $sIDs);
            } else {
                $zData['dbsnp'] = '';
            }
            if (substr_count($sIDs, '{OMIM')) {
                $zData['omim'] = preg_replace('/^.*(\{OMIM[0-9]+:[0-9]+\s*\}).*$/', "$1", $sIDs);
            } else {
                $zData['omim'] = '';
            }
            // 2010-10-27; 2.0-30; Include PubMed custom link for all output; dbSNP output requires only the PMID.
            // Start out empty, so the field also exists when '{PMID' is present but no complete link matches.
            $zData['pubmed'] = '';
            if (substr_count($sIDs, '{PMID')) {
                if (preg_match_all('/\{PMID[0-9]+\s?:[^}]*\}/', $sIDs, $aMatches)) {
                    if ($_GET['display'] == 'dbSNP') {
                        // for dbSNP you only want the PubMed ID, not a name and year.
                        foreach ($aMatches[0] as $key => $sReference) {
                            $aMatches[0][$key] = preg_replace('/(\{PMID[0-9]+)(.*$)/', "$1" . '}', $sReference);
                        }
                    }
                    $zData['pubmed'] = implode(', ', $aMatches[0]);
                }
            }
            // 2010-10-27; 2.0-30; included celltype (Germline/Somatic) for dbSNP
            if (!isset($zData['celltype']) || !in_array($zData['celltype'], array('unknown', 'germline', 'de novo', 'somatic'))) {
                $zData['celltype'] = 'germline';
            }
        }
        if ($_GET['display'] != 'dbSNP' && $_GET['display'] != 'mutalyzer') {
            // 2011-01-25; 2.0-30; Do not do this for a dbSNP or a Mutalyzer Name Checker batch file
            // Reduce the DBID field to the ID itself, dropping anything around it.
            $zData['DBID'] = preg_replace('/^.*(' . $sSymb . '_([0-9]{5}|(SO|MP|e)[0-9]{1,2}((SO|MP|e)[0-9]{1,2})?b?)).*$/', "$1", $zData['DBID']);
        }
        // Now print the data
        if ($_GET['display'] != 'dbSNP' && $_GET['display'] != 'mutalyzer' && $zData !== $aPrev) {// Variants are not exactly same
            print("$sGene\t{$zData['DNA']}\t{$zData['DBID']}\t{$zData['dbsnp']}\t{$zData['omim']}\t{$zData['pubmed']}\n");
        } elseif ($_GET['display'] == 'mutalyzer' && !empty($aRefSeqsGen[$sGene])) {
            // 2010-12-09; 2.0-30?; Mutalyzer Name Checker batch file
            print("{$aRefSeqsGen[$sGene][1]}:{$zData['DNA']}\n");
        } elseif ($_GET['display'] == 'dbSNP') {
            if ($zData['DBID'] != $aPrev['DBID']) {
                // 2010-10-27; 2.0-30; Different DB ID value... Check if DNA is the same.
                if (str_replace(array('(', ')', '?'), '', $zData['DNA']) == str_replace(array('(', ')', '?'), '', $aPrev['DNA'])) {
                    // Should not occur! Store which DNA variants have multiple DB IDs.
                    if (!array_key_exists($zData['DNA'], $aDiffDBID)) {
                        $aDiffDBID[$zData['DNA']] = array($aPrev['DBID'], $zData['DBID']);
                    } elseif (!in_array($zData['DBID'], $aDiffDBID[$zData['DNA']])) {
                        $aDiffDBID[$zData['DNA']][] = $zData['DBID'];
                    }
                } elseif (!empty($aPrev['DNA'])) {
                    // other variant, write the previous one to file
                    $sOutputFile .= $aCurators[$aPrev['gene']] . "\t" . $aLinkOutURLs[$aPrev['gene']] . $aPrev['DBID'] . "\t" . (array_key_exists($aPrev['gene'], $aRefSeqs) ? $aRefSeqs[$aPrev['gene']] . ':' : '') . $aPrev['DNA'] . "\t" . $aPrev['DBID'] . "\t" . $aPrev['celltype'] . "\t" . $aPrev['dbsnp'] . "\t" . $aPrev['omim'] . "\t" . $_SETT['var_effect'][$aPrev['func_conseq']] . "\t" . $aPrev['pubmed'] . "\t";
                    if ($aPrev['RNA']) {
                        $sOutputFile .= $aPrev['RNA'];
                        if ($aPrev['Protein']) {
                            $sOutputFile .=  '; ' . $aPrev['Protein'];
                        }
                    } elseif ($aPrev['Protein']) {
                        $sOutputFile .= $aPrev['Protein'];
                    }
                    $sOutputFile .= "\n";
                }

            } else {
                if (str_replace(array('(', ')', '?'), '', $zData['DNA']) != str_replace(array('(', ')', '?'), '', $aPrev['DNA'])) {
                    // 2010-10-27; 2.0-30; It is possible (but should not occur) that a variant with a different HGVS description  ($zData['DNA'] with or without '(', ')' and '?') has the same DBID
                    if (!array_key_exists($zData['DBID'], $aDiffDNA)) {
                        // Not seen before? then store in $aDiffDNA
                        $aDiffDNA[$zData['DBID']] = array($aPrev['DNA'], $zData['DNA']);
                    } elseif (!in_array($zData['DNA'], $aDiffDNA[$zData['DBID']])) {
                        // 2010-12-16; 2.0-30?; Keep variants with the same DBID number and different HGVS description together
                        $aDiffDNA[$zData['DBID']][] = $zData['DNA'];
                    }

                } else {
                    // 2010-10-27; 2.0-30; same variant as the previous one, compare all fields, if value not already there, then add
                    // this is for combining different values for an identical field to the same variant
                    foreach ($zData as $key => $value) {
                        if (!empty($aPrev[$key])) {
                            if ($key != 'DNA' && $value != '' && substr_count($aPrev[$key], $zData[$key]) == 0) {
                                // both fields not empty, so append the values
                                if ($key == 'func_conseq' && $zData[$key] != $aPrev[$key]) {
                                    // Two different consequences are combined into one: both below 5
                                    // gives 3, both above 5 gives 7, anything else gives 5.
                                    if ($zData[$key] < 5 && $aPrev[$key] < 5) {
                                        $zData[$key] = '3';
                                    } elseif ($zData[$key] > 5 && $aPrev[$key] > 5) {
                                        $zData[$key] = '7';
                                    } else {
                                        $zData[$key] = '5';
                                    }
                                } else {
                                    $zData[$key] .= '; ' . $aPrev[$key];
                                }
                            } else {
                                // retain the previous value
                                $zData[$key] = $aPrev[$key];
                            }
                        }
                    }
                }
            }
        }
        $aPrev = $zData;
    }
}
// For dbSNP, a variant is only written to the file once a different variant follows it,
// so the last variant that was read still has to be written here.
if ($_GET['display'] == 'dbSNP' && isset($aPrev['gene'])) {
    $sLastGene = $aPrev['gene'];
    $aLastLine = array(
        $aCurators[$sLastGene],
        $aLinkOutURLs[$sLastGene] . $aPrev['DBID'],
        // HGVS name, prefixed with the reference sequence when the gene has one.
        (array_key_exists($sLastGene, $aRefSeqs)? $aRefSeqs[$sLastGene] . ':' : '') . $aPrev['DNA'],
        $aPrev['DBID'],
        $aPrev['celltype'],
        $aPrev['dbsnp'],
        $aPrev['omim'],
        $_SETT['var_effect'][$aPrev['func_conseq']],
        $aPrev['pubmed'],
        // Alternate designations: RNA and/or protein description, whichever is filled in.
        implode('; ', array_filter(array($aPrev['RNA'], $aPrev['Protein']))),
    );
    $sOutputFile .= implode("\t", $aLastLine) . "\n";
}



// 2010-10-27; 2.0-30; send mail to dbSNP
// Shows the result page for the dbSNP export: either the conflicts that block the export,
// a message that the file is too big, or the outcome of sending the email.
if ($_GET['display'] == 'dbSNP'){
    require ROOT_PATH . 'inc-top.php';

    // Do not continue when there are variants with identical description but different DBID numbers.
    if (!empty($aDiffDBID) || !empty($aDiffDNA)) {
        if (!empty($aGenesNoRef)) {
            lovd_showInfoTable('Please note that no reference sequence is set for ' . (count($aGenesNoRef) == 1 ? 'gene: ' : 'genes: ') . implode(', ', $aGenesNoRef) . '!', 'information');
        }
        if (!empty($aDiffDBID)) {
            lovd_showInfoTable('Database conflict!<BR>There are variants with an identical HGVS description and a different DBID number.<BR>Please correct before exporting these data.', 'stop');
            foreach ($aDiffDBID as $sVariant => $aDBIDs) {
                print('Variant: ' . $sVariant . ' has DBID numbers: ' . implode(', ', $aDBIDs) . '<BR>');
            }
        }
        if (!empty($aDiffDNA)) {
            lovd_showInfoTable('Database conflict!<BR>There are variants with a different HGVS description and an indentical DBID number.<BR>Please correct before exporting these data.', 'stop');
            foreach ($aDiffDNA as $sDBID => $aVariants) {
                print('DBID number: ' . $sDBID . ' has DNA descriptions: ' . implode(', ', $aVariants) . '<BR>');
            }
        }

        require ROOT_PATH . 'inc-bot.php';
        exit;
    }



    // The attachment is sent base64 encoded; the size check is done on the encoded data.
    $sOutputFile = chunk_split(base64_encode($sOutputFile));
    $nFileSize = round(strlen($sOutputFile)/(1024*1024), 1);
    if ($nFileSize > 40) {
        lovd_showInfoTable('The generated file for dbSNP would be ' . $nFileSize . ' MB in file size. This is too big to be handled by email. Please split your submission to dbSNP in parts, by submitting fewer genes at the same time.', 'stop');
        require ROOT_PATH . 'inc-bot.php';
        exit;
    }

    if (isset($_GET['mail_to_me'])) {
        $sDbSNPReportMail = $_AUTH['email'];
    }

    // Close the multipart message. The delimiter after the last part must end in two extra
    // hyphens; without them it announces yet another (empty) part.
    $sEmail .= $sOutputFile . "\n\n" .
              '--' . $_SETT['mime_boundary'] . '--' . "\n";

    // The gene is named in the subject only when exactly one gene was requested.
    // With all_genes, $_GET['genes'] holds a placeholder and not a gene symbol, so it is not used.
    // The value comes from the request, so it is reduced to characters expected in a gene symbol.
    $sSubjectGene = '';
    if (!isset($_GET['all_genes']) && count($_GET['genes']) == 1) {
        $sOnlyGene = reset($_GET['genes']);
        if (is_string($sOnlyGene)) {
            $sOnlyGene = preg_replace('/[^A-Za-z0-9_.-]/', '', $sOnlyGene);
            if ($sOnlyGene !== '') {
                $sSubjectGene = $sOnlyGene . ' ';
            }
        }
    }

    require ROOT_PATH . 'inc-lib-form.php';// lovd_wrapText()
    // Send mail.
    $bMail = mail($sDbSNPReportMail,
                  'LOVD ' . $sSubjectGene . 'export data from ' . $sDate,
                  str_replace('&nbsp;', ' ', lovd_wrapText($sEmail)), // 2013-02-04; 2.0-35; wrapText should not be allowed to wrap the MIME headers!!!
                  $_SETT['mime_headers']);

    if (!$bMail) {
        lovd_showInfoTable('Failed to send an email to ' . (isset($_GET['mail_to_me'])? 'you' : 'dbSNP') . '!', 'stop');
    } else {
        lovd_showInfoTable('An email with an attached file containing the exported variants was sent to ' . (isset($_GET['mail_to_me'])? 'you' : 'dbSNP. A Cc was sent to you') . '.', 'information');
    }
    require ROOT_PATH . 'inc-bot.php';
}
?>
