<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>gb-2008-9-5-r86</ui>
   <ji>GBJ</ji>
   <fm>
      <dochead>Method</dochead>
      <bibl>
         <title>
            <p>GeneCount: genome-wide calculation of absolute tumor DNA copy numbers from array comparative genomic hybridization data</p>
         </title>
         <aug>
            <au id="A1" ca="yes">
               <snm>Lyng</snm>
               <fnm>Heidi</fnm>
               <insr iid="I1"/>
               <email>heidi.lyng@rr-research.no</email>
            </au>
            <au id="A2">
               <snm>Lando</snm>
               <fnm>Malin</fnm>
               <insr iid="I1"/>
               <email>malin.lando@rr-research.no</email>
            </au>
            <au id="A3">
               <snm>Br&#248;vig</snm>
               <mi>S</mi>
               <fnm>Runar</fnm>
               <insr iid="I1"/>
               <email>runar.brovig@ge.com</email>
            </au>
            <au id="A4">
               <snm>Svendsrud</snm>
               <mi>H</mi>
               <fnm>Debbie</fnm>
               <insr iid="I1"/>
               <email>debbie.hege.svendsrud@rr-research.no</email>
            </au>
            <au id="A5">
               <snm>Johansen</snm>
               <fnm>Morten</fnm>
               <insr iid="I2"/>
               <email>morten.johansen@rr-research.no</email>
            </au>
            <au id="A6">
               <snm>Galteland</snm>
               <fnm>Eivind</fnm>
               <insr iid="I1"/>
               <email>eivindga@ulrik.uio.no</email>
            </au>
            <au id="A7">
               <snm>Brustugun</snm>
               <mi>T</mi>
               <fnm>Odd</fnm>
               <insr iid="I1"/>
               <insr iid="I3"/>
               <email>odd.terje.brustugun@radiumhospitalet.no</email>
            </au>
            <au id="A8">
               <snm>Meza-Zepeda</snm>
               <mi>A</mi>
               <fnm>Leonardo</fnm>
               <insr iid="I2"/>
               <insr iid="I4"/>
               <email>leonardo.meza-zepeda@rr-research.no</email>
            </au>
            <au id="A9">
               <snm>Myklebost</snm>
               <fnm>Ola</fnm>
               <insr iid="I2"/>
               <insr iid="I4"/>
               <email>ola.myklebost@rr-research.no</email>
            </au>
            <au id="A10">
               <snm>Kristensen</snm>
               <mi>B</mi>
               <fnm>Gunnar</fnm>
               <insr iid="I5"/>
               <insr iid="I6"/>
               <email>gunnar.balle.kristensen@radiumhospitalet.no</email>
            </au>
            <au id="A11">
               <snm>Hovig</snm>
               <fnm>Eivind</fnm>
               <insr iid="I2"/>
               <insr iid="I6"/>
               <insr iid="I7"/>
               <email>ehovig@radium.uio.no</email>
            </au>
            <au id="A12">
               <snm>Stokke</snm>
               <fnm>Trond</fnm>
               <insr iid="I1"/>
               <email>trond.stokke@rr-research.no</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Department of Radiation Biology, Institute for Cancer Research, Norwegian Radium Hospital, Montebello, NO-0310 Oslo, Norway</p>
            </ins>
            <ins id="I2">
               <p>Department of Tumor Biology, Institute for Cancer Research, Norwegian Radium Hospital, Montebello, NO-0310 Oslo, Norway</p>
            </ins>
            <ins id="I3">
               <p>Department of Oncology, Norwegian Radium Hospital, Montebello, NO-0310 Oslo, Norway</p>
            </ins>
            <ins id="I4">
               <p>Norwegian Microarray Consortium, Department of Molecular Bioscience, University of Oslo, NO-0316 Oslo, Norway</p>
            </ins>
            <ins id="I5">
               <p>Department of Gynecologic Oncology, Norwegian Radium Hospital, Montebello, NO-0310 Oslo, Norway</p>
            </ins>
            <ins id="I6">
               <p>Department of Medical Informatics, University of Oslo, NO-0316 Oslo, Norway</p>
            </ins>
            <ins id="I7">
               <p>Institute of Informatics, University of Oslo, NO-0316 Oslo, Norway</p>
            </ins>
         </insg>
         <source>Genome Biology</source>
         <issn>1465-6906</issn>
         <pubdate>2008</pubdate>
         <volume>9</volume>
         <issue>5</issue>
         <fpage>R86</fpage>
         <url>http://genomebiology.com/2008/9/5/R86</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="pmpid">18500990</pubid>
               <pubid idtype="doi">10.1186/gb-2008-9-5-r86</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>18</day>
               <month>1</month>
               <year>2008</year>
            </date>
         </rec>
         <revrec>
            <date>
               <day>23</day>
               <month>4</month>
               <year>2008</year>
            </date>
         </revrec>
         <acc>
            <date>
               <day>23</day>
               <month>5</month>
               <year>2008</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>23</day>
               <month>05</month>
               <year>2008</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2008</year>
         <collab>Lyng et al.; licensee BioMed Central Ltd.</collab>
         <note>This is an open access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <p>Absolute tumor DNA copy numbers can currently be achieved only on a single gene basis by using fluorescence <it>in situ </it>hybridization (FISH). We present GeneCount, a method for genome-wide calculation of absolute copy numbers from clinical array comparative genomic hybridization data. The tumor cell fraction is reliably estimated in the model. Data consistent with FISH results are achieved. We demonstrate significant improvements over existing methods for exploring gene dosages and intratumor copy number heterogeneity in cancers.</p>
         </sec>
      </abs>
   </fm>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Array comparative genomic hybridization (aCGH) is widely used for genome-wide mapping of DNA copy number changes in malignant cells <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr></abbrgrp>. Genetic gains and losses impact gene expression levels, and thereby promote tumor growth and progression <abbrgrp><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr><abbr bid="B5">5</abbr></abbrgrp>. Numerous clinical studies have been performed to find tumor characteristics and to classify patients with respect to their prognosis based on the copy number changes <abbrgrp><abbr bid="B6">6</abbr><abbr bid="B7">7</abbr></abbrgrp>. The usefulness of the aCGH data is limited, however, because only relative and not absolute copy numbers are achieved, making the interpretation of the data and comparisons across experiments difficult. Absolute DNA copy numbers can be obtained only on a single gene basis by the use of fluorescence <it>in situ </it>hybridization (FISH). Development of genome-wide methods for this purpose would enable generation of universal gene copy number databases of individual diseases that could be utilized more widely, as is the goal of several public repositories like the Mitelman Database of Chromosome Aberrations in Cancer <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>.</p>
         <p>The relative values achieved in aCGH experiments are influenced by the total DNA content (ploidy) of the tumor cells, the proportion of normal cells in the sample, and the experimental bias, in addition to the DNA copy numbers. The values are presented as intensity ratios between tumor and normal DNA <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. The data are normalized so that the ratio of 1.0 is the baseline for the analysis, and corresponds to two DNA copies in near diploid (2<it>n</it>) tumors. The copy number changes are identified from the ratios deviating from the baseline, using statistical methods for ratio smoothing and breakpoint detection <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr></abbrgrp>. To assign an absolute copy number to each ratio level identified by the statistical analysis and thereby score genetic aberrations is, however, challenging. In aneuploid tumors with gross alterations in the DNA content, the baseline represents a copy number other than 2, like 3 or 4 in tri- or tetraploid tumors, or a non-integer value when the DNA content differs from <it>n</it>, 2<it>n</it>, 3<it>n</it>, ... m<it>n </it><abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. The presence of normal cells within the sample and experimental bias reduce the ratio dynamics. Moreover, in many tumors, several subpopulations of malignant cells with different genetic characteristics exist, leading to intratumor heterogeneity in the DNA copy numbers <abbrgrp><abbr bid="B14">14</abbr><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp> and increased complexity in the data. Unreliable results occur, therefore, when common ratio levels are used to score gains and losses in tumors with different ploidy and normal cell content.</p>
         <p>The confounding effect caused by normal cells within tumor samples is recognized as a problem in aCGH analyses and has been handled by excluding low purity samples <abbrgrp><abbr bid="B17">17</abbr><abbr bid="B18">18</abbr></abbrgrp> or correcting the ratio levels based on histological examination of tumor sections <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. The latter approach is not satisfactory because only the proportion of connective tissue surrounding the tumor parenchyma, and not the infiltrating immune cells, is precisely quantified. Moreover, the measurements cannot be performed on exactly the same tissue as used in the aCGH experiment and may, therefore, not be representative. A model including the CGH ratios, ploidy, and experimental bias has been proposed for estimation of absolute DNA copy numbers in tumor cell lines <abbrgrp><abbr bid="B19">19</abbr></abbrgrp>. To our knowledge, no method exists that also considers the normal cell content and, thus, is suited for analyses of clinical tumor samples.</p>
         <p>We here present a new model, GeneCount, where the proportion of normal cells is estimated and corrected for and possible intratumor heterogeneity in DNA copy numbers is considered. Inputs to our model are the DNA index (<it>DI</it>, where <it>DI </it>= 1/2&#183;tumor ploidy), tumor cell fraction, experimental bias, and aCGH ratios. Predetermined measures of tumor ploidy, determined either by flow or image based cytometry, are needed. The tumor cell fraction can be determined by, for example, flow cytometry on the same part of the sample as used in the aCGH experiment. In cases of unknown normal cell content, the tumor cell fraction is estimated in the model. The experimental bias is determined from the X-chromosome ratio in aCGH experiments where male and female DNA is compared. Smoothed ratio levels from any existing statistical analysis tools for breakpoint detection can be used.</p>
         <p>We show that the model enabled automatic and genome-wide calculation of DNA copy numbers from aCGH data of both hematopoietic and solid tumors. The feasibility of GeneCount was demonstrated by analysis of 94 lymphomas, for which the DNA index and tumor cell fraction had been determined by use of flow cytometry and an extensive exploration of DNA copy numbers had been performed by the use of FISH in previous studies <abbrgrp><abbr bid="B20">20</abbr><abbr bid="B21">21</abbr><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>. The GeneCount results, both based on the pre-determined tumor cell fraction and that determined by the model, were compared with the FISH data of 362 genes with and without gains and losses, showing 97% consistency in both cases. In particular, we explored the copy numbers achieved in the t(14;18) translocated chromosomal region involving <it>BCL2</it>. We further demonstrated the potential of GeneCount in analysis of solid tumors without pre-determined tumor cell fractions by relating the copy number of selected genes in 93 cervical cancers to gene expression and treatment outcome. By use of GeneCount we obtained a higher sensitivity in detecting cervix tumors with copy number changes than was obtained in analysis based directly on the ratio levels. Finally, we identified intratumor heterogeneity of DNA copy numbers in the lymphomas and cervical cancers, and showed how this information could be used to draw conclusions about the evolution of the genetic aberrations in the tumors. GeneCount was implemented in a software package to be used downstream of statistical methods for breakpoint detection, and results based on both the GLAD and CGH-Explorer packages are presented <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B11">11</abbr></abbrgrp>. 
We supply our method through the open-source and free web-based database BioArray Software Environment (BASE) <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>.</p>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <sec>
            <st>
               <p>Basis of GeneCount</p>
            </st>
            <p>Our model utilizes the fact that the normalized aCGH ratio increases with increasing DNA copy number in a stepwise manner, where the step size is dependent on the <it>DI</it>, the tumor cell fraction, and the experimental bias (Figure <figr fid="F1">1</figr>). In near diploid tumors (<it>DI </it>= 1) without a contribution from normal cells or affected by experimental bias, an increment of 1 in the copy number increases the ratio by a value of 0.5, leading to a normalized ratio of 0.5, 1, 1.5, 2, and so on (-1, 0, 0.58, 1 on a log<sub>2 </sub>scale) for a copy number of 1, 2, 3, and 4, respectively (see Equation 2 in Materials and methods). The corresponding increase in tetraploid tumors (<it>DI </it>= 2) is 0.25, whereas an increase between 0.25 and 0.5 occurs in tumors with a <it>DI </it>between 1 and 2. Baseline, at a log<sub>2 </sub>ratio of 0, corresponds to 2, 3, and 4 DNA copies in near diploid (Figure <figr fid="F1">1a</figr>), triploid, and tetraploid (Figure <figr fid="F1">1b</figr>) tumors, respectively. For <it>DI</it>s between 1 and 1.5 or between 1.5 and 2, baseline represents a copy number between 2 and 3 (Figure <figr fid="F1">1c</figr>) or between 3 and 4. The presence of normal cells within the tumor sample reduces the increase in aCGH ratio with incremental copy number (Equation 3), as can be seen when comparing the ratios of two near diploid lymphomas with different tumor cell fractions (Figures <figr fid="F1">1a,d</figr>). Using common ratio levels for scoring gains and losses in tumors like those presented in Figures <figr fid="F1">1a-d</figr> leads, therefore, to different results with respect to copy number changes.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>Illustration of the stepwise increase in aCGH ratios with increasing DNA copy number</p>
               </caption>
               <text>
                  <p>Illustration of the stepwise increase in aCGH ratios with increasing DNA copy number. Frequency histograms (% array probes) of aCGH ratios (left panels) and plot of aCGH ratio versus chromosomal location (right panels) are shown for a lymphoma with a DNA index (<it>DI</it>) of <b>(a) </b>1.02, <b>(b) </b>1.94, <b>(c) </b>1.21, and <b>(d) </b>1.05, and <b>(e) </b>for normal DNA comparing male and female. The tumor cell fraction, measured by flow cytometry, is indicated for each tumor. DNA copy numbers estimated by GeneCount are marked; those in black were consistent with FISH data, whereas those in red have not been subjected to FISH measurements in the specific tumors shown. The arrows in the right panels point to the locations of the FISH probes. At a <it>DI </it>close to 1 and 2 (a,b,d,e) the ratio distribution shows a major peak at a median log<sub>2 </sub>value of approximately zero, representing the most frequent DNA copy numbers of 2 and 4, respectively. At a <it>DI </it>of 1.21 (c) the baseline at a log<sub>2 </sub>ratio of 0 represents a number between 2 and 3 DNA copies. Note the smaller increase in the ratios with increasing DNA copy number at a tumor cell fraction of 70% (d) than of 96% (a). In (e), determination of the dynamic factor, <it>q</it>, as the absolute value of the X-chromosome log<sub>2 </sub>ratio level is indicated.</p>
               </text>
               <graphic file="gb-2008-9-5-r86-1"/>
            </fig>
            <p>A further reduction in the ratio dynamics occurs due to experimental bias (Equation 4). The bias, as represented by the dynamic factor, <it>q</it>, can be determined from control experiments, where normal DNA from males and females is cohybridized (Figure <figr fid="F1">1e</figr>). Theoretically, the X-chromosome ratio is 0.5 (-1 on a log<sub>2 </sub>scale), but the experimental bias reduces the ratio dynamics, leading to a ratio level closer to zero. The absolute value of the log<sub>2</sub>-transformed ratio level was used as a measure of <it>q </it>(Figure <figr fid="F1">1e</figr>). This value differed little among the slide series used here, ranging from 0.75-0.85 with a mean &#177; standard deviation of 0.80 &#177; 0.04 based on 8 control experiments. A <it>q</it>-value of 0.8 and a range of 0.7-0.9 were used in the GeneCount calculations in the cases of known and unknown tumor cell fraction, respectively.</p>
            <p>To enable automatic calculation of the copy number associated with each array probe, we implemented GeneCount in a program to be run on top of statistical analysis packages for aCGH ratio smoothing and breakpoint detection (Additional data file 1). A separate algorithm was developed for samples with unknown tumor cell fraction, where the fraction was estimated based on two ratio levels and <it>DI </it>(panel B in Additional data file 1), as described in Materials and methods. One decimal was included in the calculated DNA copy numbers when evaluating the results in comparison with FISH data. Otherwise, the numbers were rounded off to the nearest integer values.</p>
         </sec>
         <sec>
            <st>
               <p>GeneCount copy numbers in comparison with FISH data</p>
            </st>
            <p>We compared the GeneCount results of 94 lymphomas with previously published FISH data from the same tumors <abbrgrp><abbr bid="B20">20</abbr><abbr bid="B21">21</abbr><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>. The FISH probes were located at chromosomal regions with frequent copy number changes (Figure <figr fid="F1">1</figr> and Additional data file 2), and copy numbers within the range of 0-8 had been measured. The <it>DI</it>s, ranging from 0.95-2.23, and the tumor cell fractions, ranging from 27% to 98%, were used as inputs to GeneCount, together with the smoothed aCGH ratios from the GLAD and CGH-Explorer packages. CGH-Explorer applied a more extensive ratio smoothing than GLAD, and this led occasionally to differences in the ratio levels and breakpoint detection between the two programs.</p>
            <sec>
               <st>
                  <p>GeneCount with known tumor cell fraction</p>
               </st>
               <p>In most cases, we found an excellent agreement between the DNA copy number determined by GeneCount and FISH, regardless of whether GLAD or CGH-Explorer was used for breakpoint detection (Figure <figr fid="F2">2</figr>). The correlation between the data sets was considerably better than when the ratio levels were used in the comparison (Additional data file 3). Based on GLAD, 350 out of 362 GeneCount values were consistent with the FISH data (97%), whereas the corresponding number based on CGH-Explorer was 340 out of 362 (94%) (Figure <figr fid="F2">2</figr>). The few discrepancies between the GeneCount and FISH results occurred mainly for two reasons. First, GLAD and/or CGH-Explorer failed to detect the ratio change of some of the genes that had a copy number change by FISH (panel A in Additional data file 4). Second, the ratio level, and therefore the copy number, was inaccurately determined for some aberrations involving only a few array probes (panel B in Additional data file 4). This was primarily the case for aberrations with less than three probes, like the homozygote deletion involving two probes that covered <it>RB1 </it>in one of the tumors (FISH copy number of 0 in Figure <figr fid="F2">2</figr> and panel B in Additional data file 4). The discrepancies between the GeneCount and FISH data were related, therefore, to the software used for breakpoint detection and not due to errors in the GeneCount algorithm.</p>
               <fig id="F2">
                  <title>
                     <p>Figure 2</p>
                  </title>
                  <caption>
                     <p>GeneCount calculations with known tumor cell fraction</p>
                  </caption>
                  <text>
                     <p>GeneCount calculations with known tumor cell fraction. DNA copy number calculated by GeneCount is plotted against the corresponding FISH result for 9 genes in 94 lymphomas. The smoothed aCGH ratios from <b>(a) </b>GLAD and <b>(b) </b>CGH-explorer, a <it>q</it>-value of 0.8, and a <it>DI</it> and tumor cell fraction determined by flow cytometry were inputs to GeneCount. Grey and blue columns represent GeneCount results that were consistent and inconsistent with the FISH data, respectively, after rounding off the GeneCount number to the nearest integer value. Frequency distributions are shown for each copy number, containing 1, 25, 246, 66, 15, 5, 4, and 1 value at a FISH copy number of 0, 1, 2, 3, 4, 5, 6, and 8, respectively.</p>
                  </text>
                  <graphic file="gb-2008-9-5-r86-2"/>
               </fig>
            </sec>
            <sec>
               <st>
                  <p>GeneCount with unknown tumor cell fraction</p>
               </st>
               <p>The tumor cell fraction could be estimated for 55 and 43 out of 94 lymphomas based on GLAD and CGH-Explorer, respectively. The remaining tumors lacked aberrations or two different ratio levels that could be used for the estimation (Materials and methods). The estimated tumor cell fractions correlated significantly with those measured by flow cytometry (Figure <figr fid="F3">3</figr>). Moreover, the estimates had a coefficient of variation (CV) of less than 11% (Figure <figr fid="F3">3</figr>), and were therefore fairly stable. The mean <it>q</it>-value determined in the calculation differed little across the tumors, ranging from 0.73-0.84 (GLAD) and 0.74-0.82 (CGH-Explorer) (data not shown).</p>
               <fig id="F3">
                  <title>
                     <p>Figure 3</p>
                  </title>
                  <caption>
                     <p>GeneCount estimations of tumor cell fraction</p>
                  </caption>
                  <text>
                     <p>GeneCount estimations of tumor cell fraction. Tumor cell fraction of lymphomas estimated by GeneCount is plotted against tumor cell fraction measured by flow cytometry. Each point represents mean &#177; standard deviation based on the values achieved for <it>q </it>within the range 0.7-0.9. The smoothed aCGH ratios from <b>(a) </b>GLAD and <b>(b) </b>CGH-explorer, the <it>q </it>range 0.7-0.9, and a <it>DI </it>determined by flow cytometry were inputs to GeneCount. The calculations were based on 55 (a) and 43 (b) tumors for which suitable ratio levels for the calculations existed. Correlation coefficients and <it>P</it>-values from Pearson product moment correlation analyses are indicated.</p>
                  </text>
                  <graphic file="gb-2008-9-5-r86-3"/>
               </fig>
               <p>The consistency between the GeneCount and FISH data (Figure <figr fid="F4">4</figr>) was comparable to when the known tumor cell fraction was used (Figure <figr fid="F2">2</figr>) and much better than when the ratio levels and FISH data were compared (Additional data file 3). Based on GLAD, 218 out of 231 DNA copy numbers were in agreement with the FISH data (94%), whereas the corresponding numbers based on CGH-Explorer were 173 out of 179 (97%) (Figure <figr fid="F4">4</figr>). Most differences between the GeneCount and FISH results occurred for the same reasons as when the known tumor cell fraction was used (Additional data file 4). Additionally, a discrepancy was seen for some of the highest copy numbers based on GLAD (Figure <figr fid="F4">4</figr>), due to a large discrepancy between the estimated and measured tumor cell fraction in one of the cases (Figure <figr fid="F3">3a</figr>).</p>
               <fig id="F4">
                  <title>
                     <p>Figure 4</p>
                  </title>
                  <caption>
                     <p>GeneCount estimations with unknown tumor cell fraction</p>
                  </caption>
                  <text>
                     <p>GeneCount estimations with unknown tumor cell fraction. DNA copy number calculated by GeneCount, using a <it>q</it>-value within the range 0.7-0.9, a <it>DI </it>determined by flow cytometry, and the tumor cell fraction estimated by GeneCount in Figure 3, is plotted against the corresponding FISH result for 9 genes in <b>(a) </b>55 and <b>(b) </b>43 lymphomas. The smoothed array CGH ratio derived from GLAD and CGH-explorer was used in (a) and (b), respectively. Grey and blue columns represent GeneCount results that were consistent and inconsistent with the FISH data, respectively, after rounding off the GeneCount value. Frequency distributions are shown for each copy number, containing 1, 19, 134, 56, 11, 5, 4, and 1 value at a FISH copy number of 0, 1, 2, 3, 4, 5, 6, and 8, respectively, based on GLAD. The corresponding numbers based on CGH Explorer were 1, 15, 98, 48, 7, 5, 4, and 1.</p>
                  </text>
                  <graphic file="gb-2008-9-5-r86-4"/>
               </fig>
            </sec>
            <sec>
               <st>
                  <p>DNA copy numbers in translocated chromosomal regions</p>
               </st>
               <p>The relationship between the GeneCount estimates and FISH data in translocated chromosomal regions was explored by using <it>BCL2</it>, which is involved in the translocation t(14;18) in lymphomas, as an example. The aCGH probe covering <it>BCL2 </it>is located telomeric of the breakpoint. The aCGH data and GeneCount results of <it>BCL2 </it>were therefore not affected by the translocation. For FISH analysis, we selected a <it>BCL2 </it>probe covering the breakpoint. The probe signal was split in tumors with translocation, leading to a signal from both der(14)t(14;18) and der(18)t(14;18), although <it>BCL2 </it>is located on the former chromosome. The FISH signal was therefore higher than the actual <it>BCL2 </it>copy number, and differed from the GeneCount result in all 38 tumors with translocation (Figure <figr fid="F5">5a</figr>). After recalculating the FISH copy numbers as described <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>, the consistency in the data was excellent, except in one case at a corrected FISH value of five copies (Figure <figr fid="F5">5b</figr>). This discrepancy was due to failure of GLAD and CGH-Explorer in detecting a narrow amplicon involving <it>BCL2 </it>(panel C in Additional data file 4).</p>
               <fig id="F5">
                  <title>
                     <p>Figure 5</p>
                  </title>
                  <caption>
                     <p>GeneCount estimations in the t(14;18) translocated region involving <it>BCL2</it></p>
                  </caption>
                  <text>
                     <p>GeneCount estimations in the t(14;18) translocated region involving <it>BCL2</it>. <it>BCL2 </it>copy number estimated by GeneCount, using a <it>q</it>-value of 0.8 and a <it>DI </it>and tumor cell fraction determined by flow cytometry, is plotted against the corresponding FISH result in 94 lymphomas. The smoothed array CGH ratios derived from GLAD and CGH-explorer were used in the left and right panels, respectively. Grey and blue columns represent GeneCount calculations that were consistent and inconsistent with the FISH measurements, respectively, after rounding off the GeneCount value. <b>(a) </b>Uncorrected FISH data are plotted; <b>(b) </b>these data were corrected as described in [22]. Frequency distributions are shown for each copy number, containing 1, 38, 33, 13, 5, and 1 value for a red spot FISH copy number of 1, 2, 3, 4, 5, and 6. The corresponding number of measurements for the corrected FISH data of 1, 2, 3, 4, 5, and 6 were 1, 69, 14, 4, 2 and 1.</p>
                  </text>
                  <graphic file="gb-2008-9-5-r86-5"/>
               </fig>
            </sec>
         </sec>
         <sec>
            <st>
               <p>GeneCount analysis of solid tumors</p>
            </st>
            <p>The feasibility of our method for analysis of solid tumors without information of tumor cell fraction was explored in 99 cervical cancers, for which the <it>DI </it>ranged from 1.00-3.16. The tumor cell fraction could be estimated for 93 and 89 tumors based on GLAD and CGH-Explorer, respectively, fulfilling the requirements for this estimation (Materials and methods). The tumor cell fractions were poorly correlated with the values determined by analysis of histological sections (Additional data file 5). In most cases, the histology result was higher than the GeneCount estimate, probably because immune cells infiltrating the tumor parenchyma were not properly quantified by the histological examination. In a few cases, however, the histology result was lower, probably reflecting that different parts of the sample were used in the aCGH and histology analyses. The tumors for which the tumor cell fraction could be estimated by GeneCount were included in the further analyses.</p>
            <p>A higher number of genetic aberrations were generally found in the cervical cancers than in the lymphomas. High level amplifications with more than 2.5-fold increases in gene dosage (that is, copy number, <it>N</it>, relative to total DNA content given by two times the DNA index (<it>N</it>/(2&#183;<it>DI</it>))), were found in about half of the tumors and most frequently on chromosomes 5p and 11q. GeneCount analysis showed copy numbers within the range of 5-80 in these regions, which were often surrounded by gains at lower levels.</p>
            <p>The GeneCount results were compared with the outcomes of existing analysis methods, where gains and losses were scored from the smoothed ratio levels and breakpoints obtained by GLAD and CGH-Explorer. The log<sub>2 </sub>transformed ratio levels of &#177; 0.2 (that is, approximately two times the ratio standard deviation (Additional data file 6)) were applied as cut-off levels for scoring aberrations. We selected genes that were shown to be affected by gains and losses in previous studies on a subgroup of the patients <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>. Some of the genes showed only a small variation in the aCGH ratios, often within the level of &#177; 0.2, and only a few tumors with aberrations were identified (Figure <figr fid="F6">6a</figr> and panel A in Additional data file 7). A higher number of patients with changes in gene copy numbers and in the corresponding gene dosages were identified with GeneCount, using the cut-off levels of &#177; 0.2 for scoring gene dosage changes (Figure <figr fid="F6">6b,c</figr> and panels B and C in Additional data file 7). The gene dosage correlated significantly with gene expression (Figure <figr fid="F6">6c</figr> and panel C in Additional data file 7), making the copy number changes determined by GeneCount plausible.</p>
            <fig id="F6">
               <title>
                  <p>Figure 6</p>
               </title>
               <caption>
                  <p>GeneCount analyses in cervical cancers</p>
               </caption>
               <text>
                  <p>GeneCount analyses in cervical cancers. <b>(a) </b>Frequency histogram (number of tumors) of smoothed aCGH ratios (GLAD) for <it>MRPS23 </it>(BAC clone ID RP11-19F16). Dotted lines indicate the cut off ratio levels of &#177; 0.2, identifying 5 tumors with genetic gain and 3 tumors with loss. <b>(b) </b>Frequency histogram (number of tumors) of <it>MRPS23 </it>copy number calculated by GeneCount. The GLAD ratio levels, the <it>DI </it>measured by flow cytometry, and the tumor cell fraction estimated by GeneCount were used in the calculation. Similar results were achieved based on the CGH-Explorer ratio levels. <b>(c) </b>Plot of gene expressions against gene dosage; that is, the <it>MRPS23 </it>copy number divided by the total DNA content (<it>N</it>/(2&#183;<it>DI</it>)). Increased gene dosage with more than 15% of the total DNA content (log<sub>2 </sub>transformed gene dosage of at least 0.2) was seen in 15 tumors (red and blue symbols). Red symbols represent the five tumors with gain in (a), whereas blue symbols represent the remaining ten tumors with increased gene dosage that were not identified in (a). The correlation coefficient and <it>P</it>-value from Pearson product moment correlation analysis are indicated. <b>(d) </b>Kaplan-Meier analysis based on GeneCount results for <it>MRPS23</it>. Plots of the survival probability are shown for 5 patients with high gene dosage in (c), who also had gain in (a) (red line), 10 patients with high gene dosage in (c) and without gain in (a) (blue line), and 78 patients with low gene dosage in (c). <b>(e) </b>Kaplan-Meier analysis based on the <it>MRPS23 </it>ratio levels. The survival probability of 5 patients with gain in (a) (red line) and 88 patients without gain in (a) (black line) is plotted. Only five high risk patients were identified in (e), whereas ten more patients were identified by GeneCount in (d). <it>P</it>-value in log-rank test is indicated in (d,e). 
Panels (a,b,d) are based on 93 tumors, for which the tumor cell fraction could be estimated by GeneCount. Panel (c) is based on 89 of these tumors, for which both DNA copy number and gene expression were available.</p>
               </text>
               <graphic file="gb-2008-9-5-r86-6"/>
            </fig>
            <p>The copy number changes of <it>MRPS23 </it>have previously been shown to correlate with survival probability <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>. Survival analysis based on the GeneCount data of <it>MRPS23 </it>identified more patients with poor outcome than the corresponding analysis based on ratio levels (Figure <figr fid="F6">6d,e</figr>). Hence, 15 high risk patients were identified based on the GeneCount results, whereas only 5 patients were classified with high risk based on the ratio levels. Nine of the ten patients that were not identified based on ratio levels (blue curve in Figure <figr fid="F6">6d</figr>) had aneuploid tumors with a DNA index ranging from 1.10-1.92. The remaining diploid tumor had a relatively low tumor cell fraction of 23%.</p>
         </sec>
         <sec>
            <st>
               <p>Intratumor heterogeneity in DNA copy numbers</p>
            </st>
            <p>Some tumors had genome regions for which the aCGH ratio was clearly different from that corresponding to an integer copy number. This probably reflected intratumor heterogeneity in the DNA copy numbers, that is, the existence of subpopulations with copy number changes that are not common for all tumor cells in the sample. The common aberrations can thus be considered homogeneous. Lymphomas and cervical cancers with heterogeneous DNA regions had ratio levels that fell in between, and were significantly different from, those corresponding to integer values (Figure <figr fid="F7">7</figr>). The actual ratio level reflected the proportion of cells with that aberration (Equation 1).</p>
            <fig id="F7">
               <title>
                  <p>Figure 7</p>
               </title>
               <caption>
                  <p>GeneCount identification of DNA copy number heterogeneity within tumors</p>
               </caption>
               <text>
                  <p>GeneCount identification of DNA copy number heterogeneity within tumors. <b>(a) </b>Frequency histogram (% array probes) of aCGH ratios in a heterogeneous lymphoma, including data for the entire genome. <b>(b) </b>aCGH ratios are plotted against chromosomal location, showing the heterogeneous regions on chromosomes 8, 9, and 17 with a DNA copy number of 3&amp;4 in blue. <b>(c) </b>Frequency histogram (% array probes) of aCGH ratios for two homogeneous DNA regions with a copy number of 3 and 4 (upper panel) and the heterogeneous region depicted in (b) with a copy number of 3&amp;4 (lower panel). The ratio distributions of copy number 3, 4, and 3&amp;4 were significantly different (<it>p </it>&lt; 0.001, ANOVA). DNA copy numbers estimated by GeneCount from the <it>DI </it>and tumor cell fractions measured by flow cytometry are marked; those in black were consistent with FISH experiments, whereas those in red have not been subjected to FISH measurements in the specific tumors shown. The arrows in (b) point to the locations of the FISH probes. Note that the 3&amp;4 copy number of the heterogeneous region has been confirmed with FISH.</p>
               </text>
               <graphic file="gb-2008-9-5-r86-7"/>
            </fig>
            <p>Nineteen (20%) lymphomas and 44 (50%) cervical cancers had one or more heterogeneous DNA regions with copy numbers 1&amp;2, 2&amp;3, or 3&amp;4 (Additional data files 8 and 9). Reliable detection of heterogeneity required tumor cell fractions above 24% (Additional data file 10) and 5 out of 93 cervical cancers were therefore excluded from this analysis. Lymphoma L309/89 (Figure <figr fid="F7">7</figr>) had previously been identified as heterogeneous by FISH, showing one population with three and another with four copies of <it>MYC </it>and centromeres 8 and 17 <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. Moreover, several of the heterogeneous aberrations in the cervical cancers, such as loss on chromosome 4 and X and gain on 11q and 17 in C005/01, loss on 6q and gain of 11q in C006/01, and loss on 4 in C023/01, were similar to those detected earlier by conventional CGH <abbrgrp><abbr bid="B14">14</abbr></abbrgrp>. The previous study was, however, based on a different set of biopsies, which probably explains the lack of consistency for some of the tumors.</p>
            <p>In a few of the heterogeneous tumors, two different ratio levels were identified between one and two copies (Figure <figr fid="F8">8</figr> and Additional data file 11). Thus, it appeared that the corresponding aberrations were present in different fractions of the tumor cell population. Lymphoma L008/92 had two intermediate ratio levels between one and two copies, corresponding to 70% and 30% of the tumor cells (Figure <figr fid="F8">8b</figr>, blue and red ratios, respectively), leading to the possible tumor evolutionary schemes depicted in Figure <figr fid="F8">8c</figr>. As the sum of the two fractions did not exceed 100%, the heterogeneous aberrations may be found in non-overlapping subpopulations of the tumor, where the subpopulations have evolved differently from a predicted common population containing the homogeneous aberrations (parallel sequence). A serial sequence, where the populations have evolved in a linear manner from a common population, was also possible. In C024/01, however, the heterogeneous ratio levels corresponded to 78% and 44% of the tumor cells, and a serial sequence was the only one suggested (panel C in Additional data file 11).</p>
            <fig id="F8">
               <title>
                  <p>Figure 8</p>
               </title>
               <caption>
                  <p>Evolutionary sequences of subpopulations in heterogeneous tumors</p>
               </caption>
               <text>
                  <p>Evolutionary sequences of subpopulations in heterogeneous tumors. <b>(a) </b>Frequency histogram (% array probes) of aCGH ratios in a heterogeneous lymphoma is shown, including data for the entire genome. <b>(b) </b>The aCGH ratios are plotted against chromosomal location. The heterogeneous regions on chromosomes 2q, 5p, 7q, 9p, 13q, 20q, and Xp with a DNA copy number of 1&amp;2 and on chromosomes 2p, 4q, 6p, 11q, and 18 with a DNA copy number of 2&amp;3 are shown in blue and red. The blue and red colors represent aberrations that are present in different fractions of the tumor cells; 70% and 30%, respectively. The heterogeneous aberrations are listed in Additional data file 8 except those with a copy number of 2&amp;3, since the lack of 3 DNA copies in this tumor prevented statistical analysis to identify 2&amp;3 heterogeneity. <b>(c) </b>Schematic diagrams of two possible evolutionary sequences for the aberrations, one parallel and one serial sequence, are shown. The blue and red circles represent the blue and red aberrations in (b). The percentages indicate the fractions of tumor cells with the listed aberrations, as calculated by GeneCount, showing that the aberrations in blue and red are present in 70% and 30% of the tumor cells, respectively.</p>
               </text>
               <graphic file="gb-2008-9-5-r86-8"/>
            </fig>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>We have shown that GeneCount is a reliable method for genome-wide calculation of DNA copy numbers in clinical tumor samples. Such data are biologically interesting in themselves but may also lead to improved prediction of treatment outcome and aid in the identification of novel tumor suppressors and oncogenes. We applied the method to lymphomas, for which accurate measures of tumor cell fraction and DNA copy numbers have been obtained by other techniques that could be compared with the GeneCount results. We further used the method on cervical cancers, for which tumor cell fractions representative of the aCGH data are more difficult to achieve by a separate technique. The GeneCount model is simple, due to the use of normal cells with two DNA copies throughout the genome as a reference sample. Moreover, the estimated copy numbers are restricted to positive integers, increasing the robustness of the method. A requirement for achieving the absolute quantification format is the use of pre-determined tumor ploidies, whereas the tumor cell fractions, if not known, and experimental bias can be estimated from the aCGH data.</p>
         <p>The experimental bias is mainly caused by signals from unsuppressed repetitive sequences and nonspecific hybridization <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. The bias influences the test and reference sample equally and independently of the DNA copy number, since it is generated by sequences distributed throughout the genome. The bias could, therefore, be summed up in an array specific factor, <it>q</it>, representing the dynamics of the log-transformed ratios. Mohapatra <it>et al</it>. <abbrgrp><abbr bid="B19">19</abbr></abbrgrp> included the bias as a constant factor affecting the absolute, rather than the log-transformed, ratios in their model for pure tumor cells. Our approach seems justified because the noise (width) of the log-transformed ratios was independent of the ratios and, therefore, of the DNA copy numbers (Additional data file 6). We allowed for a small variation in <it>q </it>when calculating the tumor cell fraction to account for minor differences in the bias across the tumors. This <it>q</it>-value, optimized for each tumor, was highly similar to the mean <it>q </it>determined from control experiments, indicating that the bias was stable across experiments. Moreover, the discrepancies between the GeneCount and FISH results were related to the specific genetic aberration involved and, therefore, to the breakpoint detection algorithm, rather than to possible uncertainties in <it>q</it>. Recent developments in array CGH technology, utilizing oligonucleotides rather than bacterial artificial chromosome (BAC) clones, led to improved ratio dynamics and reduction in the experimental bias due to less repetitive sequences <abbrgrp><abbr bid="B28">28</abbr></abbrgrp>. Ongoing work in our laboratory shows that by using oligoarrays, GeneCount can be applied with a <it>q </it>value close to 1.</p>
         <p>Inclusion of the tumor cell fraction is a prerequisite for the calculation of absolute DNA copy numbers in clinical tumor samples. The lymphoma data were based on single cell suspensions made from the entire lymph nodes. A tumor cell fraction representative of the lymph node could, therefore, be determined with high accuracy by a separate technique like flow cytometry. In solid tumors such as cervical cancers, the normal cells consist of stroma, which is highly heterogeneously distributed within the tissue, and immune cells, which infiltrate the tumor parenchyma. A measure of the tumor cell fraction achieved by, for example, histological examination, which is based on a part of the sample different from that used for the aCGH experiment and/or fails to quantify the proportion of immune cells accurately, is, therefore, not precise enough for the calculation of DNA copy numbers. Histology data may, however, be useful for preselecting tumor enriched samples for the aCGH analysis. Fairly stable estimates of the tumor cell fraction, consistent with the values measured by flow cytometry, were achieved by the use of GeneCount. The estimates led to DNA copy numbers in agreement with the FISH data, suggesting that the accuracy of the tumor cell fractions was sufficient for reliable data analysis. Selection of appropriate ratio levels for the estimation was crucial for achieving this accuracy. We required that the tumors had at least two aberrations with different copy numbers and with more than ten array probes each to reduce errors caused by poorly defined ratio levels and breakpoints. Moreover, only ratio levels deviating more than 0.15 (log<sub>2 </sub>scale) from the baseline were selected, implying that tumor fractions higher than 24% (diploid) and 36% (tetraploid) were needed when copy numbers were changed to 3 or 5 copies, respectively (Additional data file 12).</p>
         <p>The few discrepancies between the GeneCount and FISH data were not related to our model, but rather to the ability of the statistical methods to detect some of the aberrations. Hence, the consistency between the GeneCount and FISH results was similar to the reliability of GLAD in detecting breakpoints in simulated data <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. The highest accuracy of the GeneCount results was obtained for well defined aberrant regions containing at least three array probes. In these cases a ratio level representative of the corresponding copy number was achieved and the probability of detecting the aberration was high. The increased uncertainty in the results of narrow aberrations implies that they should be confirmed by a separate technique like FISH. Moreover, to ensure sufficient ratio dynamics and, therefore, a high probability of breakpoint detection, a tumor cell fraction higher than a certain value, which depends on the experimental noise and tumor ploidy, is needed. With the noise of our experiments (Additional data file 6), a tumor cell fraction above 23% in diploid, and somewhat higher in hyperdiploid cases, enabled separation of an aberration with more than three array probes (Additional data file 13). This fraction also enabled detection of heterogeneous DNA copy numbers involving more than ten array probes (Additional data file 10). In experiments with more noise, caused by, for example, poor DNA quality, higher tumor cell fractions are required. In comparison, at least 50% tumor cells is suggested for optimal detection of gains and losses by conventional CGH <abbrgrp><abbr bid="B29">29</abbr></abbrgrp>.</p>
         <p>The DNA copy number of genes involved in translocations cannot be directly assessed by FISH when a probe covering the breakage region is used, because signals from both the original chromosomes are detected in the translocated derivatives. Correction of the probe signal to achieve the true copy number requires knowledge of the breakpoint and genes involved in the translocation. Reliable FISH analysis in solid tumors, where the translocations are not well identified and may occur throughout the genome <abbrgrp><abbr bid="B30">30</abbr></abbrgrp> is, therefore, particularly challenging. By aCGH, the probe signal is measured independently of the actual genome organization of the DNA covered by the probe. Hence, in the case of balanced translocations, a correct result will be obtained even if the probe covers the breakpoint. If the probe is located at the start or end of an amplified or deleted region (unbalanced translocation), the aCGH ratios of the adjacent probes ensure that the correct copy number is calculated. Our model therefore provides a novel method for assessment of copy numbers both in balanced and unbalanced translocated regions and without knowing that the translocation exists.</p>
         <p>Current methods for analysis of aCGH data generally score genetic gains and losses based on ratio levels <abbrgrp><abbr bid="B31">31</abbr><abbr bid="B32">32</abbr><abbr bid="B33">33</abbr><abbr bid="B34">34</abbr><abbr bid="B35">35</abbr><abbr bid="B36">36</abbr></abbrgrp>. The breakpoints in individual tumors can be detected with high accuracy by use of statistical algorithms like GLAD and CGH-Explorer. However, the existing downstream analyses, using common ratio levels for scoring aberrations across tumors, fail to identify gains and losses in cases of high ploidy or high normal cell content. By the use of GeneCount, the ratio levels are replaced with the absolute copy numbers relative to the total DNA content as measures of gene dosage, which can be compared across tumors regardless of ploidy and normal cell content. Hence, copy number changes that were not detected by analysis based on ratio levels, but showed significant correlation with gene expression, were found in cervical cancers, suggesting that improved results were achieved. Moreover, many patients with poor outcome that had <it>MRPS23 </it>gain by GeneCount had no gain based on ratio levels. In the latter case, the gain was masked by high content of normal cells or high ploidy, showing that GeneCount is more sensitive in detecting patients with genetic aberrations. The finding further demonstrates that GeneCount applies well to solid tumors for which the tumor cell fraction is generally unknown and must be estimated by the method. Advances in current statistical analysis methods may utilize adjustable ratio levels for scoring gains and losses, optimizing the cut-off ratios for each tumor based on a mathematical evaluation of the ratio dynamics. Such methods may account for varying ploidy and normal cell content across diploid, triploid, and tetraploid tumors. However, the strategy is not useful for tumors with an intermediate ploidy like 1.25 (Figure <figr fid="F1">1c</figr>). 
In contrast, the absolute DNA copy number relative to the total DNA content, or gene dosage, is comparable also across such tumors.</p>
         <p>We also showed that GeneCount can provide genome-wide and high resolution information of intratumor heterogeneity in the DNA copy numbers. Such heterogeneity has previously been detected only on a single gene basis by FISH or at low resolution by conventional CGH analyses <abbrgrp><abbr bid="B14">14</abbr><abbr bid="B15">15</abbr><abbr bid="B20">20</abbr><abbr bid="B37">37</abbr><abbr bid="B38">38</abbr></abbrgrp>, probably reflecting a high genomic instability <abbrgrp><abbr bid="B39">39</abbr></abbrgrp>. Detection of heterogeneity involving two DNA copies by the use of FISH is challenging, since the heterogeneous tumor population is difficult to distinguish from normal cells. The probability to detect heterogeneity with GeneCount depends on the fraction of tumor cells with the heterogeneous aberration. Obviously, the probability is largest at a fraction of 50%, but fractions higher than 70% and lower than 30% were also identified. Heterogeneity in low copy numbers, like 1&amp;2 and 2&amp;3, is more easily detected, since the separation between the log-transformed ratio levels is larger. At higher copy numbers, the possibility to detect heterogeneity decreases, depending on the ploidy and normal tissue content. However, we also identified heterogeneous regions with copy number 3&amp;4 in several tumors and 4&amp;5 in one tumor. Finally, the probability to detect heterogeneity also depends on the proportion of the genome that is affected. In our data severe heterogeneity affecting up to 40% of the genome could be analyzed with GeneCount (C002/01; Additional data file 9). With an increasingly larger part of the genome affected, difficulties in finding breakpoints and even homogeneous aberrations eventually occur, leading to unreliable results regardless of analysis method.</p>
         <p>The heterogeneity data led to insight into the evolutionary sequence of the copy number changes. The homogeneous aberrations had probably occurred prior to the heterogeneous ones <abbrgrp><abbr bid="B14">14</abbr></abbrgrp>. Moreover, in cases where the heterogeneous aberrations appeared to be present in different fractions of the tumor cell population, these aberrations could be ordered chronologically in a serial and/or parallel sequence. It was not always possible to identify the correct sequence among the proposed ones, as could be done by comparing data for several biopsies from the same tumor <abbrgrp><abbr bid="B14">14</abbr></abbrgrp>. However, identification of the heterogeneous as well as the homogeneous aberrations suggests a further possible investigation of the exact combination of aberrations in each subpopulation, employing, for example, triple-color FISH with one probe for a homogeneous aberrant region and two for the heterogeneous ones.</p>
         <p>In the heterogeneity analysis we assumed that the ploidy was the same for all subpopulations of malignant cells. This assumption was justified because no cases were observed with two aneuploid populations by flow cytometry. A possible difference in the ploidy of two aneuploid populations within a tumor was therefore probably smaller than 10%, leading to less than 10% uncertainty in the copy numbers calculated by GeneCount (Equation 4; data not shown). The same uncertainty also applied to near diploid and heterogeneous cervical cancers. These tumors often showed a broad G<sub>1 </sub>peak with a CV up to 10% by flow cytometry, probably reflecting the existence of several subpopulations with ploidy within the range of 1.0-1.1. Moreover, few or no light chain positive cells were observed in the diploid population of the aneuploid lymphomas, suggesting that the diploid population contained primarily normal cells. It is possible, however, that the diploid population of the aneuploid cervical cancers contained malignant cells, as we have previously shown for aneuploid colorectal cancers <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. This might have led to larger uncertainties in the heterogeneous copy numbers due to the use of an erroneous DNA index of the diploid population. The data of such tumors can be improved by sorting the diploid and aneuploid fractions by flow cytometry <abbrgrp><abbr bid="B40">40</abbr></abbrgrp> for separate aCGH and GeneCount analyses.</p>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>GeneCount provides reliable DNA copy numbers, both when based on the tumor cell fractions determined by flow cytometry and those estimated by the method. Accurate data are also achieved in translocated chromosomal regions, as demonstrated for the t(14;18) translocation involving <it>BCL2</it>. Our method is the only one to provide genome-wide information of absolute DNA copy numbers. Moreover, the method represents a significant improvement compared to existing methods in the study of gene dosages and intratumor copy number heterogeneities. The robustness of GeneCount implies that the method can be utilized widely in the genomic exploration of both hematopoietic and solid tumors, addressing DNA copy number aspects in a reliable manner, regardless of possible translocations. This may lead to improved assays for disease classification and outcome prediction and aid the identification of efficient targets for new cancer therapies.</p>
      </sec>
      <sec>
         <st>
            <p>Materials and methods</p>
         </st>
         <sec>
            <st>
               <p>Tumor samples, DNA index, and tumor cell fraction</p>
            </st>
            <p>Samples from 94 patients with B-cell non-Hodgkin's lymphoma and 99 patients with squamous cell carcinoma of the uterine cervix were analyzed. We used fresh frozen lymphoma cell suspensions for which the tumor subtype, stage, patient treatment, and follow-up have been presented previously <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>. The cervical cancers were of FIGO (F&#233;d&#233;ration Internationale de Gyn&#233;cologie et d'Obst&#233;trique) stage 1b-4b, treated with radiotherapy. Tumor biopsies taken before the start of treatment were used.</p>
            <p>The <it>DI </it>of the lymphomas and cervical cancers and the tumor cell fraction of the lymphomas were determined by use of flow cytometry, and most of these data have been published earlier <abbrgrp><abbr bid="B14">14</abbr><abbr bid="B23">23</abbr><abbr bid="B25">25</abbr></abbrgrp>. The lymphoma cells were labeled with phycoerythrin-labeled antibodies to the tumor characteristic light chains for identifying the tumor cells and Hoechst 33258 for assessment of DNA content. The <it>DI </it>was determined from the G<sub>1 </sub>peak position of the light chain positive cells relative to the light chain negative cells. Tumor cell fraction was determined as the fraction of light chain positive cells. The <it>DI </it>of the cervical cancers was assessed by preparing clean nuclei, stained with propidium iodide, using the detergent-trypsin method <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>. Cells from a diploid cell line were used as an internal reference. Samples showing two distinct G<sub>1 </sub>peaks in the DNA histogram were classified as aneuploid, and the <it>DI </it>was determined from the position of the G<sub>1 </sub>peak of the aneuploid cells relative to the corresponding peak of the diploid cells. Samples with a single G<sub>1 </sub>peak were classified as near diploid. An estimate of the tumor cell fraction was achieved for each cervical cancer sample by histological examination of hematoxylin and eosin stained sections derived from the middle part of the biopsies. These values were used to compare with the tumor cell fractions estimated by GeneCount.</p>
         </sec>
         <sec>
            <st>
               <p>Array CGH</p>
            </st>
            <p>Genomic array slides produced by the Microarray Facility at the Norwegian Radium Hospital were used <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>. The arrays contained 4,549 unique genomic clones of BACs and P1 artificial chromosomes (PACs) (Wellcome Trust Sanger Institute, Cambridge, UK) that covered the whole genome with a resolution of approximately 1 Mb. The 1 Mb clone collection was supplemented with tiling path probes between 1q12 and 1q25, using overlapping BACs and PACs. The clones were from the RPCI-11 (BAC) and the RPCI-1, -3, -4, and -5 (PAC) libraries. Each clone was printed in 4-8 array spots. The genes covered by the clones were found from Ensembl <abbrgrp><abbr bid="B43">43</abbr></abbrgrp>.</p>
            <p>Genomic DNA was isolated from the lymphoma cell suspensions and cervical cancer biopsies according to a standard protocol, including proteinase K, phenol, chloroform, and isoamylalcohol <abbrgrp><abbr bid="B44">44</abbr></abbrgrp>. DNA (1 &#956;g) was digested overnight, using <it>Dpn</it>II endonuclease (New England Biolabs, Beverly, MA, USA), and purified using the QIAquick PCR Purification Kit (Qiagen, Valencia, CA, USA). Digested and purified DNA and normal reference DNA (0.5 &#956;g each) were labeled by a random primer reaction (BioPrime DNA Labeling System, Invitrogen, Carlsbad, CA, USA) with Cy3-dCTP and Cy5-dCTP (Perkin-Elmer Life Sciences, Foster City, CA, USA), respectively, and co-hybridized to the array slides <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>. Scanning and image analysis were performed by use of an Agilent scanner (Agilent Technologies Inc., Palo Alto, CA, USA) and the GenePix 6.0 image analysis software (Axon Instruments Inc., Union City, CA, USA). The microarray management and preprocessing software BASE <abbrgrp><abbr bid="B26">26</abbr></abbrgrp> was used for spot filtering and ratio normalization. The mean value of the 4-8 spots of each genomic clone was used, provided that the standard deviation was less than 0.2. Lowess normalization was performed so that the mean log-transformed ratio of all clones was equal to 0. The GLAD and CGH-Explorer algorithms were used for ratio smoothing and breakpoint detection <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B11">11</abbr></abbrgrp>. Default values of 8 (GLAD) and 1.5 (CGH-Explorer) for the statistical penalty, &#955;, were used. The smoothed ratios were inputs to GeneCount.</p>
         </sec>
         <sec>
            <st>
               <p>Principle of GeneCount</p>
            </st>
            <p>For a heterogeneous test sample consisting of several cell populations, like normal cells and distinct populations of malignant cells, the DNA of each cell population contributes to the aCGH ratio. Ideally (that is, in cases of no experimental bias), the normalized ratio of each array probe is given by:</p>
            <p>
               <display-formula id="M1">
                  <m:math name="gb-2008-9-5-r86-i1" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:msub>
                              <m:mi>R</m:mi>
                              <m:mrow>
                                 <m:mi>i</m:mi>
                                 <m:mi>d</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>a</m:mi>
                                 <m:mi>l</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>=</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:mstyle displaystyle="true">
                                    <m:munderover>
                                       <m:mo>&#8721;</m:mo>
                                       <m:mrow>
                                          <m:mi>i</m:mi>
                                          <m:mo>=</m:mo>
                                          <m:mn>1</m:mn>
                                       </m:mrow>
                                       <m:mi>n</m:mi>
                                    </m:munderover>
                                    <m:mrow>
                                       <m:mfrac>
                                          <m:mrow>
                                             <m:msub>
                                                <m:mi>N</m:mi>
                                                <m:mi>i</m:mi>
                                             </m:msub>
                                          </m:mrow>
                                          <m:mrow>
                                             <m:mn>2</m:mn>
                                             <m:mo>&#8901;</m:mo>
                                             <m:mi>D</m:mi>
                                             <m:msub>
                                                <m:mi>I</m:mi>
                                                <m:mi>i</m:mi>
                                             </m:msub>
                                          </m:mrow>
                                       </m:mfrac>
                                       <m:mo>&#8901;</m:mo>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>i</m:mi>
                                       </m:msub>
                                       <m:mo>&#8901;</m:mo>
                                       <m:mi>D</m:mi>
                                       <m:msub>
                                          <m:mi>I</m:mi>
                                          <m:mi>i</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                 </m:mstyle>
                              </m:mrow>
                              <m:mrow>
                                 <m:mstyle displaystyle="true">
                                    <m:munderover>
                                       <m:mo>&#8721;</m:mo>
                                       <m:mrow>
                                          <m:mi>i</m:mi>
                                          <m:mo>=</m:mo>
                                          <m:mn>1</m:mn>
                                       </m:mrow>
                                       <m:mi>n</m:mi>
                                    </m:munderover>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>i</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                 </m:mstyle>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:msub>
                                    <m:mi>I</m:mi>
                                    <m:mi>i</m:mi>
                                 </m:msub>
                              </m:mrow>
                           </m:mfrac>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfeBSjuyZL2yd9gzLbvyNv2Caerbhv2BYDwAHbqedmvETj2BSbqee0evGueE0jxyaibaiKI8=vI8GiVeY=Pipec8Eeeu0xXdbba9frFj0xb9Lqpepeea0xd9q8qiYRWxGi6xij=hbbc9s8aq0=yqpe0xbbG8A8frFve9Fve9Fj0dmeaabaqaciGacaGaaeqabaqabeGadaaakeaacaWGsbWaaSbaaSqaaiaadMgacaWGKbGaamyzaiaadggacaWGSbaabeaakiabg2da9KqbaoaalaaabaWaaabCaeaadaWcaaqaaiaad6eadaWgaaqaaiaadMgaaeqaaaqaaiaaikdacqGHflY1caWGebGaamysamaaBaaabaGaamyAaaqabaaaaiabgwSixlaadAeadaWgaaqaaiaadMgaaeqaaiabgwSixlaadseacaWGjbWaaSbaaeaacaWGPbaabeaaaeaacaWGPbGaeyypa0JaaGymaaqaaiaad6gaaiabggHiLdaabaWaaabCaeaacaWGgbWaaSbaaeaacaWGPbaabeaaaeaacaWGPbGaeyypa0JaaGymaaqaaiaad6gaaiabggHiLdGaeyyXICTaamiraiaadMeadaWgaaqaaiaadMgaaeqaaaaaaaa@5AA2@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>where <it>R</it><sub>ideal </sub>is the aCGH ratio of a sample with <it>n </it>cell populations, and <it>N</it><sub><it>i</it></sub>, <it>DI</it><sub><it>i</it></sub>, and <it>F</it><sub><it>i </it></sub>are the DNA copy number, DNA index, and tissue fraction of cell population <it>i</it>, respectively. We assume that: the reference sample is normal DNA with a copy number of 2 throughout the genome, except for the X and Y chromosomes in males; sex-matched hybridizations are performed; and <it>DI </it>is given relative to the DNA content of normal cells.</p>
            <p>In cases of a homogeneous sample with a single cell population, for example, a cancer cell line, Equation 1 is reduced to:</p>
            <p>
               <display-formula id="M2">
                  <m:math name="gb-2008-9-5-r86-i2" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:msub>
                              <m:mi>R</m:mi>
                              <m:mrow>
                                 <m:mi>i</m:mi>
                                 <m:mi>d</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>a</m:mi>
                                 <m:mi>l</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>=</m:mo>
                           <m:mfrac>
                              <m:mi>N</m:mi>
                              <m:mrow>
                                 <m:mn>2</m:mn>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:mi>I</m:mi>
                              </m:mrow>
                           </m:mfrac>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfeBSjuyZL2yd9gzLbvyNv2Caerbhv2BYDwAHbqedmvETj2BSbqee0evGueE0jxyaibaiKI8=vI8GiVeY=Pipec8Eeeu0xXdbba9frFj0xb9Lqpepeea0xd9q8qiYRWxGi6xij=hbbc9s8aq0=yqpe0xbbG8A8frFve9Fve9Fj0dmeaabaqaciGacaGaaeqabaqabeGadaaakeaacaWGsbWaaSbaaSqaaiaadMgacaWGKbGaamyzaiaadggacaWGSbaabeaakiabg2da9KqbaoaalaaabaGaamOtaaqaaiaaikdacqGHflY1caWGebGaamysaaaaaaa@3CFE@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>In clinical samples with two cell populations, that is, malignant and normal cells, the ratio is given by:</p>
            <p>
               <display-formula id="M3">
                  <m:math name="gb-2008-9-5-r86-i3" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:msub>
                              <m:mi>R</m:mi>
                              <m:mrow>
                                 <m:mi>i</m:mi>
                                 <m:mi>d</m:mi>
                                 <m:mi>e</m:mi>
                                 <m:mi>a</m:mi>
                                 <m:mi>l</m:mi>
                              </m:mrow>
                           </m:msub>
                           <m:mo>=</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>N</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                              </m:mrow>
                              <m:mrow>
                                 <m:mn>2</m:mn>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:msub>
                                    <m:mi>I</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                              </m:mrow>
                           </m:mfrac>
                           <m:mo>&#8901;</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:msub>
                                    <m:mi>I</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                              </m:mrow>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:msub>
                                    <m:mi>I</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo>+</m:mo>
                                 <m:mo stretchy="false">(</m:mo>
                                 <m:mn>1</m:mn>
                                 <m:mo>&#8722;</m:mo>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo stretchy="false">)</m:mo>
                              </m:mrow>
                           </m:mfrac>
                           <m:mo>+</m:mo>
                           <m:mfrac>
                              <m:mrow>
                                 <m:mn>1</m:mn>
                                 <m:mo>&#8722;</m:mo>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                              </m:mrow>
                              <m:mrow>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mi>D</m:mi>
                                 <m:msub>
                                    <m:mi>I</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo>+</m:mo>
                                 <m:mo stretchy="false">(</m:mo>
                                 <m:mn>1</m:mn>
                                 <m:mo>&#8722;</m:mo>
                                 <m:msub>
                                    <m:mi>F</m:mi>
                                    <m:mi>T</m:mi>
                                 </m:msub>
                                 <m:mo stretchy="false">)</m:mo>
                              </m:mrow>
                           </m:mfrac>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfeBSjuyZL2yd9gzLbvyNv2Caerbhv2BYDwAHbqedmvETj2BSbqee0evGueE0jxyaibaiKI8=vI8GiVeY=Pipec8Eeeu0xXdbba9frFj0xb9Lqpepeea0xd9q8qiYRWxGi6xij=hbbc9s8aq0=yqpe0xbbG8A8frFve9Fve9Fj0dmeaabaqaciGacaGaaeqabaqabeGadaaakeaacaWGsbWaaSbaaSqaaiaadMgacaWGKbGaamyzaiaadggacaWGSbaabeaakiabg2da9KqbaoaalaaabaGaamOtamaaBaaabaGaamivaaqabaaabaGaaGOmaiabgwSixlaadseacaWGjbWaaSbaaeaacaWGubaabeaaaaGccqGHflY1juaGdaWcaaqaaiaadAeadaWgaaqaaiaadsfaaeqaaiabgwSixlaadseacaWGjbWaaSbaaeaacaWGubaabeaaaeaacaWGgbWaaSbaaeaacaWGubaabeaacqGHflY1caWGebGaamysamaaBaaabaGaamivaaqabaGaey4kaSIaaiikaiaaigdacqGHsislcaWGgbWaaSbaaeaacaWGubaabeaacaGGPaaaaOGaey4kaSscfa4aaSaaaeaacaaIXaGaeyOeI0IaamOramaaBaaabaGaamivaaqabaaabaGaamOramaaBaaabaGaamivaaqabaGaeyyXICTaamiraiaadMeadaWgaaqaaiaadsfaaeqaaiabgUcaRiaacIcacaaIXaGaeyOeI0IaamOramaaBaaabaGaamivaaqabaGaaiykaaaaaaa@660B@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>where <it>N</it><sub><it>T</it></sub>, <it>DI</it><sub><it>T</it></sub>, and <it>F</it><sub><it>T </it></sub>are the DNA copy number, DNA index, and fraction of malignant cells in the sample, respectively. 1 - <it>F</it><sub><it>T </it></sub>represents the fraction of normal cells, which have a <it>DI </it>of 1 and DNA copy number (<it>N</it>) of 2.</p>
            <p>It was clear from experiments where normal male DNA was hybridized against female DNA that the ratio dynamics were somewhat reduced (Figure <figr fid="F1">1e</figr>). A dynamic factor, <it>q</it>, was included in Equation 3 to compensate for this effect. Since the experimental noise was independent of the logarithm of the ratio (Additional data file 6), Equation 3 was rewritten to account for the reduced dynamic in the following way:</p>
            <p>
               <display-formula id="M4">
                  <m:math name="gb-2008-9-5-r86-i4" xmlns:m="http://www.w3.org/1998/Math/MathML">
                     <m:semantics>
                        <m:mrow>
                           <m:mi>L</m:mi>
                           <m:mi>o</m:mi>
                           <m:msub>
                              <m:mi>g</m:mi>
                              <m:mn>2</m:mn>
                           </m:msub>
                           <m:mo stretchy="false">(</m:mo>
                           <m:mi>R</m:mi>
                           <m:mo stretchy="false">)</m:mo>
                           <m:mo>=</m:mo>
                           <m:mi>q</m:mi>
                           <m:mo>&#8901;</m:mo>
                           <m:mi>L</m:mi>
                           <m:mi>o</m:mi>
                           <m:msub>
                              <m:mi>g</m:mi>
                              <m:mn>2</m:mn>
                           </m:msub>
                           <m:mrow>
                              <m:mo>(</m:mo>
                              <m:mrow>
                                 <m:mfrac>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>N</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                    <m:mrow>
                                       <m:mn>2</m:mn>
                                       <m:mo>&#8901;</m:mo>
                                       <m:mi>D</m:mi>
                                       <m:msub>
                                          <m:mi>I</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                 </m:mfrac>
                                 <m:mo>&#8901;</m:mo>
                                 <m:mfrac>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo>&#8901;</m:mo>
                                       <m:mi>D</m:mi>
                                       <m:msub>
                                          <m:mi>I</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo>&#8901;</m:mo>
                                       <m:mi>D</m:mi>
                                       <m:msub>
                                          <m:mi>I</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo>+</m:mo>
                                       <m:mo stretchy="false">(</m:mo>
                                       <m:mn>1</m:mn>
                                       <m:mo>&#8722;</m:mo>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo stretchy="false">)</m:mo>
                                    </m:mrow>
                                 </m:mfrac>
                                 <m:mo>+</m:mo>
                                 <m:mfrac>
                                    <m:mrow>
                                       <m:mn>1</m:mn>
                                       <m:mo>&#8722;</m:mo>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                    </m:mrow>
                                    <m:mrow>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo>&#8901;</m:mo>
                                       <m:mi>D</m:mi>
                                       <m:msub>
                                          <m:mi>I</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo>+</m:mo>
                                       <m:mo stretchy="false">(</m:mo>
                                       <m:mn>1</m:mn>
                                       <m:mo>&#8722;</m:mo>
                                       <m:msub>
                                          <m:mi>F</m:mi>
                                          <m:mi>T</m:mi>
                                       </m:msub>
                                       <m:mo stretchy="false">)</m:mo>
                                    </m:mrow>
                                 </m:mfrac>
                              </m:mrow>
                              <m:mo>)</m:mo>
                           </m:mrow>
                        </m:mrow>
                        <m:annotation encoding="MathType-MTEF">
 MathType@MTEF@5@5@+=feaafiart1ev1aaatCvAUfeBSjuyZL2yd9gzLbvyNv2Caerbhv2BYDwAHbqedmvETj2BSbqee0evGueE0jxyaibaiKI8=vI8GiVeY=Pipec8Eeeu0xXdbba9frFj0xb9Lqpepeea0xd9q8qiYRWxGi6xij=hbbc9s8aq0=yqpe0xbbG8A8frFve9Fve9Fj0dmeaabaqaciGacaGaaeqabaqabeGadaaakeaacaWGmbGaam4BaiaadEgadaWgaaWcbaGaaGOmaaqabaGccaGGOaGaamOuaiaacMcacqGH9aqpcaWGXbGaeyyXICTaamitaiaad+gacaWGNbWaaSbaaSqaaiaaikdaaeqaaOWaaeWaaeaajuaGdaWcaaqaaiaad6eadaWgaaqaaiaadsfaaeqaaaqaaiaaikdacqGHflY1caWGebGaamysamaaBaaabaGaamivaaqabaaaaOGaeyyXICDcfa4aaSaaaeaacaWGgbWaaSbaaeaacaWGubaabeaacqGHflY1caWGebGaamysamaaBaaabaGaamivaaqabaaabaGaamOramaaBaaabaGaamivaaqabaGaeyyXICTaamiraiaadMeadaWgaaqaaiaadsfaaeqaaiabgUcaRiaacIcacaaIXaGaeyOeI0IaamOramaaBaaabaGaamivaaqabaGaaiykaaaakiabgUcaRKqbaoaalaaabaGaaGymaiabgkHiTiaadAeadaWgaaqaaiaadsfaaeqaaaqaaiaadAeadaWgaaqaaiaadsfaaeqaaiabgwSixlaadseacaWGjbWaaSbaaeaacaWGubaabeaacqGHRaWkcaGGOaGaaGymaiabgkHiTiaadAeadaWgaaqaaiaadsfaaeqaaiaacMcaaaaakiaawIcacaGLPaaaaaa@6EAF@</m:annotation>
                     </m:semantics>
                  </m:math>
               </display-formula>
            </p>
            <p>The dynamic factor represents the systematic, non-random reduction in the log-transformed ratios caused by the experimental bias and has a value between 0 and 1, where the latter value occurs in the ideal situation without any reduction in the ratio dynamics. The factor is a characteristic of the array slide series and the laboratory protocol and was determined from the ratio of the X chromosome in a control experiment hybridizing male versus female normal DNA (Figure <figr fid="F1">1e</figr>). Equation 4 was used in GeneCount to calculate <it>F</it><sub><it>T </it></sub>and <it>N</it><sub><it>T </it></sub>from the ratio profile of the sample.</p>
            <p>Intratumor heterogeneity in the DNA copy numbers, that is, the presence of several populations of malignant cells in addition to the normal cells, was identified by visually inspecting the ratio profiles and selecting the tumors for which one or more of the aCGH ratio levels differed from the levels corresponding to integer copy numbers. The ratio distributions of the potential heterogeneous regions were compared to the distributions of the adjacent homogeneous aberrations by ANOVA, and a <it>P</it>-value below 0.05 was required to classify the aberration as heterogeneous. The fraction of tumor cells with a heterogeneous aberration was calculated, employing the more general Equation 1. The <it>DI </it>was assumed to be the same for all subpopulations of malignant cells.</p>
         </sec>
         <sec>
            <st>
               <p>Implementation of GeneCount in BASE</p>
            </st>
            <p>We used BASE as a platform for GeneCount and linked the algorithm to the output of the GLAD and CGH-Explorer packages, which were implemented in our BASE version. The method can also be developed as a separate program or integrated in other aCGH analysis packages. The algorithm consists of three major steps: data input for all samples; estimation of tumor cell fraction in the cases when this parameter is unknown; and estimation of DNA copy number for each array probe (panel A in Additional data file 1). The smoothed aCGH ratios served as input, together with the <it>DI</it>, the <it>q</it>-value from control experiments with its lower and upper limits (<it>q</it><sub>min</sub>, <it>q</it><sub>max</sub>) and, if available, the tumor cell fraction.</p>
            <p>In cases of unknown tumor cell fraction, this value was estimated in a simulation procedure based on two selected ratio levels, using the tumor cell fraction and DNA copy numbers as independent and <it>q </it>as dependent variables. The copy numbers and tumor cell fraction were increased in steps of 1 and 0.01, respectively, and the corresponding <it>q</it>-value was calculated (panel B in Additional data file 1). To ensure high accuracy in the estimated fractions, it was required that the absolute value of the selected ratio levels was larger than 0.15. This implied that samples with a tumor cell fraction lower than 24% in diploid and 36% in tetraploid tumors could not be analyzed when only aberrations involving one copy number change existed (Additional data file 12). Moreover, a minimum absolute difference of 0.2 - that is, approximately two times the standard deviation of the log-transformed ratio levels (Additional data file 6) - between the two selected ratio levels was needed. To further increase the reliability of the estimation, only ratio levels with more than ten probes were selected. We optimized <it>q </it>for each tumor by allowing the value to vary within the limited range of <it>q</it><sub>min </sub>to <it>q</it><sub>max</sub>, typically <it>q </it>&#177; 10%, leading to fairly stable estimates of the tumor cell fraction. The mean tumor cell fraction based on these estimates and the corresponding mean <it>q</it>-value were used in Equation 4 to estimate the DNA copy numbers of the tumor. In cases of known tumor cell fraction, this fraction and <it>q </it>from control experiments were used in Equation 4. The source code of the module is available from the authors upon request. A demo version of GeneCount in BASE is also available <abbrgrp><abbr bid="B45">45</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Fluorescence <it>in situ </it>hybridization</p>
            </st>
            <p>GeneCount estimates for the lymphomas were compared with direct assessments of gene copy numbers by use of FISH. All FISH analyses have been published previously <abbrgrp><abbr bid="B20">20</abbr><abbr bid="B21">21</abbr><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr><abbr bid="B25">25</abbr></abbrgrp>. Dual-color FISH was applied to all 94 tumors. We used spectrum orange labeled locus-specific propidium iodide DNA probes for genes commonly aberrant in lymphomas (<it>CCND3</it>, <it>BMP6</it>, <it>PIM1</it>, <it>MYC</it>, <it>CDKN2A</it>, <it>RB1</it>, <it>TP53</it>, <it>PMAIP1</it>, and <it>MALT1</it>) and spectrum green labeled centromere probes (centromere 1, 6, 8, 17, and 18) (Vysis Inc., Downers Grove, IL, USA) for assessing the quality of the experiment. For exploring DNA copy number calculations in translocated chromosomal regions, <it>BCL2</it>, which is frequently involved in the translocation t(14;18)(q32;q21) in lymphomas, was considered. A dual-color translocation probe involving <it>BCL2 </it>and covering the breakpoint region was used (LSI <it>IGH </it>Spectrum Green/LSI <it>BCL2 </it>Spectrum Orange, Vysis Inc.). Due to splitting of the probe signal in cases of translocation, erroneously high <it>BCL2 </it>copy numbers were derived directly with this probe. The <it>BCL2 </it>copy number was therefore corrected based on the signals from the <it>IGH </it>and centromere 18 probes, as described <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Gene expression microarrays</p>
            </st>
            <p>Gene expressions were determined by microarray analysis of 89 of the cervical cancers and related to the GeneCount estimates. We used array slides produced at the Microarray Facility at the Norwegian Radium Hospital, containing 15,000 cDNA clones. The data from 48 of the patients, with a detailed description of the experimental procedures, have been presented <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>. Cy3- and Cy5-labeled cDNA was synthesized from total RNA by anchored oligo(dT)-primed reverse transcription and co-hybridized with a reference sample (Universal Human Reference RNA, Stratagene, La Jolla, CA, USA) to the array slides overnight at 65&#176;C. Scanning and image analysis were performed with an Agilent scanner and the GenePix 4.1 image analysis software, respectively. Data preprocessing, including correction of saturated intensities, filtering of weak and bad spots, and lowess normalization, was performed in BASE. All hybridizations were performed twice in a dye-swap design, and the average expression ratio based on the two experiments was used in the further analyses.</p>
         </sec>
         <sec>
            <st>
               <p>ArrayExpress accession</p>
            </st>
            <p>The array CGH raw data have been deposited in the ArrayExpress repository (E-TABM-398, E-TABM-399).</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Abbreviations</p>
         </st>
         <p>aCGH, array comparative genomic hybridization; BAC, bacterial artificial chromosome; BASE, Bioarray Software Environment; DI, DNA index; FISH, fluorescence <it>in situ </it>hybridization; PAC, P1 artificial chromosome.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>HL and TS conceived and designed the study and analyzed data. HL wrote the article. ML, RSB, DHS, EG, and OTB carried out the aCGH, FISH, and flow cytometry experiments and participated in data analysis; LAMZ and OM contributed to the aCGH experiments; MJ and EH contributed to the implementation of GeneCount in BASE; GBK provided clinical samples and data; and TS helped to draft the manuscript. All authors read and approved the final manuscript.</p>
      </sec>
      <sec>
         <st>
            <p>Additional data files</p>
         </st>
         <p>The following additional data are available with the online version of this paper. Additional data file <supplr sid="S1">1</supplr> is a figure showing the calculation steps in GeneCount. Additional data file <supplr sid="S2">2</supplr> is a figure showing an example of FISH probe locations. Additional data file <supplr sid="S3">3</supplr> is a figure comparing FISH DNA copy numbers and smoothed aCGH ratio levels in non-Hodgkin's lymphomas. Additional data file <supplr sid="S4">4</supplr> is a figure illustrating discrepancies between GeneCount and FISH DNA copy numbers. Additional data file <supplr sid="S5">5</supplr> is a figure comparing tumor cell fractions derived by histological examination and by GeneCount estimation in cervical cancers. Additional data file <supplr sid="S6">6</supplr> is a figure showing the standard deviation (noise) of the log-transformed aCGH ratios. Additional data file <supplr sid="S7">7</supplr> is a figure comparing results from ratio level and GeneCount analyses in cervical cancers. Additional data file <supplr sid="S8">8</supplr> is a table listing regions with DNA copy number heterogeneity in non-Hodgkin's lymphomas. Additional data file <supplr sid="S9">9</supplr> is a table listing regions with DNA copy number heterogeneity in cervical cancers. Additional data file <supplr sid="S10">10</supplr> is a figure showing tumor cell fraction required for detection of heterogeneous copy number changes. Additional data file <supplr sid="S11">11</supplr> is a figure illustrating analysis of the evolutionary sequence of subpopulations in heterogeneous tumors. Additional data file <supplr sid="S12">12</supplr> is a figure showing the minimum tumor cell fraction that can be calculated in GeneCount. Additional data file <supplr sid="S13">13</supplr> is a figure showing the tumor cell fraction required for detection of homogeneous copy number changes.</p>
         <suppl id="S1">
            <title>
               <p>Additional data file 1</p>
            </title>
            <caption>
               <p>Calculation steps in GeneCount</p>
            </caption>
            <text>
               <p>Calculation steps in GeneCount.</p>
            </text>
            <file name="gb-2008-9-5-r86-S1.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S2">
            <title>
               <p>Additional data file 2</p>
            </title>
            <caption>
               <p>An example of FISH probe locations</p>
            </caption>
            <text>
               <p>An example of FISH probe locations.</p>
            </text>
            <file name="gb-2008-9-5-r86-S2.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S3">
            <title>
               <p>Additional data file 3</p>
            </title>
            <caption>
               <p>Comparison of FISH DNA copy numbers and smoothed aCGH ratio levels in non-Hodgkin's lymphomas</p>
            </caption>
            <text>
               <p>Comparison of FISH DNA copy numbers and smoothed aCGH ratio levels in non-Hodgkin's lymphomas.</p>
            </text>
            <file name="gb-2008-9-5-r86-S3.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S4">
            <title>
               <p>Additional data file 4</p>
            </title>
            <caption>
               <p>Discrepancies between GeneCount and FISH DNA copy numbers</p>
            </caption>
            <text>
               <p>Discrepancies between GeneCount and FISH DNA copy numbers.</p>
            </text>
            <file name="gb-2008-9-5-r86-S4.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S5">
            <title>
               <p>Additional data file 5</p>
            </title>
            <caption>
               <p>Comparison of tumor cell fractions derived by histological examination and by GeneCount estimation in cervical cancers</p>
            </caption>
            <text>
               <p>Comparison of tumor cell fractions derived by histological examination and by GeneCount estimation in cervical cancers.</p>
            </text>
            <file name="gb-2008-9-5-r86-S5.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S6">
            <title>
               <p>Additional data file 6</p>
            </title>
            <caption>
               <p>Standard deviation (noise) of the log-transformed aCGH ratios</p>
            </caption>
            <text>
               <p>Standard deviation (noise) of the log-transformed aCGH ratios.</p>
            </text>
            <file name="gb-2008-9-5-r86-S6.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S7">
            <title>
               <p>Additional data file 7</p>
            </title>
            <caption>
               <p>Comparison of results from ratio level and GeneCount analyses in cervical cancers</p>
            </caption>
            <text>
               <p>Comparison of results from ratio level and GeneCount analyses in cervical cancers.</p>
            </text>
            <file name="gb-2008-9-5-r86-S7.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S8">
            <title>
               <p>Additional data file 8</p>
            </title>
            <caption>
               <p>Regions with DNA copy number heterogeneity in non-Hodgkin's lymphomas</p>
            </caption>
            <text>
               <p>Regions with DNA copy number heterogeneity in non-Hodgkin's lymphomas.</p>
            </text>
            <file name="gb-2008-9-5-r86-S8.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S9">
            <title>
               <p>Additional data file 9</p>
            </title>
            <caption>
               <p>Regions with DNA copy number heterogeneity in cervical cancers</p>
            </caption>
            <text>
               <p>Regions with DNA copy number heterogeneity in cervical cancers.</p>
            </text>
            <file name="gb-2008-9-5-r86-S9.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S10">
            <title>
               <p>Additional data file 10</p>
            </title>
            <caption>
               <p>Tumor cell fraction required for detection of heterogeneous copy number changes</p>
            </caption>
            <text>
               <p>Tumor cell fraction required for detection of heterogeneous copy number changes.</p>
            </text>
            <file name="gb-2008-9-5-r86-S10.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S11">
            <title>
               <p>Additional data file 11</p>
            </title>
            <caption>
               <p>Analysis of the evolutionary sequence of subpopulations in heterogeneous tumors</p>
            </caption>
            <text>
               <p>Analysis of the evolutionary sequence of subpopulations in heterogeneous tumors.</p>
            </text>
            <file name="gb-2008-9-5-r86-S11.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S12">
            <title>
               <p>Additional data file 12</p>
            </title>
            <caption>
               <p>The minimum tumor cell fraction that can be calculated in GeneCount</p>
            </caption>
            <text>
               <p>The minimum tumor cell fraction that can be calculated in GeneCount.</p>
            </text>
            <file name="gb-2008-9-5-r86-S12.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
         <suppl id="S13">
            <title>
               <p>Additional data file 13</p>
            </title>
            <caption>
               <p>The tumor cell fraction required for detection of homogeneous copy number changes</p>
            </caption>
            <text>
               <p>The tumor cell fraction required for detection of homogeneous copy number changes.</p>
            </text>
            <file name="gb-2008-9-5-r86-S13.pdf">
               <p>Click here for file</p>
            </file>
         </suppl>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We would like to acknowledge Vegard Nygaard from The Norwegian Microarray Consortium for help with implementing the BASE plug-in module. The study was supported by The Norwegian Cancer Society and the Microarray Platform of The National Programme for Research in Functional Genomics (FUGE) in the Research Council of Norway.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Genomic microarrays in the spotlight.</p>
            </title>
            <aug>
               <au>
                  <snm>Mantripragada</snm>
                  <fnm>KK</fnm>
               </au>
               <au>
                  <snm>Buckley</snm>
                  <fnm>PG</fnm>
               </au>
               <au>
                  <snm>de Stahl</snm>
                  <fnm>TD</fnm>
               </au>
               <au>
                  <snm>Dumanski</snm>
                  <fnm>JP</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <fpage>87</fpage>
            <lpage>94</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.tig.2003.12.008</pubid>
                  <pubid idtype="pmpid" link="fulltext">14746990</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Array comparative genomic hybridization and its applications in cancer.</p>
            </title>
            <aug>
               <au>
                  <snm>Pinkel</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Albertson</snm>
                  <fnm>DG</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2005</pubdate>
            <volume>37</volume>
            <issue>Suppl</issue>
            <fpage>S11</fpage>
            <lpage>S17</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1569</pubid>
                  <pubid idtype="pmpid" link="fulltext">15920524</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Chromosome aberrations in solid tumors.</p>
            </title>
            <aug>
               <au>
                  <snm>Albertson</snm>
                  <fnm>DG</fnm>
               </au>
               <au>
                  <snm>Collins</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>McCormick</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Gray</snm>
                  <fnm>JW</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2003</pubdate>
            <volume>34</volume>
            <fpage>369</fpage>
            <lpage>376</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1215</pubid>
                  <pubid idtype="pmpid" link="fulltext">12923544</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>Gene amplification in cancer.</p>
            </title>
            <aug>
               <au>
                  <snm>Albertson</snm>
                  <fnm>DG</fnm>
               </au>
            </aug>
            <source>Trends Genet</source>
            <pubdate>2006</pubdate>
            <volume>22</volume>
            <fpage>447</fpage>
            <lpage>455</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.tig.2006.06.007</pubid>
                  <pubid idtype="pmpid" link="fulltext">16787682</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>DNA copy number losses in human neoplasms.</p>
            </title>
            <aug>
               <au>
                  <snm>Knuutila</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Aalto</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Autio</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Bjorkqvist</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>El-Rifai</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Hemmer</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Huhta</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kettunen</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Kiuru-Kuhlefelt</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Larramendy</snm>
                  <fnm>ML</fnm>
               </au>
               <au>
                  <snm>Lushnikova</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Monni</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Pere</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Tapper</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Tarkkanen</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Varis</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Wasenius</snm>
                  <fnm>VM</fnm>
               </au>
               <au>
                  <snm>Wolf</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Zhu</snm>
                  <fnm>Y</fnm>
               </au>
            </aug>
            <source>Am J Pathol</source>
            <pubdate>1999</pubdate>
            <volume>155</volume>
            <fpage>683</fpage>
            <lpage>694</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1866903</pubid>
                  <pubid idtype="pmpid" link="fulltext">10487825</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>High-resolution array-CGH and expression profiling identifies a novel genomic subtype of ER negative breast cancer.</p>
            </title>
            <aug>
               <au>
                  <snm>Chin</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Teschendorff</snm>
                  <fnm>AE</fnm>
               </au>
               <au>
                  <snm>Marioni</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Barbosa-Morais</snm>
                  <fnm>NL</fnm>
               </au>
               <au>
                  <snm>Thorne</snm>
                  <fnm>NP</fnm>
               </au>
               <au>
                  <snm>Costa</snm>
                  <fnm>JL</fnm>
               </au>
               <au>
                  <snm>Pinder</snm>
                  <fnm>SE</fnm>
               </au>
               <au>
                  <snm>Wiel</snm>
                  <mnm>van de</mnm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Green</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Ellis</snm>
                  <fnm>IO</fnm>
               </au>
               <au>
                  <snm>Porter</snm>
                  <fnm>PL</fnm>
               </au>
               <au>
                  <snm>Tavare</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Brenton</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Ylstra</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Caldas</snm>
                  <fnm>C</fnm>
               </au>
            </aug>
            <source>Genome Biol</source>
            <pubdate>2007</pubdate>
            <volume>8</volume>
            <fpage>R215</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">2246289</pubid>
                  <pubid idtype="pmpid" link="fulltext">17925008</pubid>
                  <pubid idtype="doi">10.1186/gb-2007-8-10-r215</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Array comparative genomic hybridization reveals genomic copy number changes associated with outcome in diffuse large B-cell lymphomas.</p>
            </title>
            <aug>
               <au>
                  <snm>Chen</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Houldsworth</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Olshen</snm>
                  <fnm>AB</fnm>
               </au>
               <au>
                  <snm>Nanjangud</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Chaganti</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Venkatraman</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Halaas</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Teruya-Feldstein</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Zelenetz</snm>
                  <fnm>AD</fnm>
               </au>
               <au>
                  <snm>Chaganti</snm>
                  <fnm>RS</fnm>
               </au>
            </aug>
            <source>Blood</source>
            <pubdate>2006</pubdate>
            <volume>107</volume>
            <fpage>2477</fpage>
            <lpage>2485</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">1895737</pubid>
                  <pubid idtype="pmpid" link="fulltext">16317097</pubid>
                  <pubid idtype="doi">10.1182/blood-2005-07-2950</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <title>
               <p>The Mitelman Database of Chromosome Aberrations in Cancer</p>
            </title>
            <url>http://cgap.nci.nih.gov/Chromosomes/Mitelman</url>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Analysis of array CGH data: from signal ratio to gain and loss of DNA regions.</p>
            </title>
            <aug>
               <au>
                  <snm>Hupe</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Stransky</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Thiery</snm>
                  <fnm>JP</fnm>
               </au>
               <au>
                  <snm>Radvanyi</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Barillot</snm>
                  <fnm>E</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <fpage>3413</fpage>
            <lpage>3422</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bth418</pubid>
                  <pubid idtype="pmpid" link="fulltext">15381628</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Breakpoint identification and smoothing of array comparative genomic hybridization data.</p>
            </title>
            <aug>
               <au>
                  <snm>Jong</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Marchiori</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Meijer</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Vaart</snm>
                  <fnm>AV</fnm>
               </au>
               <au>
                  <snm>Ylstra</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2004</pubdate>
            <volume>20</volume>
            <fpage>3636</fpage>
            <lpage>3637</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bth355</pubid>
                  <pubid idtype="pmpid" link="fulltext">15201182</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>CGH-Explorer: a program for analysis of array-CGH data.</p>
            </title>
            <aug>
               <au>
                  <snm>Lingjaerde</snm>
                  <fnm>OC</fnm>
               </au>
               <au>
                  <snm>Baumbusch</snm>
                  <fnm>LO</fnm>
               </au>
               <au>
                  <snm>Liestol</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Glad</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Borresen-Dale</snm>
                  <fnm>AL</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2005</pubdate>
            <volume>21</volume>
            <fpage>821</fpage>
            <lpage>822</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bti113</pubid>
                  <pubid idtype="pmpid" link="fulltext">15531610</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>Circular binary segmentation for the analysis of array-based DNA copy number data.</p>
            </title>
            <aug>
               <au>
                  <snm>Olshen</snm>
                  <fnm>AB</fnm>
               </au>
               <au>
                  <snm>Venkatraman</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Lucito</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Wigler</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Biostatistics</source>
            <pubdate>2004</pubdate>
            <volume>5</volume>
            <fpage>557</fpage>
            <lpage>572</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/biostatistics/kxh008</pubid>
                  <pubid idtype="pmpid" link="fulltext">15475419</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Comparative genomic hybridization in hypotriploid/hyperdiploid tumors.</p>
            </title>
            <aug>
               <au>
                  <snm>Rosenberg</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Schut</snm>
                  <fnm>TB</fnm>
               </au>
               <au>
                  <snm>Mostert</snm>
                  <fnm>MC</fnm>
               </au>
               <au>
                  <snm>Tanke</snm>
                  <fnm>HJ</fnm>
               </au>
               <au>
                  <snm>Raap</snm>
                  <fnm>AK</fnm>
               </au>
               <au>
                  <snm>Oosterhuis</snm>
                  <fnm>JW</fnm>
               </au>
               <au>
                  <snm>Looijenga</snm>
                  <fnm>LH</fnm>
               </au>
            </aug>
            <source>Cytometry</source>
            <pubdate>1997</pubdate>
            <volume>29</volume>
            <fpage>113</fpage>
            <lpage>121</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/(SICI)1097-0320(19971001)29:2&lt;113::AID-CYTO3&gt;3.0.CO;2-E</pubid>
                  <pubid idtype="pmpid" link="fulltext">9332817</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Intratumor chromosomal heterogeneity in advanced carcinomas of the uterine cervix.</p>
            </title>
            <aug>
               <au>
                  <snm>Lyng</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Beigi</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Svendsrud</snm>
                  <fnm>DH</fnm>
               </au>
               <au>
                  <snm>Brustugun</snm>
                  <fnm>OT</fnm>
               </au>
               <au>
                  <snm>Stokke</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kristensen</snm>
                  <fnm>GB</fnm>
               </au>
               <au>
                  <snm>Sundfor</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Skjonsberg</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>De Angelis</snm>
                  <fnm>PM</fnm>
               </au>
            </aug>
            <source>Int J Cancer</source>
            <pubdate>2004</pubdate>
            <volume>111</volume>
            <fpage>358</fpage>
            <lpage>366</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1002/ijc.20258</pubid>
                  <pubid idtype="pmpid" link="fulltext">15221962</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Intratumoral cytogenetic heterogeneity detected by comparative genomic hybridization and laser scanning cytometry in human gliomas.</p>
            </title>
            <aug>
               <au>
                  <snm>Harada</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Nishizaki</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ozaki</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Kubota</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Ito</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Sasaki</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Cancer Res</source>
            <pubdate>1998</pubdate>
            <volume>58</volume>
            <fpage>4694</fpage>
            <lpage>4700</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">9788624</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>Interglandular cytogenetic heterogeneity detected by comparative genomic hybridization in pancreatic cancer.</p>
            </title>
            <aug>
               <au>
                  <snm>Harada</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Okita</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Shiraishi</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Kusano</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Kondoh</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Sasaki</snm>
                  <fnm>K</fnm>
               </au>
            </aug>
            <source>Cancer Res</source>
            <pubdate>2002</pubdate>
            <volume>62</volume>
            <fpage>835</fpage>
            <lpage>839</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">11830540</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Assessing the significance of chromosomal aberrations in cancer: methodology and application to glioma.</p>
            </title>
            <aug>
               <au>
                  <snm>Beroukhim</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Getz</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Nghiemphu</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Barretina</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Hsueh</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Linhart</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Vivanco</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Lee</snm>
                  <fnm>JC</fnm>
               </au>
               <au>
                  <snm>Huang</snm>
                  <fnm>JH</fnm>
               </au>
               <au>
                  <snm>Alexander</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Du</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Kau</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Thomas</snm>
                  <fnm>RK</fnm>
               </au>
               <au>
                  <snm>Shah</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Soto</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Perner</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Prensner</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Debiasi</snm>
                  <fnm>RM</fnm>
               </au>
               <au>
                  <snm>Demichelis</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Hatton</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Rubin</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Garraway</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Nelson</snm>
                  <fnm>SF</fnm>
               </au>
               <au>
                  <snm>Liau</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Mischel</snm>
                  <fnm>PS</fnm>
               </au>
               <au>
                  <snm>Cloughesy</snm>
                  <fnm>TF</fnm>
               </au>
               <au>
                  <snm>Meyerson</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Golub</snm>
                  <fnm>TA</fnm>
               </au>
               <au>
                  <snm>Lander</snm>
                  <fnm>ES</fnm>
               </au>
               <au>
                  <snm>Mellinghoff</snm>
                  <fnm>IK</fnm>
               </au>
               <etal/>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2007</pubdate>
            <volume>104</volume>
            <fpage>20007</fpage>
            <lpage>20012</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1073/pnas.0710052104</pubid>
                  <pubid idtype="pmpid" link="fulltext">18077431</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>Characterizing the cancer genome in lung adenocarcinoma.</p>
            </title>
            <aug>
               <au>
                  <snm>Weir</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>Woo</snm>
                  <fnm>MS</fnm>
               </au>
               <au>
                  <snm>Getz</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Perner</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ding</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Beroukhim</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Lin</snm>
                  <fnm>WM</fnm>
               </au>
               <au>
                  <snm>Province</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Kraja</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Johnson</snm>
                  <fnm>LA</fnm>
               </au>
               <au>
                  <snm>Shah</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Sato</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Thomas</snm>
                  <fnm>RK</fnm>
               </au>
               <au>
                  <snm>Barletta</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Borecki</snm>
                  <fnm>IB</fnm>
               </au>
               <au>
                  <snm>Broderick</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chang</snm>
                  <fnm>AC</fnm>
               </au>
               <au>
      