<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>gb-2010-11-3-r35</ui>
   <ji>GBJ</ji>
   <fm>
      <dochead>Research</dochead>
      <bibl>
         <title>
            <p>Conserved developmental transcriptomes in evolutionarily divergent species</p>
         </title>
         <aug>
            <au id="A1" ce="yes">
               <snm>Parikh</snm>
               <fnm>Anup</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <email>anup.parikh@gmail.com</email>
            </au>
            <au id="A2" ce="yes">
               <snm>Miranda</snm>
               <fnm>Edward Roshan</fnm>
               <insr iid="I1"/>
               <insr iid="I3"/>
               <email>roshmiranda@gmail.com</email>
            </au>
            <au id="A3">
               <snm>Katoh-Kurasawa</snm>
               <fnm>Mariko</fnm>
               <insr iid="I1"/>
               <email>mkatoh@bcm.tmc.edu</email>
            </au>
            <au id="A4">
               <snm>Fuller</snm>
               <fnm>Danny</fnm>
               <insr iid="I4"/>
               <email>dfuller@ucsd.edu</email>
            </au>
            <au id="A5">
               <snm>Rot</snm>
               <fnm>Gregor</fnm>
               <insr iid="I5"/>
               <email>gregor.rot@fri.uni-lj.si</email>
            </au>
            <au id="A6">
               <snm>Zagar</snm>
               <fnm>Lan</fnm>
               <insr iid="I5"/>
               <email>lan.zagar@fri.uni-lj.si</email>
            </au>
            <au id="A7">
               <snm>Curk</snm>
               <fnm>Tomaz</fnm>
               <insr iid="I5"/>
               <email>tomaz.curk@fri.uni-lj.si</email>
            </au>
            <au id="A8">
               <snm>Sucgang</snm>
               <fnm>Richard</fnm>
               <insr iid="I6"/>
               <email>rsucgang@bcm.edu</email>
            </au>
            <au id="A9">
               <snm>Chen</snm>
               <fnm>Rui</fnm>
               <insr iid="I1"/>
               <email>ruichen@bcm.tmc.edu</email>
            </au>
            <au id="A10">
               <snm>Zupan</snm>
               <fnm>Blaz</fnm>
               <insr iid="I1"/>
               <insr iid="I5"/>
               <email>Blaz.Zupan@fri.uni-lj.si</email>
            </au>
            <au id="A11">
               <snm>Loomis</snm>
               <mi>F</mi>
               <fnm>William</fnm>
               <insr iid="I4"/>
               <email>wloomis@ucsd.edu</email>
            </au>
            <au id="A12">
               <snm>Kuspa</snm>
               <fnm>Adam</fnm>
               <insr iid="I1"/>
               <insr iid="I3"/>
               <insr iid="I6"/>
               <email>akuspa@bcm.tmc.edu</email>
            </au>
            <au ca="yes" id="A13">
               <snm>Shaulsky</snm>
               <fnm>Gad</fnm>
               <insr iid="I1"/>
               <insr iid="I2"/>
               <insr iid="I3"/>
               <email>gadi@bcm.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Department of Molecular and Human Genetics, Baylor College of Medicine, One Baylor Plaza, Houston, TX 77030, USA</p>
            </ins>
            <ins id="I2">
               <p>Graduate Program in Structural and Computational Biology and Molecular Biophysics, Baylor College of Medicine, One Baylor Plaza, Houston, TX 77030, USA</p>
            </ins>
            <ins id="I3">
               <p>Graduate Program in Developmental Biology, Baylor College of Medicine, One Baylor Plaza, Houston, TX 77030, USA</p>
            </ins>
            <ins id="I4">
               <p>Section of Cell and Developmental Biology, University of California San Diego, 9500 Gilman Drive, La Jolla, CA 92093, USA</p>
            </ins>
            <ins id="I5">
               <p>Faculty of Computer and Information Science, University of Ljubljana, Trzaska cesta 25, SI-1001 Ljubljana, Slovenia</p>
            </ins>
            <ins id="I6">
               <p>Department of Biochemistry and Molecular Biology, Baylor College of Medicine, One Baylor Plaza, Houston, TX 77030, USA</p>
            </ins>
         </insg>
         <source>Genome Biology</source>
         <issn>1465-6906</issn>
         <pubdate>2010</pubdate>
         <volume>11</volume>
         <issue>3</issue>
         <fpage>R35</fpage>
         <url>http://genomebiology.com/2010/11/3/R35</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="doi">10.1186/gb-2010-11-3-r35</pubid>
               <pubid idtype="pmpid">20236529</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>16</day>
               <month>12</month>
               <year>2009</year>
            </date>
         </rec>
         <revrec>
            <date>
               <day>11</day>
               <month>2</month>
               <year>2010</year>
            </date>
         </revrec>
         <acc>
            <date>
               <day>17</day>
               <month>3</month>
               <year>2010</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>17</day>
               <month>3</month>
               <year>2010</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2010</year>
         <collab>Parikh et al.; licensee BioMed Central Ltd.</collab>
         <note>This is an open access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <shorttitle>
         <p>Conserved developmental transcriptomes</p>
      </shorttitle>
      <shortabs>
         <p>Transcriptional profiling of Dictyostelium development reveals significant conservation of transcriptional profiles between evolutionarily divergent species.</p>
      </shortabs>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>Evolutionarily divergent organisms often share developmental anatomies despite vast differences between their genome sequences. The social amoebae <it>Dictyostelium discoideum </it>and <it>Dictyostelium purpureum </it>have similar developmental morphologies although their genomes are as divergent as those of man and jawed fish.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>Here we show that the anatomical similarities are accompanied by extensive transcriptome conservation. Using RNA sequencing we compared the abundance and developmental regulation of all the transcripts in the two species. In both species, most genes are developmentally regulated and the greatest expression changes occur during the transition from unicellularity to multicellularity. The developmental regulation of transcription is highly conserved between orthologs in the two species. In addition to timing of expression, the level of mRNA production is also conserved between orthologs and is consistent with the intuitive notion that transcript abundance correlates with the amount of protein required. Furthermore, the conservation of transcriptomes extends to cell-type specific expression.</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusions</p>
               </st>
               <p>These findings suggest that developmental programs are remarkably conserved at the transcriptome level, considering the great evolutionary distance between the genomes. Moreover, this transcriptional conservation may be responsible for the similar developmental anatomies of <it>Dictyostelium discoideum </it>and <it>Dictyostelium purpureum</it>.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <meta>
      <classifications>
         <classification id="30010005" subtype="man_spc_id" type="BMC">Development</classification>
         <classification id="30010008" subtype="man_spc_id" type="BMC">Evolution</classification>
         <classification id="300100010" subtype="man_spc_id" type="BMC">Genome studies</classification>
      </classifications>
   </meta>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>Comparisons between morphology, physiology and developmental transitions of organisms have been used for some time to study evolutionary relationships between species. We can now use genome sequence comparisons and start to relate genetic information to organismal function and morphology. High-throughput methods for the analysis of RNA, protein and metabolites are beginning to bridge the gap between genomes and functions, and evolutionary comparisons between organisms using these methods are increasing our understanding of the relationship between genes and function.</p>
         <p>Gene regulation is sometimes surprisingly similar between divergent species, revealing common pathways in fundamental processes despite vast evolutionary distances <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr></abbrgrp>. Comparing the transcriptomes of evolutionarily distant organisms has revealed ancient conserved genetic networks and helped in assigning function to unknown genes <abbrgrp><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr></abbrgrp>. On the other hand, there is evidence for extensive divergence of developmental gene regulation in closely related species <abbrgrp><abbr bid="B5">5</abbr></abbrgrp> and comparative studies have shown that evolution of transcriptional regulation in specific pathways can drive divergence of developmental anatomies. For example, differences in the spatiotemporal regulation of Hox genes can account for variations in animal patterning <abbrgrp><abbr bid="B6">6</abbr></abbrgrp> and differences in the expression patterns of conserved genes can determine variations in heart development <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>. In light of these findings, it is interesting that divergent species sometimes share developmental anatomies despite differences in their genome sequences and in their gene regulation <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>. We therefore wanted to study the global transcriptional basis of evolutionarily conserved developmental anatomies between divergent organisms.</p>
         <p>Deep RNA sequencing (RNA-seq), in which millions of short reads are mapped to fully sequenced genomes, introduces a new dimension to transcriptome analysis. The method yields a quantitative, digital description of all the mRNA molecules in a given sample, in addition to improved sensitivity and increased dynamic range relative to hybridization based microarrays <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Moreover, mRNA abundance can be directly compared between genes with different sequences, within and between organisms. We used RNA-seq to compare the developmental transcriptomes of two dictyostelid species, <it>Dictyostelium discoideum </it>and <it>Dictyostelium purpureum</it>, that exhibit vast sequence divergence. The genome of <it>D. purpureum </it>has been sequenced recently and compared to that of the previously sequenced genome of <it>D. discoideum </it>(R Sucgang <it>et al </it>"Comparative genomics of the social amoeba: <it>Dictyostelium discoideum </it>and <it>Dictyostelium purpureum</it>", unpublished work). The two genomes are almost identical in size and both have a high A+T content. The genome divergence between the two species was estimated by analyzing numerous orthologous protein clusters representing plant, animal, fungal and amoebal species. This analysis suggested that the genomes of <it>D. discoideum </it>and <it>D. purpureum </it>are as different from each other as the genome of jawed fish is from that of humans (R Sucgang <it>et al</it>, unpublished work). Considering the estimate that the rates of protein evolution in the amoebozoa are comparable to those of plants and animals <abbrgrp><abbr bid="B10">10</abbr></abbrgrp>, <it>D. purpureum </it>and <it>D. discoideum </it>probably shared a common ancestor approximately 400 million years ago.</p>
         <p>The dictyostelids are an order of amoebae that prey on bacteria in the soil and propagate by fission as solitary cells. Upon starvation they become social and embark on a developmental program that begins with aggregation of thousands of cells into a mound and ends with a multicellular structure that consists of a ball of spores carried atop a cellular stalk. Despite their vast evolutionary distance, <it>D. discoideum </it>and <it>D. purpureum </it>exhibit very similar developmental programs and inhabit the same ecological niche <abbrgrp><abbr bid="B11">11</abbr></abbrgrp>. Both organisms begin their multicellular development immediately following starvation, both use chemotaxis towards cAMP as a means of aggregation, and both differentiate into two types of cells during the slug stage - prespore and prestalk cells (Figure <figr fid="F1">1a</figr>). The two cell types eventually develop into a cluster of spores, called the sorus, and a thin rod of vacuolated cells called the stalk. The fruiting bodies of the two species are similar in size and shape <abbrgrp><abbr bid="B12">12</abbr></abbrgrp>, although <it>D. purpureum </it>commits its cells to the sterile stalk tissue during the multicellular phase by generating a stalk during slug migration, whereas <it>D. discoideum </it>does not. There is also a difference in pigmentation of the sori, as illustrated in Figure <figr fid="F1">1a</figr>. Despite the similarities between the species, if cells of <it>D. discoideum </it>and <it>D. purpureum </it>happen to aggregate together, they soon sort out to form species-specific fruiting bodies <abbrgrp><abbr bid="B11">11</abbr></abbrgrp>. Other prominent differences are a 4-hour delay in aggregation and a 4-hour delay in culmination of <it>D. purpureum </it>compared to <it>D. discoideum</it>. However, by the end of the 24-hour developmental program, both species have formed fruiting bodies, consisting of spore-filled sori carried atop cellular stalks. We wanted to test whether the developmental transcriptional profiles of the two species mirror the morphological similarities despite the protein sequence divergence.</p>
         <fig id="F1">
            <title>
               <p>Figure 1</p>
            </title>
            <caption>
               <p>Conservation of morphology and gene expression patterns in the developmental programs of <it>D. discoideum </it>and <it>D. purpureum</it></p>
            </caption>
            <text>
               <p><b>Conservation of morphology and gene expression patterns in the developmental programs of <it>D. discoideum </it>and <it>D. purpureum</it></b>. <b>(a) </b>An illustration of the developmental programs. Both species begin the developmental program by aggregation of starving cells into centers that contain approximately 50,000 cells. The aggregates undergo morphological transformations from loose aggregates to tight aggregates to tipped aggregates while the cells differentiate into prespore and prestalk cells (not shown). Later in development, <it>D. purpureum </it>slugs (right) migrate while leaving a cellular stalk behind them whereas <it>D. discoideum </it>slugs do not. After culmination, the fruiting bodies are similar in size and shape and both consist of a ball of spores (sorus) carried on top of a cellular stalk as indicated. They differ in that <it>D. purpureum </it>fruiting bodies lack a basal disc at the bottom of the stalk and their sori are purple rather than yellow. <b>(b) </b>Developmental morphologies. A top view with light microscopy of cells developing on dark nitrocellulose filters is shown. Species names and developmental times are indicated. Scale bar: 0.5 mm. <b>(c) </b>The heat maps represent the patterns of change in standardized mRNA abundance for all the genes in the <it>D. discoideum </it>and the <it>D. purpureum </it>genomes. Each row represents an average of 85 genes and each column represents a developmental time point (hours). The colors represent relative mRNA abundances (see scale). The genes are ordered according to their regulation pattern in each species. The black lines divide the transcripts, from top to bottom, into: down-regulated, intermediate regulation and up-regulated. The dendrograms represent the differences between the transcriptomes at each time point. <b>(d) </b>The maximal similarity between each <it>D. purpureum </it>developmental time point (x-axis) to each <it>D. discoideum </it>time point (y-axis) across the 7,560 orthologs. The dashed line represents a hypothetical comparison between perfectly synchronous developmental programs.</p>
            </text>
            <graphic file="gb-2010-11-3-r35-1"/>
         </fig>
      </sec>
      <sec>
         <st>
            <p>Results and discussion</p>
         </st>
         <sec>
            <st>
               <p>Conservation of developmental gene expression profiles</p>
            </st>
            <p>We collected RNA samples at 4-hour intervals during the 24-hour developmental programs in two independent replicas for each species and analyzed them by RNA-seq (Table S1 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). We found that 69% of the <it>D. discoideum </it>genome was transcribed, with 12% in unannotated regions. In <it>D. purpureum</it>, 74% of the genome was transcribed, with 17% in unannotated regions. The biological replicates were highly similar to each other (mean Pearson's correlation of >0.95 between the biological replicates; Figure S1 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>) and the expression of known marker genes was readily validated by quantitative RT-PCR (Figure S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). There are 13,970 gene models in <it>D. discoideum </it>and 12,410 in <it>D. purpureum </it>(R Sucgang <it>et al</it>, unpublished work). We found evidence for 8,435 gene transcripts in <it>D. discoideum </it>and 9,403 gene transcripts in <it>D. purpureum </it>that were expressed at greater than one mRNA molecule per cell (>30 read counts per gene; see Materials and methods) either in growing or in developing cells and had at least 5% mappable sequences. In most cases we found high reproducibility between the transcript levels in the biological replicates (>0.5 Pearson's correlation) but a few groups of genes failed the reproducibility test. One of the interesting groups is a set of heat shock proteins that had coordinate differences in transcript abundance between the biological replicates of <it>D. discoideum</it>. We suspect that some of these variable genes represent meaningful responses to subtle differences in the environment, as observed in other systems <abbrgrp><abbr bid="B14">14</abbr></abbrgrp>.</p>
            <p>Analysis of the biologically reproducible transcripts revealed that the abundance of almost every mRNA changed at least two-fold during development of both species. Figure <figr fid="F1">1c</figr> shows these findings as heat maps with the genes in each species ordered according to their developmental patterns and subdivided into three groups. In <it>D. discoideum</it>, 1,779 transcripts are down-regulated, 3,777 are up-regulated, and 2,822 have other patterns of developmental regulation. In <it>D. purpureum</it>, 3,168 are down-regulated, 3,472 are up-regulated, and 2,533 have other patterns of regulation. We also compared the similarity between the transcriptomes at each time point using hierarchical clustering and represent the distances between the transcriptomes as dendrograms above the heat maps (Figure <figr fid="F1">1c</figr>). In both species, the largest change in the transcriptome occurs during the transition from unicellularity to multicellularity, between 4 and 8 hours in <it>D. discoideum </it>and between 8 and 12 hours in <it>D. purpureum </it>(Figure <figr fid="F1">1c</figr>). These results indicate that both developmental programs are accompanied by sweeping changes in the transcriptional regulation of the entire genome and that the major transitions may be conserved.</p>
            <p>The genomes of <it>D. discoideum </it>and <it>D. purpureum </it>contain 7,619 orthologs, more than 50% of the genes in each genome (R Sucgang <it>et al</it>, unpublished work). To compare the developmental programs of the two species more closely, we compared the progression of developmental changes in 7,560 orthologs whose transcripts meet our quality criteria. We compared the similarity in the global transcriptional profiles between each <it>D. purpureum </it>developmental time point and each <it>D. discoideum </it>time point and plotted the maximal correlation (Figure <figr fid="F1">1d</figr>). The results indicate that the general developmental progression is similar between the two species, with two lags in the <it>D. purpureum </it>progression relative to <it>D. discoideum </it>- one between 4 and 8 hours and another between 16 and 20 hours. The transcriptional delays seen in Figure <figr fid="F1">1d</figr> occur at the same time as the morphological delays seen in Figure <figr fid="F1">1b</figr>, suggesting that the two are causally related.</p>
         </sec>
         <sec>
            <st>
               <p>Conserved regulation of developmental gene expression</p>
            </st>
            <p>To quantify the conservation between the developmental transcriptomes of <it>D. discoideum </it>and <it>D. purpureum</it>, we compared the expression profiles of the orthologs. Figure <figr fid="F2">2a</figr> shows the distribution of expression profile similarities between the two species (Pearson's correlation) and the transcript abundance (average read counts). The three-dimensional density plot indicates that most of the transcripts are similar between the two species, as quantified in the histogram projected on the back panel (Figure <figr fid="F2">2a</figr>). Specifically, the transcriptional profiles of over 57% of the genes are nearly identical (Pearson's correlation >0.5) and another 22% of the genes are similar (Pearson's correlation >0), suggesting that over 75% of the orthologs participate in evolutionarily conserved developmental processes (Figure <figr fid="F2">2a</figr>). Moreover, this transcriptional conservation is not affected by transcript abundance (Pearson's correlation 0.23), as can be seen on the x-axis in Figure <figr fid="F2">2a</figr>. The transcriptional profile of every transcript in <it>D. discoideum </it>and <it>D. purpureum </it>can be inspected on dictyExpress <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp>.</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Conservation of regulation and function between <it>D. discoideum </it>and <it>D. purpureum </it>transcriptional profiles</p>
               </caption>
               <text>
                  <p><b>Conservation of regulation and function between <it>D. discoideum </it>and <it>D. purpureum </it>transcriptional profiles</b>. We compared the similarity between the transcriptional profiles of orthologs from the two species. <b>(a) </b>The three-dimensional density plot represents the distribution of expression levels (x-axis, average read count) and of the similarities between the transcription profiles of the orthologs (y-axis, Pearson's correlation). The z-axis (gene count) represents the number of genes in each bin (defined by the black gridlines). The histogram behind the density plot summarizes the gene counts in four sections (separated by the yellow lines). The number of genes (top) and their fraction of the total (%) are indicated. <b>(b) </b>The bars represent the number of transcripts with various highly conserved expression patterns (gene counts indicated inside bars). <b>(c) </b>Prominent Gene Ontology terms enriched within each group. <b>(d) </b>Representative expression patterns in <it>D. discoideum </it>(yellow) and <it>D. purpureum </it>(purple). The time (hours; x-axis), relative mRNA abundance (y-axis), and gene names are indicated.</p>
               </text>
               <graphic file="gb-2010-11-3-r35-2"/>
            </fig>
            <p>Coordinate regulation of genes with common functions in specific developmental processes is a good indicator that the functions are being utilized during development <abbrgrp><abbr bid="B4">4</abbr><abbr bid="B17">17</abbr></abbrgrp>. We therefore tested which cellular functions are characteristic of the developmentally co-regulated genes. First we determined the maximal similarity between the transcriptional profiles of <it>D. discoideum </it>and <it>D. purpureum </it>genes with and without temporal transformations. Figure <figr fid="F2">2</figr> shows four gene groups that exhibit similar patterns of expression between <it>D. discoideum </it>and <it>D. purpureum </it>(Figure <figr fid="F2">2b</figr>), their enriched biological processes (Figure <figr fid="F2">2c</figr>) and examples of selected gene trajectories (Figure <figr fid="F2">2d</figr>). The enriched annotations among the 1,009 transcriptionally similar (Pearson's correlation >0.75) and up-regulated genes include differentiation, spore development, and regulation of transcription (Figure <figr fid="F2">2c</figr>; Table S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). The first two functions suggest that the two species have conserved developmental and differentiation pathways. The latter suggests that regulation of transcription is a central component in developmental regulation, consistent with the finding that most of the genes in the genome are developmentally regulated in both species (Figure <figr fid="F1">1</figr>). The enriched functions among the 547 down-regulated genes include translation (for example, ribosomal proteins), response to bacteria and cytoskeleton organization (Figure <figr fid="F2">2c</figr>; Table S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). These functions have central roles in <it>D. discoideum </it>growth and our data suggest conservation of these processes in <it>D. purpureum </it><abbrgrp><abbr bid="B12">12</abbr><abbr bid="B18">18</abbr></abbrgrp>. We also identified 334 genes with various patterns of developmental regulation, such as transient up- or down-regulation, that were enriched in functions related to signal transduction (Figure <figr fid="F2">2c</figr>; Table S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>), a well-known function in <it>Dictyostelium </it>development <abbrgrp><abbr bid="B12">12</abbr></abbrgrp>.</p>
            <p>Considering the temporal shifts between the developmental programs of <it>D. discoideum </it>and <it>D. purpureum </it>(Figure <figr fid="F1">1d</figr>), we hypothesized that the expression profiles of orthologous genes required during these stages would be temporally shifted. Therefore, we searched for transcripts that are more similar to each other after applying temporal transformations to the developmental profiles. We found 630 such transcripts, 344 of which exhibit a 4-hour delay in <it>D. purpureum </it>compared to <it>D. discoideum </it>(Figure <figr fid="F2">2b</figr>). Some of the prominent functions of these transcripts are response to stimulus, phagocytosis, cell adhesion, and cytoskeleton organization (Figure <figr fid="F2">2c</figr>; Table S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). Previous studies have shown that these functions are essential during the initiation of development in <it>D. discoideum </it><abbrgrp><abbr bid="B12">12</abbr><abbr bid="B18">18</abbr></abbrgrp>, so the 4-hour delay in gene expression is consistent with the delayed transition from unicellularity to multicellularity observed in <it>D. purpureum </it>(Figure <figr fid="F1">1b</figr>).</p>
            <p>We also tested the relationship between the degree of coding sequence conservation and the degree of expression profile conservation, which gave inconsistent results in previous studies <abbrgrp><abbr bid="B19">19</abbr><abbr bid="B20">20</abbr><abbr bid="B21">21</abbr></abbrgrp>. Analyzing the orthologous genes between <it>D. discoideum </it>and <it>D. purpureum</it>, we find no significant correlation between protein sequence conservation and expression profile conservation (Figure S3 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). However, we find that the developmental process is accompanied by a transition from expressing evolutionarily conserved genes to expressing more species-specific genes (Figure S4 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>).</p>
         </sec>
         <sec>
            <st>
               <p>Conserved mRNA abundance</p>
            </st>
            <p>Thus far, we have only considered the relative changes in transcript abundance during development in order to focus on gene regulation. RNA-seq data also allow the comparison of transcript abundance between genes within each species and between species. We compared the sums of mRNA abundances from all developmental stages for each of the orthologs and found a surprising similarity between <it>D. discoideum </it>and <it>D. purpureum </it>(Pearson's correlation = 0.83), suggesting that the absolute mRNA abundances of most genes are conserved between the two species (Figure <figr fid="F3">3a</figr>; Table S3 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). We then divided the transcripts into three groups, based on their abundance, and analyzed the annotations of the genes. We found that mRNAs for structural molecules and for translation (for example, ribosomal proteins) are highly enriched among the 436 most abundant transcripts. The second group (2,498 transcripts) exhibits intermediate transcript levels and is enriched in mRNAs for enzyme regulators and catalytic activity. The least abundant transcripts, which represent over half the orthologs, are enriched in various annotations, including transcription (Table S3 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). These results are consistent with the intuitive notion that transcript abundance correlates with the amount of protein required in the cell. To test the generality of this notion, we compared our data to published RNA-seq data from yeast and mouse <abbrgrp><abbr bid="B22">22</abbr><abbr bid="B23">23</abbr></abbrgrp>. We created five broad functional categories using the Gene Ontology (GO) slim terminology <abbrgrp><abbr bid="B24">24</abbr></abbrgrp> and calculated the median gene abundance rank within each category (Figure <figr fid="F3">3b</figr>; Table S4 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). We used ranking rather than actual transcript abundance to allow comparison despite the different normalization methods used in the three studies. In all four species we found that genes involved in translation and in cellular structures had the highest mRNA abundance, transcripts encoding catalytic proteins and enzyme regulators had an intermediate abundance, and mRNAs involved in transcription were among the least abundant ones (Figure <figr fid="F3">3b</figr>). These results highlight the quantitative dimension provided by RNA-seq and show conservation of transcript abundance across large evolutionary distances.</p>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Conservation of transcript abundance between various species</p>
               </caption>
               <text>
                  <p><b>Conservation of transcript abundance between various species</b>. <b>(a) </b>Scatter plot representing the abundance of the <it>D. discoideum </it>transcripts (x-axis, log<sub>10 </sub>scale) compared to their <it>D. purpureum </it>orthologs (y-axis, log<sub>10 </sub>scale). Each point represents the sum of read counts over the seven developmental time points. We divided the genes into three groups and indicated enriched Gene Ontology terms. Low abundance, &lt;1,000 reads (green); intermediate abundance, 1,000 to 10,000 reads (blue); and high abundance, >10,000 reads (red). <b>(b) </b>We calculated the median gene abundance rank (y-axis, percentile) within five functional categories (indicated by the color code) in amoebae (<it>D. discoideum </it>and <it>D. purpureum</it>), mice (<it>M. musculus</it>), and yeast (<it>S. cerevisiae</it>), as indicated (x-axis). The asterisk indicates that only 21 genes represent this category in <it>D. purpureum </it>whereas the other species have >100 genes.</p>
               </text>
               <graphic file="gb-2010-11-3-r35-3"/>
            </fig>
            <p>We also analyzed the differences in mRNA abundance between orthologs and non-orthologs in <it>D. discoideum </it>and <it>D. purpureum </it>and observed that non-orthologous transcripts are less abundant in both species compared to the orthologous transcripts (<it>t</it>-test; <it>D. discoideum P</it>-value = 3.6e-10; <it>D. purpureum P</it>-value = 2.2e-16). This finding is consistent with previous studies showing a positive relationship between sequence conservation and levels of gene expression <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Conservation of cell-type differentiation</p>
            </st>
            <p>Developing <it>Dictyostelium </it>cells differentiate into two major cell types - prespore and prestalk. We tested how many genes were cell-type enriched in <it>D. discoideum </it>and whether that enrichment was conserved in <it>D. purpureum</it>. We separated the prestalk and the prespore cells from the slug stage of <it>D. discoideum </it>and <it>D. purpureum</it>, and analyzed them by RNA-seq. Previous studies used <it>in situ </it>RNA hybridization to identify 132 <it>D. discoideum </it>genes that are preferentially expressed in prespore or prestalk cells <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>. We traced the abundance of these transcripts in the <it>D. discoideum </it>RNA-seq data and used them as standards to define cell-type enriched transcripts, identifying 850 prespore genes and 915 prestalk genes (Figure S5 and Table S5 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). We then used the <it>D. purpureum </it>orthologs of the known <it>D. discoideum </it>markers to define cell-type enriched genes in a similar way and identified 1,984 prespore genes and 801 prestalk genes (Figure S5 and Table S6 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). Since we only considered two biological replicates of each species, these data rely on a conservative method for estimating the confidence statistic. A new but less statistically robust method that relies on the sequence coverage of each nucleotide in the transcript yielded quantitatively better results (Figure S5 and Supplementary methods in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>).</p>
            <p>We then focused on the 7,560 orthologs and found 1,158 to be cell-type enriched in <it>D. discoideum </it>and 2,064 to be cell-type enriched in <it>D. purpureum</it>. Of those, 455 transcripts were enriched in the same cell type in both species (Figure <figr fid="F4">4</figr>). This group of conserved cell-type-enriched transcripts was significantly enriched in transcriptionally conserved genes (<it>n </it>= 188, hypergeometric <it>P</it>-value = 4.5e-7). We hypothesized that the relatively low level of conservation among the cell-type-enriched transcripts was due to the stalk formation during slug migration in <it>D. purpureum </it>and not in <it>D. discoideum</it>. We therefore traced the expression profiles of the cell-type-enriched transcripts in the developmental transcriptomes to identify prestalk enriched genes that are temporally shifted between the two species, but could not find a significant number within the list of orthologs. The data shown in Figure <figr fid="F4">4</figr> greatly expand our knowledge of cell-type-enriched transcripts in <it>Dictyostelium </it>and indicate that the conservation in the transcriptomes extends to cell type differentiation, albeit to a lesser extent than the developmental conservation.</p>
            <fig id="F4">
               <title>
                  <p>Figure 4</p>
               </title>
               <caption>
                  <p>Conservation of cell-type specificity between <it>D. discoideum </it>and <it>D. purpureum </it>transcripts</p>
               </caption>
               <text>
                  <p><b>Conservation of cell-type specificity between <it>D. discoideum </it>and <it>D. purpureum </it>transcripts</b>. Similarity between cell-type enriched orthologs. The yellow circle represents <it>D. discoideum </it>transcripts, the purple circle represents <it>D. purpureum</it>, and the overlap represents the conservation of cell-type-enriched genes. The differentially expressed genes within each set are divided into prespore enriched (green), prestalk enriched (red) and known markers (in parentheses).</p>
               </text>
               <graphic file="gb-2010-11-3-r35-4"/>
            </fig>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Conclusions</p>
         </st>
         <p>The conservation of the developmental transcriptomes of <it>D. discoideum </it>and <it>D. purpureum </it>is rather surprising, considering the evolutionary distance between the genomes of the two species (R Sucgang <it>et al</it>, unpublished work). Previous studies have argued that divergent regulation of gene expression is a major component of morphological divergence during evolution <abbrgrp><abbr bid="B6">6</abbr><abbr bid="B27">27</abbr></abbrgrp>. Our analysis shows the other side of that argument, suggesting that conservation of transcriptional regulation may be responsible for anatomical conservation.</p>
         <p>Comparison of <it>D. discoideum </it>and <it>D. purpureum </it>offers a unique insight into the role of transcriptional regulation in developmental programs, because both developmental processes are highly synchronous and the two species have only two major cell types. Furthermore, <it>Dictyostelium </it>is particularly amenable to RNA-seq transcriptome analyses since large amounts of homogeneous biological samples can be collected at all stages throughout development and the two major cell types can be separated at the slug stage. Other multicellular organisms may present more complicated patterns of cellular differentiation and it may be difficult to define analogous developmental stages between distant species. Nevertheless, comparative transcriptome analyses by RNA-seq could still be quite informative in such organisms, especially for the analysis of defined tissues and purified cell types.</p>
      </sec>
      <sec>
         <st>
            <p>Materials and methods</p>
         </st>
         <sec>
            <st>
               <p>Growth, development and RNA preparation</p>
            </st>
            <p>For the developmental time courses, we used the <it>D. discoideum </it>strain AX4 <abbrgrp><abbr bid="B28">28</abbr></abbrgrp> and the <it>D. purpureum </it>strain DpAX1, whose genomes have been sequenced (R Sucgang <it>et al</it>, unpublished work) <abbrgrp><abbr bid="B29">29</abbr></abbrgrp>. For cell type enrichment, we used the <it>D. discoideum </it>strain NC4 <abbrgrp><abbr bid="B30">30</abbr></abbrgrp> and the <it>D. purpureum </it>strain DpAX1. We grew the cells to mid-log phase in association with <it>Klebsiella aerogenes </it>bacteria on SM-agar plates <abbrgrp><abbr bid="B31">31</abbr><abbr bid="B32">32</abbr></abbrgrp>. To induce development, we collected the cells, washed them as described <abbrgrp><abbr bid="B31">31</abbr></abbrgrp>, deposited them on nitrocellulose filters and developed them in the dark at 22&#176;C. At each time point, we collected 1 &#215; 10<sup>8 </sup>cells directly into 1 ml Trizol reagent (Life Technologies, Carlsbad, CA, USA) and extracted total RNA according to the manufacturer's recommended protocol. We collected cells at the finger stage, prepared prespore and prestalk cells by centrifugation through percoll gradients as described <abbrgrp><abbr bid="B33">33</abbr></abbrgrp>, and extracted RNA as above. We repeated each experiment twice, independently. In each case we tested the quality of the RNA by quantitative RT-PCR with oligonucleotides against several known developmental markers (Figure S2 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>) and, in the case of cell type enrichment, we tested the RNA by quantitative RT-PCR with oligonucleotides against known cell-type-specific markers from <it>D. discoideum </it><abbrgrp><abbr bid="B26">26</abbr></abbrgrp> and their <it>D. purpureum </it>orthologs.</p>
         </sec>
         <sec>
            <st>
               <p>cDNA preparation</p>
            </st>
            <p>To prepare cDNA, we subjected 20 &#956;g of total RNA to one round of poly-A selection on oligo(dT) beads (Dynal, Carlsbad, CA, USA). We fragmented 125 ng of the resulting RNA to an average size of 200 bases using divalent cations (Fragmentation Buffer, Ambion, Austin, TX, USA) at 70&#176;C for 5 minutes and terminated the reaction with stop buffer (Ambion). We precipitated the fragments by adjusting the reaction to 66 mM NaOAc, pH 5.2, 0.22 mg/ml glycogen and 70% ethanol, washed the precipitate once with 70% ethanol and resuspended it in RNase-free water. We prepared first-strand cDNA with SuperScript II reverse transcriptase (Invitrogen, Carlsbad, CA, USA) and 3 &#956;g of random hexamer primers. We then synthesized second strand cDNA with DNA Polymerase I and RNaseH in an Illumina custom buffer (Illumina, San Diego, CA, USA). We purified the products on a QiaQuick PCR column (Qiagen, Valencia, CA, USA) and eluted them in 30 &#956;l EB buffer (Qiagen). We further processed the cDNAs using the Genomic DNA Sequencing Sample Prep Kit (Illumina) according to the manufacturer's recommended protocol. A detailed description of the RNA-seq sample preparation methods is provided in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Sequencing and data processing</p>
            </st>
            <p>We sequenced the cDNA libraries (read length = 35 bases) on a high-throughput Illumina Genome Analyzer II using the manufacturer's recommended pipeline (versions 1.2 and 1.3). The resulting FASTQ files were mapped in multiple steps using the short-read alignment software novoalign from Novocraft according to the manufacturer's default parameters <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>. First we mapped the reads to the reference genome. Sequenced reads from <it>D. discoideum </it>were mapped to the 13 May 2009 genome build of <it>D. discoideum </it>from dictyBase <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>, while masking the duplicated region of chromosome 2 (nucleotides 3,015,984 to 3,768,555) and a half of the ribosomal DNA palindrome (nucleotides 42,801 to 78,150). Sequenced reads from <it>D. purpureum </it>were mapped to the <it>D. purpureum </it>genome assembly (R Sucgang <it>et al</it>, unpublished work). Sequences that did not match the chromosomal sequences were mapped to a library of all possible splice junctions that we determined using the annotated gene models. The gene models for <it>D. discoideum </it>are defined by the 13 May 2009 build from dictyBase <abbrgrp><abbr bid="B35">35</abbr></abbrgrp> and for <it>D. purpureum </it>by the published genome annotations (R Sucgang <it>et al</it>, unpublished work). Finally, we mapped the remaining RNA-seq reads after trimming two bases from the end of the reads, iteratively, until the reads were shorter than 25 bases. The expanded genome, including the masked chromosomal sequences and all possible splice junctions, and the gene models we used for both species are available in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. The nucleotide level coverage can be visualized in the transcriptome browser <abbrgrp><abbr bid="B36">36</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Mapability</p>
            </st>
            <p>We calculated the mapability of every nucleotide by generating all possible 35 bp oligomers from each genome and mapping them back to the respective genome using the default parameters of novoalign <abbrgrp><abbr bid="B34">34</abbr></abbrgrp>. A nucleotide is defined as mapable if the 35 bp sequence starting at that nucleotide can be unambiguously mapped to the genome. We define the effective length of each gene as the count of mapable nucleotides.</p>
         </sec>
         <sec>
            <st>
               <p>Scaled mRNA abundance levels</p>
            </st>
            <p>In order to compare transcript abundance between different time points and cell types within and between species, we scaled the transcript abundance values to account for mapability and for the total read counts from each sequencing run. Since the coverage across transcripts is variable, we excluded transcripts that are less than 5% mapable. We also excluded transcripts that are not polyadenylated because our library preparation protocol selects for polyadenylated genes. All genes on the mitochondrial or rDNA chromosomes and any tRNA, rRNA or other non-coding RNAs were excluded. We only identified a single ortholog of non-polyadenylated mRNA in the <it>D. purpureum </it>genome. We conducted all of the analyses on this filtered list, which consisted of 12,713 <it>D. discoideum </it>genes and 12,246 <it>D. purpureum </it>genes. We defined the raw abundance level of each transcript (<it>i</it>) in a sample (<it>j</it>) as the sum of all the unique reads that map to the transcript in the expanded genome. We then scaled this count by the effective gene length and by the total read count from the entire sequencing run as follows:</p>
            <p>
               <display-formula>
                  <graphic file="gb-2010-11-3-r35-i1.gif"/>
               </display-formula>
            </p>
            <p>where <it>a</it><sub><it>ij </it></sub>is the scaled abundance for all genes <it>i </it>from each sample <it>j</it>, <it>r</it><sub><it>i </it></sub>is the sum of reads that mapped to gene <it>i</it>, <it>L </it>is the median effective gene length of all the genes, <it>N </it>is the mean of the total read counts of all the sequencing runs considered in the experiment, <it>l</it><sub><it>i </it></sub>is the effective length of gene <it>i </it>and <it>n</it><sub><it>j </it></sub>is the total number of uniquely mapped reads from sequencing run <it>j</it>, excluding the non-polyadenylated genes. This method accounts for the transcript size, as well as for differences in the total read count between samples, while preserving the dynamic range of the original data. We provide the raw data as well as the scaled data in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. We also made the scaled data available for independent exploration through dictyExpress <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp>.</p>
            <p>We estimated the number of mRNA molecules per cell as represented by the RNA-seq read count. From each sample of 10<sup>8 </sup>cells we extracted approximately 500 &#956;g of total RNA. The average transcript length in <it>D. discoideum </it>is 1,577 bases and the average molecular weight of a ribonucleotide monophosphate is 339.5 g/mol. Assuming that total RNA contains 4% mRNA <abbrgrp><abbr bid="B37">37</abbr></abbrgrp> (20 &#956;g), we estimated the number of transcripts per cell represented by each RNA-seq read as follows:</p>
            <p>
               <display-formula>
                  <graphic file="gb-2010-11-3-r35-i2.gif"/>
               </display-formula>
            </p>
            <p>Since the initial RNA extraction was from 10<sup>8 </sup>cells, the number of transcripts per cell is calculated as follows:</p>
            <p>
               <display-formula>
                  <graphic file="gb-2010-11-3-r35-i3.gif"/>
               </display-formula>
            </p>
            <p>Considering an average of 5 &#215; 10<sup>6 </sup>mRNA reads per RNA-seq lane, we calculated the number of transcripts represented by a sequencing read as:</p>
            <p>
               <display-formula>
                  <graphic file="gb-2010-11-3-r35-i4.gif"/>
               </display-formula>
            </p>
            <p>Each RNA-seq read represents approximately 0.04 transcripts per cell, so 30 reads represent approximately 1 mRNA molecule per cell.</p>
         </sec>
         <sec>
            <st>
               <p>Statistical analysis</p>
            </st>
            <p>We performed all the statistical analyses in the statistical software package R <abbrgrp><abbr bid="B38">38</abbr></abbrgrp>. The complete analysis presented in the paper can be recreated using the R scripts and the scaled transcript abundance counts provided in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. Analyses within each species include all the polyadenylated genes with at least 5% mapable nucleotides, >30 raw read-counts in at least one time point and high reproducibility between biological replicates. For all analyses that require a similarity metric we tested both Pearson's correlation and Spearman correlation. We found little difference between the results and therefore present the results calculated using the Pearson's correlation since it is a more powerful test. We define biologically reproducible genes as those having >0.5 Pearson's correlation between the developmental expression profiles from the two biological replicates. In <it>D. discoideum</it>, 795 genes did not have sufficient mapable sequences, whereas in <it>D. purpureum</it>, 163 genes failed this criterion. In <it>D. discoideum</it>, 715 genes failed the reproducibility criterion and 3,563 were not expressed, whereas in <it>D. purpureum</it>, 321 genes failed the reproducibility criterion and 2,522 were not expressed. In <it>D. discoideum </it>we also excluded 462 genes that lack a poly-A tail. We identified only one such gene in <it>D. purpureum</it>. Comparisons between species include only the 7,619 identified orthologs between the species (R Sucgang <it>et al</it>, unpublished work). All analyses were done on log-transformed scaled read counts.</p>
            <p>We defined developmentally up- or down-regulated genes based on the similarity of a gene's trajectory to a hypothetical increasing trajectory using the function y = x, where y is the scaled read count and x is the developmental time point. Genes with >0.5 Pearson's correlation coefficient are defined as up-regulated genes, whereas genes with &lt;-0.5 Pearson's correlation coefficient are down-regulated genes. Invariant genes are defined as having less than a two-fold change in abundance between any two developmental time points.</p>
            <p>To identify GO categories enriched within gene lists we used the Cytoscape software version 2.6.3 <abbrgrp><abbr bid="B39">39</abbr></abbrgrp> with the Bingo plugin <abbrgrp><abbr bid="B40">40</abbr></abbrgrp>. Briefly, the tool uses the hypergeometric distribution with a Benjamini and Hochberg false discovery rate correction to identify GO terms found within a gene list more often than expected by chance. The GO annotation files for <it>Mus musculus </it>and <it>Saccharomyces cerevisiae </it>were obtained from the GO website. The GO files for <it>D. discoideum </it>and <it>D. purpureum </it>were obtained from dictyBase <abbrgrp><abbr bid="B35">35</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Data visualization</p>
            </st>
            <p>We generated heat maps in Figure <figr fid="F1">1</figr> with the heatmap.2 function from the gplots package <abbrgrp><abbr bid="B41">41</abbr></abbrgrp>. To allow comparison between gene profiles with different abundances, we normalized the developmental profiles to have a mean of 0 and a standard deviation of 1. The resulting z-scores represent the number of standard deviations a time point is above or below the profile mean and are used to color the heat map. We ordered the genes based on their regulation from down-regulated to up-regulated. To calculate the similarity between time points we performed hierarchical clustering (R function hclust) on the expression vectors from the time points, consisting of all genes, and visualized the results as a dendrogram. We used Pearson's correlation as the distance metric and average linkage as the clustering criterion. In the presentation, objects (individual time points or groups of time points) are joined if they are more similar to each other than to any of the other objects. The vertical distance of the joint from the top is proportional to the dissimilarity between the joined objects.</p>
            <p>The three-dimensional visualization in Figure <figr fid="F2">2</figr> was generated using a two-dimensional kernel density estimation provided in the R package MASS with 50 bins along each dimension <abbrgrp><abbr bid="B42">42</abbr></abbrgrp>. The transcript abundances were calculated as the average of read counts from all developmental stages in both species, and the similarity was calculated using Pearson's correlation between the expression profiles of the orthologs. We divided the distribution into four bins based on the expression profile similarity dimension: >0.5 Pearson's correlation, between 0.5 and 0 Pearson's correlation, between 0 and -0.5 Pearson's correlation, and &lt;-0.5 Pearson's correlation. Genes with &lt;0.75 Pearson's correlation were subjected to various temporal transformations and grouped based on the transformation achieving greater than 0.75 correlation. Using cross-correlation (R function ccf) we determined the temporal shift required for maximal correlation. We grouped genes into four categories: delayed by 4 hours in <it>D. purpureum</it>, delayed by >4 hours in <it>D. purpureum</it>, delayed by 4 hours in <it>D. discoideum</it>, and delayed by >4 hours in <it>D. discoideum</it>. The developmental trajectories in Figure <figr fid="F2">2d</figr> were generated by normalization of the expression profiles to have a mean of 0 and standard deviation of 1. The resulting z-scores represent the number of standard deviations a time point is above or below the profile mean.</p>
            <p>To measure the similarity of transcript abundance between <it>D. discoideum </it>and <it>D. purpureum</it>, we created an expression vector consisting of the sum of read counts from all developmental time points for all orthologous genes. We used Pearson's correlation as a measure of similarity between the two expression vectors.</p>
            <p>We also compared our data to published mouse and yeast data. We calculated the transcript abundance data for the mouse as the sum of abundances from published data on two replicate samples of brain, liver and muscle transcriptomes <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>. The yeast RNA-seq data are the sum of all the published biological and technical replicates from cells grown in rich media <abbrgrp><abbr bid="B23">23</abbr></abbrgrp>. Since the published data were from different quantification methods, we used transcript abundance ranks rather than straight transcript abundances in comparing the functional categories between the species. We calculated the ranks as follows:</p>
            <p>
               <display-formula>
                  <graphic file="gb-2010-11-3-r35-i5.gif"/>
               </display-formula>
            </p>
            <p>where <it>P</it><sub><it>jk </it></sub>is the rank (abundance percentile) of category <it>j </it>(structural molecule, translation, enzyme regulator, catalytic activity, or transcription) from species <it>k </it>(<it>D. discoideum, D. purpureum, M. musculus, S. cerevisiae</it>). <it>g</it><sub><it>ijk </it></sub>is the gene abundance of gene <it>i </it>within category <it>j </it>within species <it>k</it>, and <it>N</it><sub><it>k </it></sub>is the total number of genes in species <it>k</it>. The genes within each category are defined by the GO slim mapping <abbrgrp><abbr bid="B24">24</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Two methods for defining cell-type-specific genes</p>
            </st>
            <p>RNA-seq allows us to define the abundance of each nucleotide and from these values calculate the abundance of genes. There is little technical variability in gene abundance across biological replicates, but at the nucleotide level there is a clear sequence bias that leads to highly variable coverage across a single transcript (and a slight 3' bias; see Figure S6 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>). We assessed differential expression of genes using both of these data sets.</p>
         </sec>
         <sec>
            <st>
               <p>Whole-transcript method</p>
            </st>
            <p>Results derived using the whole-transcript method are shown in Figure S5a,b in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>. We calculated the differential expression of normalized read counts for each gene using the LIMMA package in R <abbrgrp><abbr bid="B43">43</abbr></abbrgrp>. We fitted a linear model to the log<sub>2</sub>-transformed data with biological replicates and cell types as factors and we used an empirical Bayes method <abbrgrp><abbr bid="B44">44</abbr></abbrgrp> to moderate standard errors. This method does not account for the variability in nucleotide coverage and is limited by the low number of replications. However, we chose to present the results of that method in the figures because it is more commonly used.</p>
         </sec>
         <sec>
            <st>
               <p>Nucleotide method</p>
            </st>
            <p>We also used the nucleotide coverage in an attempt to account for variability across a transcript and improve the assessment of differential expression. We fitted a linear model using biological replicates and cell types as factors and the log<sub>2</sub>-transformed read counts at each nucleotide across a gene as repeated measurements. This method violates the distributional assumptions of independence, normality and homoscedasticity for linear modeling, but its results are empirically better than the whole-transcript method. Genes with low read counts or bias due to sequence naturally have high variability in the coverage and can only be detected using this type of analysis. The results of using this method and a comparison between the two methods are presented in Figure S5 in the supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp>.</p>
         </sec>
         <sec>
            <st>
               <p>Defining cell-type enriched transcripts</p>
            </st>
            <p>The cDNA Atlas project defined 132 <it>D. discoideum </it>transcripts as cell-type enriched using <it>in situ </it>RNA hybridization <abbrgrp><abbr bid="B26">26</abbr></abbrgrp>. We used these data to determine empirical thresholds for defining cell-type enrichment in the RNA-seq data. Since we do not have such data for <it>D. purpureum</it>, we used 95 orthologs from the list of 132 <it>D. discoideum </it>transcripts to determine the empirical threshold values for <it>D. purpureum</it>. We defined differentially expressed genes as those that meet our quality criteria and have at least a two-fold change in abundance between the two cell types and a <it>P</it>-value lower than the maximum <it>P</it>-value of the known cell-type-specific genes in <it>D. discoideum</it>. The list of genes that are differentially expressed using the nucleotide coverage method is a subset of the list of genes found using the gene abundance counts. If we do not impose the minimum read count criteria, many of the genes identified as differentially expressed using the whole-transcript method fall below the 30 read count threshold and therefore had highly variable nucleotide coverage. Using the nucleotide coverage method, this variability is implicitly accounted for within the linear model and low abundance genes are not identified as differentially expressed.</p>
         </sec>
         <sec>
            <st>
               <p>Data availability</p>
            </st>
            <p>We provide supplementary material <abbrgrp><abbr bid="B13">13</abbr></abbrgrp> that includes a downloadable version of all the analyzed data and the R code we used to generate them as well as the supplementary figures and tables referred to in the main text. In addition, we provide a link to a transcriptome browser that allows exploration of all the data through a genome-centric graphical interface as well as detailed data about individual genes and summaries about individual experiments <abbrgrp><abbr bid="B36">36</abbr></abbrgrp>, and a link to dictyExpress, allowing exploration and data mining of individual genes and small groups of genes <abbrgrp><abbr bid="B16">16</abbr></abbrgrp>. The raw sequences and mapped data are also deposited in the Gene Expression Omnibus (accession number [GEO:GSE17637]).</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Abbreviations</p>
         </st>
         <p>bp: base pair; GO: Gene Ontology; RNA-seq: RNA sequencing.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>REM, MKK and DF performed the experiments; AP, GR, LZ and TC performed the data analysis; AP, REM and GS wrote the manuscript; all of the authors contributed to the research design, discussed the results and commented on the manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We thank members of our research groups for technical assistance and discussions. This work was supported by grants from the National Institutes of Health. AP and REM were supported by fellowships from the Keck Center for Interdisciplinary Bioscience Training of the Gulf Coast Consortia.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <title>
               <p>Comparing genomic expression patterns across species identifies shared transcriptional profile in aging.</p>
            </title>
            <aug>
               <au>
                  <snm>McCarroll</snm>
                  <fnm>SA</fnm>
               </au>
               <au>
                  <snm>Murphy</snm>
                  <fnm>CT</fnm>
               </au>
               <au>
                  <snm>Zou</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Pletcher</snm>
                  <fnm>SD</fnm>
               </au>
               <au>
                  <snm>Chin</snm>
                  <fnm>CS</fnm>
               </au>
               <au>
                  <snm>Jan</snm>
                  <fnm>YN</fnm>
               </au>
               <au>
                  <snm>Kenyon</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Bargmann</snm>
                  <fnm>CI</fnm>
               </au>
               <au>
                  <snm>Li</snm>
                  <fnm>H</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2004</pubdate>
            <volume>36</volume>
            <fpage>197</fpage>
            <lpage>204</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1291</pubid>
                  <pubid idtype="pmpid" link="fulltext">14730301</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B2">
            <title>
               <p>Conservation and divergence of light-regulated genome expression patterns during seedling development in rice and Arabidopsis.</p>
            </title>
            <aug>
               <au>
                  <snm>Jiao</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Ma</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Strickland</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Deng</snm>
                  <fnm>XW</fnm>
               </au>
            </aug>
            <source>Plant Cell</source>
            <pubdate>2005</pubdate>
            <volume>17</volume>
            <fpage>3239</fpage>
            <lpage>3256</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1105/tpc.105.035840</pubid>
                  <pubid idtype="pmcid">1315367</pubid>
                  <pubid idtype="pmpid" link="fulltext">16284311</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B3">
            <title>
               <p>Similarities and differences in genome-wide expression data of six organisms.</p>
            </title>
            <aug>
               <au>
                  <snm>Bergmann</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Ihmels</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Barkai</snm>
                  <fnm>N</fnm>
               </au>
            </aug>
            <source>PLoS Biol</source>
            <pubdate>2004</pubdate>
            <volume>2</volume>
            <fpage>E9</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1371/journal.pbio.0020009</pubid>
                  <pubid idtype="pmcid">300882</pubid>
                  <pubid idtype="pmpid" link="fulltext">14737187</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B4">
            <title>
               <p>A gene-coexpression network for global discovery of conserved genetic modules.</p>
            </title>
            <aug>
               <au>
                  <snm>Stuart</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Segal</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Koller</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>SK</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2003</pubdate>
            <volume>302</volume>
            <fpage>249</fpage>
            <lpage>255</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.1087447</pubid>
                  <pubid idtype="pmpid" link="fulltext">12934013</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Evolution of gene expression in the <it>Drosophila melanogaster </it>subgroup.</p>
            </title>
            <aug>
               <au>
                  <snm>Rifkin</snm>
                  <fnm>SA</fnm>
               </au>
               <au>
                  <snm>Kim</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>White</snm>
                  <fnm>KP</fnm>
               </au>
            </aug>
            <source>Nat Genet</source>
            <pubdate>2003</pubdate>
            <volume>33</volume>
            <fpage>138</fpage>
            <lpage>144</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/ng1086</pubid>
                  <pubid idtype="pmpid" link="fulltext">12548287</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Endless forms: the evolution of gene regulation and morphological diversity.</p>
            </title>
            <aug>
               <au>
                  <snm>Carroll</snm>
                  <fnm>SB</fnm>
               </au>
            </aug>
            <source>Cell</source>
            <pubdate>2000</pubdate>
            <volume>101</volume>
            <fpage>577</fpage>
            <lpage>580</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0092-8674(00)80868-5</pubid>
                  <pubid idtype="pmpid" link="fulltext">10892643</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Regulation of the tinman homologues in Xenopus embryos.</p>
            </title>
            <aug>
               <au>
                  <snm>Sparrow</snm>
                  <fnm>DB</fnm>
               </au>
               <au>
                  <snm>Cai</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Kotecha</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Latinkic</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Cooper</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Towers</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Evans</snm>
                  <fnm>SM</fnm>
               </au>
               <au>
                  <snm>Mohun</snm>
                  <fnm>TJ</fnm>
               </au>
            </aug>
            <source>Dev Biol</source>
            <pubdate>2000</pubdate>
            <volume>227</volume>
            <fpage>65</fpage>
            <lpage>79</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1006/dbio.2000.9891</pubid>
                  <pubid idtype="pmpid" link="fulltext">11076677</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <aug>
               <au>
                  <snm>Wolpert</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Beddington</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Jessell</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Lawrence</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Meyerowitz</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Smith</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>Principles of Development</source>
            <publisher>Oxford: Oxford University Press</publisher>
            <edition>Second</edition>
            <pubdate>2002</pubdate>
         </bibl>
         <bibl id="B9">
            <title>
               <p>RNA-Seq: a revolutionary tool for transcriptomics.</p>
            </title>
            <aug>
               <au>
                  <snm>Wang</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Gerstein</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Snyder</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Nat Rev Genet</source>
            <pubdate>2009</pubdate>
            <volume>10</volume>
            <fpage>57</fpage>
            <lpage>63</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nrg2484</pubid>
                  <pubid idtype="pmpid" link="fulltext">19015660</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Comparing the <it>Dictyostelium </it>and <it>Entamoeba </it>genomes reveals an ancient split in the Conosa lineage.</p>
            </title>
            <aug>
               <au>
                  <snm>Song</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Xu</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Olsen</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Loomis</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Shaulsky</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kuspa</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Sucgang</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>PLoS Comput Biol</source>
            <pubdate>2005</pubdate>
            <volume>1</volume>
            <fpage>e71</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1371/journal.pcbi.0010071</pubid>
                  <pubid idtype="pmcid">1314882</pubid>
                  <pubid idtype="pmpid" link="fulltext">16362072</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>Interspecific Mixtures in the Dictyosteliaceae.</p>
            </title>
            <aug>
               <au>
                  <snm>Raper</snm>
                  <fnm>KB</fnm>
               </au>
               <au>
                  <snm>Thom</snm>
                  <fnm>C</fnm>
               </au>
            </aug>
            <source>American Journal of Botany</source>
            <pubdate>1941</pubdate>
            <volume>28</volume>
            <fpage>69</fpage>
            <lpage>78</lpage>
            <xrefbib>
               <pubid idtype="doi">10.2307/2437063</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <aug>
               <au>
                  <snm>Kessin</snm>
                  <fnm>RH</fnm>
               </au>
            </aug>
            <source>Dictyostelium - Evolution, cell biology, and the development of multicellularity</source>
            <publisher>Cambridge, UK: Cambridge Univ. Press</publisher>
            <pubdate>2001</pubdate>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Supplementary Material</p>
            </title>
            <url>http://dictygenome.bcm.tmc.edu/~anup/RNAseq/</url>
         </bibl>
         <bibl id="B14">
            <title>
               <p>Functional discovery via a compendium of expression profiles.</p>
            </title>
            <aug>
               <au>
                  <snm>Hughes</snm>
                  <fnm>TR</fnm>
               </au>
               <au>
                  <snm>Marton</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Jones</snm>
                  <fnm>AR</fnm>
               </au>
               <au>
                  <snm>Roberts</snm>
                  <fnm>CJ</fnm>
               </au>
               <au>
                  <snm>Stoughton</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Armour</snm>
                  <fnm>CD</fnm>
               </au>
               <au>
                  <snm>Bennett</snm>
                  <fnm>HA</fnm>
               </au>
               <au>
                  <snm>Coffey</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Dai</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>He</snm>
                  <fnm>YD</fnm>
               </au>
               <au>
                  <snm>Kidd</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>King</snm>
                  <fnm>AM</fnm>
               </au>
               <au>
                  <snm>Meyer</snm>
                  <fnm>MR</fnm>
               </au>
               <au>
                  <snm>Slade</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Lum</snm>
                  <fnm>PY</fnm>
               </au>
               <au>
                  <snm>Stepaniants</snm>
                  <fnm>SB</fnm>
               </au>
               <au>
                  <snm>Shoemaker</snm>
                  <fnm>DD</fnm>
               </au>
               <au>
                  <snm>Gachotte</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Chakraburtty</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Simon</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bard</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Friend</snm>
                  <fnm>SH</fnm>
               </au>
            </aug>
            <source>Cell</source>
            <pubdate>2000</pubdate>
            <volume>102</volume>
            <fpage>109</fpage>
            <lpage>126</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/S0092-8674(00)00015-5</pubid>
                  <pubid idtype="pmpid" link="fulltext">10929718</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>dictyExpress: a <it>Dictyostelium discoideum </it>gene expression database with an explorative data analysis web-based interface.</p>
            </title>
            <aug>
               <au>
                  <snm>Rot</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Parikh</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Curk</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Kuspa</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Shaulsky</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Zupan</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>BMC Bioinformatics</source>
            <pubdate>2009</pubdate>
            <volume>10</volume>
            <fpage>265</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1471-2105-10-265</pubid>
                  <pubid idtype="pmcid">2738683</pubid>
                  <pubid idtype="pmpid" link="fulltext">19706156</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>dictyExpress</p>
            </title>
            <url>http://www.ailab.si/dictyexpress/</url>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Microarray phenotyping in <it>Dictyostelium </it>reveals a regulon of chemotaxis genes.</p>
            </title>
            <aug>
               <au>
                  <snm>Booth</snm>
                  <fnm>EO</fnm>
               </au>
               <au>
                  <snm>Van Driessche</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Zhuchenko</snm>
                  <fnm>O</fnm>
               </au>
               <au>
                  <snm>Kuspa</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Shaulsky</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2005</pubdate>
            <volume>21</volume>
            <fpage>4371</fpage>
            <lpage>4377</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bti726</pubid>
                  <pubid idtype="pmpid" link="fulltext">16234315</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <aug>
               <au>
                  <snm>Loomis</snm>
                  <fnm>WF</fnm>
               </au>
            </aug>
            <source>Dictyostelium discoideum. A Developmental System</source>
            <publisher>New York: Academic Press</publisher>
            <pubdate>1975</pubdate>
         </bibl>
         <bibl id="B19">
            <title>
               <p>Evolutionary significance of gene expression divergence.</p>
            </title>
            <aug>
               <au>
                  <snm>Jordan</snm>
                  <fnm>IK</fnm>
               </au>
               <au>
                  <snm>Marino-Ramirez</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Koonin</snm>
                  <fnm>EV</fnm>
               </au>
            </aug>
            <source>Gene</source>
            <pubdate>2005</pubdate>
            <volume>345</volume>
            <fpage>119</fpage>
            <lpage>126</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.gene.2004.11.034</pubid>
                  <pubid idtype="pmcid">1859841</pubid>
                  <pubid idtype="pmpid" link="fulltext">15716085</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Parallel Patterns of Evolution in the Genomes and Transcriptomes of Humans and Chimpanzees.</p>
            </title>
            <aug>
               <au>
                  <snm>Khaitovich</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Hellmann</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Enard</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Nowick</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Leinweber</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Franz</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Weiss</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Lachmann</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Paabo</snm>
                  <fnm>S</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2005</pubdate>
            <volume>309</volume>
            <fpage>1850</fpage>
            <lpage>1854</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.1108296</pubid>
                  <pubid idtype="pmpid" link="fulltext">16141373</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B21">
            <title>
               <p>Incongruent expression profiles between human and mouse orthologous genes suggest widespread neutral evolution of transcription control.</p>
            </title>
            <aug>
               <au>
                  <snm>Yanai</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Graur</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Ophir</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>OMICS</source>
            <pubdate>2004</pubdate>
            <volume>8</volume>
            <fpage>15</fpage>
            <lpage>24</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1089/153623104773547462</pubid>
                  <pubid idtype="pmpid" link="fulltext">15107234</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Mapping and quantifying mammalian transcriptomes by RNA-Seq.</p>
            </title>
            <aug>
               <au>
                  <snm>Mortazavi</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Williams</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>McCue</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Schaeffer</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Wold</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Nat Methods</source>
            <pubdate>2008</pubdate>
            <volume>5</volume>
            <fpage>621</fpage>
            <lpage>628</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nmeth.1226</pubid>
                  <pubid idtype="pmpid" link="fulltext">18516045</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B23">
            <title>
               <p>The transcriptional landscape of the yeast genome defined by RNA sequencing.</p>
            </title>
            <aug>
               <au>
                  <snm>Nagalakshmi</snm>
                  <fnm>U</fnm>
               </au>
               <au>
                  <snm>Wang</snm>
                  <fnm>Z</fnm>
               </au>
               <au>
                  <snm>Waern</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Shou</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Raha</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Gerstein</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Snyder</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>2008</pubdate>
            <volume>320</volume>
            <fpage>1344</fpage>
            <lpage>1349</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.1158441</pubid>
                  <pubid idtype="pmpid" link="fulltext">18451266</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Applications of InterPro in protein annotation and genome analysis.</p>
            </title>
            <aug>
               <au>
                  <snm>Biswas</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>O'Rourke</snm>
                  <fnm>JF</fnm>
               </au>
               <au>
                  <snm>Camon</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Fraser</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kanapin</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Karavidopoulou</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Kersey</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Kriventseva</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Mittard</snm>
                  <fnm>V</fnm>
               </au>
               <au>
                  <snm>Mulder</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Phan</snm>
                  <fnm>I</fnm>
               </au>
               <au>
                  <snm>Servant</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Apweiler</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>Brief Bioinform</source>
            <pubdate>2002</pubdate>
            <volume>3</volume>
            <fpage>285</fpage>
            <lpage>295</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bib/3.3.285</pubid>
                  <pubid idtype="pmpid" link="fulltext">12230037</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Why highly expressed proteins evolve slowly.</p>
            </title>
            <aug>
               <au>
                  <snm>Drummond</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Bloom</snm>
                  <fnm>JD</fnm>
               </au>
               <au>
                  <snm>Adami</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Wilke</snm>
                  <fnm>CO</fnm>
               </au>
               <au>
                  <snm>Arnold</snm>
                  <fnm>FH</fnm>
               </au>
            </aug>
            <source>Proc Natl Acad Sci USA</source>
            <pubdate>2005</pubdate>
            <volume>102</volume>
            <fpage>14338</fpage>
            <lpage>14343</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1073/pnas.0504070102</pubid>
                  <pubid idtype="pmcid">1242296</pubid>
                  <pubid idtype="pmpid" link="fulltext">16176987</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Changing patterns of gene expression in Dictyostelium prestalk cell subtypes recognized by in situ hybridization with genes from microarray analyses.</p>
            </title>
            <aug>
               <au>
                  <snm>Maeda</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Sakamoto</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Iranfar</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Fuller</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Maruo</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Ogihara</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Morio</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Urushihara</snm>
                  <fnm>H</fnm>
               </au>
               <au>
                  <snm>Tanaka</snm>
                  <fnm>Y</fnm>
               </au>
               <au>
                  <snm>Loomis</snm>
                  <fnm>WF</fnm>
               </au>
            </aug>
            <source>Eukaryot Cell</source>
            <pubdate>2003</pubdate>
            <volume>2</volume>
            <fpage>627</fpage>
            <lpage>637</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1128/EC.2.3.627-637.2003</pubid>
                  <pubid idtype="pmcid">161460</pubid>
                  <pubid idtype="pmpid" link="fulltext">12796308</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Evolution at two levels in humans and chimpanzees.</p>
            </title>
            <aug>
               <au>
                  <snm>King</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wilson</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Science</source>
            <pubdate>1975</pubdate>
            <volume>188</volume>
            <fpage>107</fpage>
            <lpage>116</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1126/science.1090005</pubid>
                  <pubid idtype="pmpid" link="fulltext">1090005</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Developmental regulation of <it>Dictyostelium discoideum </it>actin gene fusions carried on low-copy and high-copy transformation vectors.</p>
            </title>
            <aug>
               <au>
                  <snm>Knecht</snm>
                  <fnm>DA</fnm>
               </au>
               <au>
                  <snm>Cohen</snm>
                  <fnm>SM</fnm>
               </au>
               <au>
                  <snm>Loomis</snm>
                  <fnm>WF</fnm>
               </au>
               <au>
                  <snm>Lodish</snm>
                  <fnm>HF</fnm>
               </au>
            </aug>
            <source>Mol Cell Biol</source>
            <pubdate>1986</pubdate>
            <volume>6</volume>
            <fpage>3973</fpage>
            <lpage>3983</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">367161</pubid>
                  <pubid idtype="pmpid" link="fulltext">3025622</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B29">
            <title>
               <p>The genome of the social amoeba <it>Dictyostelium discoideum</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Eichinger</snm>
                  <fnm>L</fnm>
               </au>
               <au>
                  <snm>Pachebat</snm>
                  <fnm>JA</fnm>
               </au>
               <au>
                  <snm>Glockner</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Rajandream</snm>
                  <fnm>MA</fnm>
               </au>
               <au>
                  <snm>Sucgang</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Berriman</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Song</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Olsen</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Szafranski</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Xu</snm>
                  <fnm>Q</fnm>
               </au>
               <au>
                  <snm>Tunggal</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Kummerfeld</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Madera</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Konfortov</snm>
                  <fnm>BA</fnm>
               </au>
               <au>
                  <snm>Rivero</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Bankier</snm>
                  <fnm>AT</fnm>
               </au>
               <au>
                  <snm>Lehmann</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Hamlin</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Davies</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gaudet</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Fey</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Pilcher</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Saunders</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Sodergren</snm>
                  <fnm>E</fnm>
               </au>
               <au>
                  <snm>Davis</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Kerhornou</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Nie</snm>
                  <fnm>X</fnm>
               </au>
               <au>
                  <snm>Hall</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Anjard</snm>
                  <fnm>C</fnm>
               </au>
               <etal/>
            </aug>
            <source>Nature</source>
            <pubdate>2005</pubdate>
            <volume>435</volume>
            <fpage>43</fpage>
            <lpage>57</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1038/nature03481</pubid>
                  <pubid idtype="pmcid">1352341</pubid>
                  <pubid idtype="pmpid" link="fulltext">15875012</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p><it>Dictyostelium discoideum</it>, a new species of slime mold from decaying forest leaves.</p>
            </title>
            <aug>
               <au>
                  <snm>Raper</snm>
                  <fnm>KB</fnm>
               </au>
            </aug>
            <source>J Agr Res</source>
            <pubdate>1935</pubdate>
            <volume>50</volume>
            <fpage>135</fpage>
            <lpage>147</lpage>
         </bibl>
         <bibl id="B31">
            <title>
               <p>Kin discrimination increases with genetic distance in a social amoeba.</p>
            </title>
            <aug>
               <au>
                  <snm>Ostrowski</snm>
                  <fnm>EA</fnm>
               </au>
               <au>
                  <snm>Katoh</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Shaulsky</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Queller</snm>
                  <fnm>DC</fnm>
               </au>
               <au>
                  <snm>Strassmann</snm>
                  <fnm>JE</fnm>
               </au>
            </aug>
            <source>PLoS Biol</source>
            <pubdate>2008</pubdate>
            <volume>6</volume>
            <fpage>e287</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1371/journal.pbio.0060287</pubid>
                  <pubid idtype="pmcid">2586364</pubid>
                  <pubid idtype="pmpid" link="fulltext">19067487</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B32">
            <title>
               <p>Cultivation and synchronous morphogenesis of <it>Dictyostelium </it>under controlled experimental conditions.</p>
            </title>
            <aug>
               <au>
                  <snm>Sussman</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Methods Cell Biol</source>
            <pubdate>1987</pubdate>
            <volume>28</volume>
            <fpage>9</fpage>
            <lpage>29</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid">3298997</pubid>
                  <pubid idtype="doi">10.1016/S0091-679X(08)61635-0</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B33">
            <title>
               <p>Expression patterns of cell-type-specific genes in <it>Dictyostelium</it>.</p>
            </title>
            <aug>
               <au>
                  <snm>Iranfar</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Fuller</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Sasik</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Hwa</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Laub</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Loomis</snm>
                  <fnm>WF</fnm>
               </au>
            </aug>
            <source>Mol Biol Cell</source>
            <pubdate>2001</pubdate>
            <volume>12</volume>
            <fpage>2590</fpage>
            <lpage>2600</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmcid">59697</pubid>
                  <pubid idtype="pmpid" link="fulltext">11553701</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B34">
            <title>
               <p>Novocraft</p>
            </title>
            <url>http://www.novocraft.com/</url>
         </bibl>
         <bibl id="B35">
            <title>
               <p>dictyBase</p>
            </title>
            <url>http://dictybase.org/</url>
         </bibl>
         <bibl id="B36">
            <title>
               <p>TBrowse</p>
            </title>
            <url>http://dictygenome.bcm.tmc.edu/~anup/RNAseq/RNA-seq-browser/</url>
         </bibl>
         <bibl id="B37">
            <aug>
               <au>
                  <snm>Brown</snm>
                  <fnm>TA</fnm>
               </au>
            </aug>
            <source>Genomes</source>
            <publisher>Oxford, UK: BIOS Scientific Publishers Ltd</publisher>
            <edition>2</edition>
            <pubdate>2002</pubdate>
         </bibl>
         <bibl id="B38">
            <title>
               <p>R: A language for data analysis and graphics.</p>
            </title>
            <aug>
               <au>
                  <snm>Ihaka</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Gentleman</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>J Comput Graphical Stat</source>
            <pubdate>1996</pubdate>
            <volume>5</volume>
            <fpage>299</fpage>
            <lpage>314</lpage>
         </bibl>
         <bibl id="B39">
            <title>
               <p>Cytoscape</p>
            </title>
            <url>http://www.cytoscape.org/</url>
         </bibl>
         <bibl id="B40">
            <title>
               <p>BiNGO: a Cytoscape plugin to assess overrepresentation of Gene Ontology categories in Biological Networks.</p>
            </title>
            <aug>
               <au>
                  <snm>Maere</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Heymans</snm>
                  <fnm>K</fnm>
               </au>
               <au>
                  <snm>Kuiper</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Bioinformatics</source>
            <pubdate>2005</pubdate>
            <volume>21</volume>
            <fpage>3448</fpage>
            <lpage>3449</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1093/bioinformatics/bti551</pubid>
                  <pubid idtype="pmpid" link="fulltext">15972284</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B41">
            <title>
               <p>gplots: Various R programming tools for plotting data</p>
            </title>
            <url>http://cran.r-project.org/web/packages/gplots/index.html</url>
         </bibl>
         <bibl id="B42">
            <aug>
               <au>
                  <snm>Venables</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Ripley</snm>
                  <fnm>B</fnm>
               </au>
            </aug>
            <source>Modern applied statistics with S</source>
            <publisher>Springer Verlag</publisher>
            <pubdate>2002</pubdate>
         </bibl>
         <bibl id="B43">
            <title>
               <p>Limma: linear models for microarray data.</p>
            </title>
            <aug>
               <au>
                  <snm>Smyth</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Bioinformatics and Computational Biology Solutions Using R and Bioconductor</source>
            <publisher>Springer</publisher>
            <editor>Gentleman R, Carey V, Huber W, Irizarry R, Dudoit S</editor>
            <pubdate>2005</pubdate>
            <fpage>397</fpage>
            <lpage>420</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1007/0-387-29362-0_23</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B44">
            <title>
               <p>Linear models and empirical Bayes methods for assessing differential expression in microarray experiments.</p>
            </title>
            <aug>
               <au>
                  <snm>Smyth</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Stat Appl Genet Mol Biol</source>
            <pubdate>2004</pubdate>
            <volume>3</volume>
            <fpage>Article 3</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="pmpid" link="fulltext">16646809</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>
