<?xml version="1.0" encoding="UTF-8"?>
<wdkModel name="toyModel" displayName="GUS Web Development Kit Demo">

    <!-- ******************* -->
    <!-- model introduction  -->
    <!-- ******************* -->
    <!-- Welcome text for the toy model; presumably displayed to site
         visitors, so treat the element content as user-facing copy. -->
    <introduction>
        Welcome to WDKToySite, a sample website driven by the GUS Web Development Kit.
        You are invited to explore this site to find out about the core functionalities of WDK.
        Enjoy the surfing!
    </introduction>


    <!-- ************ -->
    <!-- questionSets -->
    <!-- ************ -->
    
    <!-- 
    A question is "a query that returns a list of records."  (Regular queries 
    return columns and rows.)  They are formed by pairing a query with a
    record class.  The query must be one that returns one column containing
    a list of primary keys.  The record class must be one that expects as
    a primary key the type of key returned by the query.
    
    A question set is a grouping of questions.  It may be used in the
    user interface to present the enclosed questions in one choice box.  
    -->


    <!-- questions that return RNA records -->
    <!-- Groups the questions whose answers are RNA records; each question
         pairs an id query from the RnaIds query set with RnaRecordClass. -->
    <questionSet name="RnaQuestions"
                 displayName="Queries to find RNAs">

        <description>
          These queries find RNAs
        </description>

        <!-- Uses RnaIds.ByNumSeqs; summaryAttributesRef names attributes of
             RnaRecordClass (presumably the ones shown in the result summary). -->
        <question name="ByNumSeqs"
                  displayName="Find RNAs by number of assembly sequences"
                  queryRef="RnaIds.ByNumSeqs"
                  summaryAttributesRef="assembly_consistency,contains_mrna,overview,sequence,taxonLink"
                  recordClassRef="RnaRecordClasses.RnaRecordClass">
            <description>
                Find RNAs from a given organism that contain more than a 
                specified number of ESTs in their assembly.
            </description>
        </question>

        <!-- Uses RnaIds.ByDbESTLib, whose SQL keeps assemblies with a
             distinct library count >= NumEstLibs, hence "at least" below. -->
        <question name="ByDbESTLib"
                  displayName="Find RNAs by number of EST libs"
                  queryRef="RnaIds.ByDbESTLib"
                  recordClassRef="RnaRecordClasses.RnaRecordClass">
            <description>
                Find RNAs with required assembly consistency score that
                contain ESTs from at least a specified number of libraries.
            </description>
        </question>

    </questionSet>

    <!-- questions that return EST records -->
    <!-- Groups the questions whose answers are EST records; each question
         pairs an id query from the EstIds query set with EstRecordClass. -->
    <questionSet name="EstQuestions"
                 displayName="Queries to find ESTs">

        <description>
          These queries find ESTs
        </description>

        <!-- Uses EstIds.ByEstLib, which takes the EstLib vocabulary param. -->
        <question name="ByEstLib"
                  displayName="Find ESTs by originating library"
                  queryRef="EstIds.ByEstLib"
                  recordClassRef="EstRecordClasses.EstRecordClass">
            <description>
                Find ESTs from a given library
            </description>
        </question>

        <!-- Uses EstIds.NullAttributeEsts, a parameterless query over a
             fixed est_id range. -->
        <question name="EstNullAttributes"
                  displayName="Question that will return some controversial ESTs!"
                  queryRef="EstIds.NullAttributeEsts"
                  recordClassRef="EstRecordClasses.EstRecordClass">
            <description>
                  Question that will return ESTs, some of which have null values for their attributes
            </description>
        </question>

    </questionSet>

    <!-- questions for use in nestedRecords and nestedRecordLists -->
    <!-- Internal question set referenced by the nestedRecord and
         nestedRecordList elements of the record classes. -->
    <questionSet name="NestedRecordQuestions"
                 displayName="Questions for use in NestedRecords"
                 isInternal="true">

        <!-- Referenced by EstRecordClass through its nestedRecord element. -->
        <question name="RNA"
                  displayName="Nested Record RNA"
                  queryRef="RnaIds.EstParent"
                  recordClassRef="RnaRecordClasses.RnaRecordClass">
            <description> Question that returns the RNA that belongs to the given EST; for use in a Nested Record </description>
        </question>

        <!-- Referenced by RnaRecordClass through its nestedRecordList element. -->
        <question name="ESTs"
                  displayName="Nested Record ESTs"
                  queryRef="RnaTables.SimpleEstIds"
                  recordClassRef="EstRecordClasses.EstRecordClass">
            <description> Question that returns ESTs that belong to the given RNA; for use in a Nested Record List </description>
        </question>

    </questionSet>

    <!-- questions for use in data dumps -->
    <!-- Internal question set used for data dumps; its single question pairs
         DataDumpQueries.RnaGff with the DataDumpRecordClasses.RnaGff record class. -->
    <questionSet name="DataDumpQuestions"
                 displayName="Questions for use in data dumps"
                 isInternal="true">

        <description>
            These questions are for data dumps
        </description>

        <question name="RnaGff"
                  displayName="Find all RNAs"
                  queryRef="DataDumpQueries.RnaGff"
                  recordClassRef="DataDumpRecordClasses.RnaGff">
            <description>This question finds all RNAs to dump in GFF format</description>
        </question>

    </questionSet>

    <!-- Definitions of Xml sources -->

    <!-- xml question sets -->
    <!-- Questions answered from XML documents rather than SQL queries. Each
         xmlQuestion names a record class from XmlRecordClasses and an
         xmlDataURL; two of them also name an xslURL stylesheet. -->
    <xmlQuestionSet name="XmlQuestions"
                    displayName="Data contents from XML data sources">

        <description>
          Data contents from XML data sources
        </description>

        <!-- Reads news.xml directly (no stylesheet given). -->
        <xmlQuestion name="NewsQuestion"
                     displayName="News about CryptoDB"
                     recordClassRef="XmlRecordClasses.NewsRecord"
                     xmlDataURL="news.xml">
            <description>
                Retrieve news from XML data source
            </description>
        </xmlQuestion>

        <!-- Reads the CNN top-stories RSS feed and names rss.xsl as its
             stylesheet.
             NOTE(review): displayName says "News about PlasmoDB" although the
             data source is a CNN feed; confirm which label is intended. -->
        <xmlQuestion name="CNNNewsQuestion"
                     displayName="News about PlasmoDB"
                     recordClassRef="XmlRecordClasses.NewsRecord"
                     xmlDataURL="http://rss.cnn.com/rss/cnn_topstories.rss"
                     xslURL="rss.xsl">
            <description>
                Retrieve CNN headline news from XML data source, and convert it on the fly
            </description>
        </xmlQuestion>

        <!-- Reads plasmoResources.xml and names short_resource.xsl as its
             stylesheet. -->
        <xmlQuestion name="ResourceQuestion"
                     displayName="Resource of PlasmoDB analysis pipeline"
                     recordClassRef="XmlRecordClasses.ResourceRecord"
                     xmlDataURL="plasmoResources.xml"
                     xslURL="short_resource.xsl">
            <description>
                Resource of PlasmoDB analysis pipeline
            </description>
        </xmlQuestion>

    </xmlQuestionSet>

    <!-- xml record class set -->
    <!-- Record classes for the XML-backed questions in XmlQuestions. -->
    <xmlRecordClassSet name="XmlRecordClasses">

        <!-- Used by NewsQuestion and CNNNewsQuestion. -->
        <xmlRecordClass name="NewsRecord"
                        type="Toy News"
                        idPrefix="NEWS."
                        delimiter="@">
            <attributeField name="title"/>
            <attributeField name="link"/>
            <attributeField name="description"/>
            <attributeField name="date"/>
        </xmlRecordClass>

        <!-- Used by ResourceQuestion. -->
        <xmlRecordClass name="ResourceRecord"
                        type="PlasmoDB Analysis Pipeline Resource"
                        idPrefix="RESOURCE."
                        delimiter="@">
            <attributeField name="resource"/>
            <attributeField name="version"/>
            <attributeField name="url"/>
        </xmlRecordClass>

    </xmlRecordClassSet>

    <!-- *************** -->
    <!-- recordClassSets -->
    <!-- *************** -->

    <!-- 
    A "record" is an object that gathers together data about an entity in the
    database.  The entity is defined as data associated with a primary key, 
    and data in the record is based on that key. The data comes in three forms:
    attributes, text attributes and tables.  An attribute is a single value
    that describes the entity, such as "size."  A text attribute is similar,
    but the value is created by defining a text string and optionally embedding
    into that string the values of other attributes.  A table is a value
    in the form of columns and rows, such as a table describing related 
    publications.
  
    A record class is a template for creating records from a given primary key.
    It includes "attribute queries," "table queries" and "text attributes."  

    An attribute query must have only one parameter, the primary key.  It must
    return exactly zero or one rows.  The columns in the query are interpreted
    as attributes of the record.  

    A text attribute has a body of text with the values of other attributes
    optionally embedded in it.  The other attribute values are referred to
    as variables of the form "$$name_of_attribute$$".

    A table query must have only one parameter, the primary key.  It may
    return any number of rows (but typically not a huge number).  The result
    of the query is interpreted as being a table that belongs to the record.
    
    A "record set" is a grouping of records. It is useful in organizing the
    model xml file.

    The full name of a record is of the form "set_name.name."
    --> 

    <!-- RNA recordClasses -->
    <!-- Record class set holding the single RNA record class. -->
    <recordClassSet name="RnaRecordClasses">

        <recordClass name="RnaRecordClass"
                     type="Toy RNA"
                     idPrefix="TR."
                     attributeOrdering="assembly_consistency,contains_mrna,taxon_name">

            <!-- Attribute queries: columns of these queries become record attributes. -->
            <attributeQueryRef ref="RnaAttributes.GeneAttrs"/>
            <attributeQueryRef ref="RnaAttributes.AssemblyAttrs"/>

            <!-- Table of the ESTs that make up the RNA. -->
            <tableQueryRef ref="RnaTables.ConstituentEsts"/>

            <!-- Link whose visible text embeds the taxon_name attribute. -->
            <linkAttribute name="taxonLink"
                           displayName="Taxon"
                           visible="$$taxon_name$$">
                 <url>
                    <![CDATA[@TAXON_URL@]]>
                 </url>
            </linkAttribute>

            <!-- Text attribute that embeds the primary key and taxon_name. -->
            <textAttribute name="overview"
                           displayName="Overview"
                           truncateToRef="80">
                <text>
                    RNA $$primaryKey$$ is from $$taxon_name$$.
                </text>
            </textAttribute>

            <!-- Nested EST records come from the internal ESTs question. -->
            <nestedRecordList questionRef="NestedRecordQuestions.ESTs"/>

        </recordClass>
    </recordClassSet>

    <!-- EST recordClasses -->
    <!-- Record class set holding the single EST record class. -->
    <recordClassSet name="EstRecordClasses">

        <recordClass name="EstRecordClass"
                     type="EST"
                     idPrefix="EST.">

            <!-- Attribute queries, all defined in the EstAttributes query set. -->
            <attributeQueryRef ref="EstAttributes.EstAttrs"/>
            <attributeQueryRef ref="EstAttributes.LibAttrs"/>
            <attributeQueryRef ref="EstAttributes.NullAttrs"/>

            <!-- The nested RNA record comes from the internal RNA question. -->
            <nestedRecord questionRef="NestedRecordQuestions.RNA"/>

        </recordClass>

    </recordClassSet>

    <!-- recordClasses for data dumping -->
    <!-- Record class set used by the data-dump question. -->
    <recordClassSet name="DataDumpRecordClasses">

        <!-- attributeOrdering lists columns supplied by DataDumpQueries.RnaGffAttrs. -->
        <recordClass name="RnaGff"
                     type="Toy RNA"
                     idPrefix="TR."
                     attributeOrdering="seqid,source,type,fstart,fend,score,strand,phase,attr_id">

            <attributeQueryRef ref="DataDumpQueries.RnaGffAttrs"/>

            <tableQueryRef ref="DataDumpQueries.RnaEsts"/>

        </recordClass>

    </recordClassSet>

    <!-- *************** -->
    <!-- model querySets -->
    <!-- *************** -->

    <!--
    A "query" obtains tabular values from a data source.  It has columns
    and parameters.  So far, only SQL data sources are supported, but others,
    such as flat files, are coming.  

    Queries are used for different purposes: providing primary keys to a 
    question; providing attributes and tables to a record; and, providing
    vocabularies to vocabulary parameters.

    A "query set" is a grouping of queries.  It is useful in organizing the
    model xml file.  

    The full name of a query is of the form "set_name.name."
    -->


    <!-- Queries that return RNA primary keys (for use in questions and nested records.). -->

    <!-- Id queries for RNA records. Each query returns a single column,
         na_sequence_id, and is referenced by a question via queryRef. -->
    <querySet name="RnaIds">

        <!-- Distinct assemblies whose number_of_contained_sequences is greater
             than NumSeqs and whose taxon_id is in the ApiTaxon selection. -->
        <sqlQuery name="ByNumSeqs" isCacheable="true">
            <paramRef ref="params.NumSeqs"/>
            <paramRef ref="params.ApiTaxon"/>
            <column name="na_sequence_id"/>
            <sql>
                <!-- use CDATA because query includes angle brackets -->
                <![CDATA[
                    select distinct a.na_sequence_id
                    from WDKTestAssembly a
                    where a.number_of_contained_sequences > $$NumSeqs$$
                    and a.taxon_id in ($$ApiTaxon$$)
                ]]>
            </sql>
        </sqlQuery>  
      
        <!-- Assemblies with assembly_consistency greater than
             AssemblyConsistency whose ESTs come from at least NumEstLibs
             distinct dbest_id values (the having clause uses >=). -->
        <sqlQuery name="ByDbESTLib">
            <paramRef ref="params.NumEstLibs"/>
            <paramRef ref="params.AssemblyConsistency"/>
            <column name="na_sequence_id"/>
            <sql>
                <!-- use CDATA because query includes angle brackets -->
                <![CDATA[
                    select distinct aseq.assembly_na_sequence_id as na_sequence_id 
                    from WDKTestEst est, WDKTestLibrary lib,
                         WDKTestAssemblySequence aseq, WDKTestAssembly a
                    where lib.library_id = est.library_id 
                    and est.na_sequence_id = aseq.na_sequence_id 
                    and aseq.assembly_na_sequence_id = a.na_sequence_id
                    and aseq.assembly_na_sequence_id is not NULL 
                    and a.assembly_consistency > $$AssemblyConsistency$$
                    group by aseq.assembly_na_sequence_id 
                    having count (distinct lib.dbest_id) >= $$NumEstLibs$$
                ]]>
            </sql>
        </sqlQuery>

        <!-- Given the na_sequence_id of an assembly sequence, returns the
             na_sequence_id of the assembly it is joined to. Referenced by the
             NestedRecordQuestions.RNA question. -->
        <sqlQuery name="EstParent">
            <paramRef ref="params.na_sequence_id"/>
            <column name="na_sequence_id"/>
            <sql>
                   select a.na_sequence_id
                   from WDKTestAssembly a, WDKTestAssemblySequence aseq
                        where aseq.assembly_na_sequence_id = a.na_sequence_id
                        and aseq.na_sequence_id = $$na_sequence_id$$
            </sql>
        </sqlQuery>

    </querySet>


    <!-- Queries that return EST primary keys (for use in questions). -->

    <!-- Id queries for EST records. Each query returns a single column,
         est_id, and is referenced by a question in EstQuestions. -->
    <querySet name="EstIds">
        <!-- Distinct est_id values of ESTs whose library_id is in the EstLib
             selection. The same select appears twice, combined with UNION. -->
        <sqlQuery name="ByEstLib">
            <paramRef ref="params.EstLib"/>
            <column name="est_id"/>
            <sql>
               <!-- union with self for federation simulation -->
               (select distinct e.est_id
               from WDKTestEst e, WDkTestLibrary l
               where e.library_id = l.library_id
               and l.library_id in ($$EstLib$$))
               UNION
               (select distinct e.est_id
               from WDKTestEst e, WDkTestLibrary l
               where e.library_id = l.library_id
               and l.library_id in ($$EstLib$$))
            </sql>
        </sqlQuery>  
    
        <!-- Parameterless query: est_id values strictly between 9644240 and
             9644245. CDATA is needed because the SQL contains angle brackets. -->
        <sqlQuery name="NullAttributeEsts">

            <column name="est_id"/>
            <sql>
              <![CDATA[
               select est_id from WdkTestEst e 
               where e.est_id > 9644240 and est_id < 9644245
              ]]>
            </sql>
         </sqlQuery>
    </querySet>

    

    <!-- Queries that retrieve attributes of RNAs -->
    
    <!-- Attribute queries for RnaRecordClass. Each takes the primaryKey
         param and its columns become attributes of the record. -->
    <querySet name="RnaAttributes">

        <!-- Taxon name and a row count per (assembly, taxon name) pair; the
             count is taken over the join with WDKTestAssemblySequence. -->
        <sqlQuery name="GeneAttrs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="Organism" name="taxon_name"/>
            <column displayName="Isoform Count" name="isoform_count"/>
            <sql>
                select taxon_name, isoform_count
                from (select a.na_sequence_id, tn.name as taxon_name, count(*) as isoform_count
                      from WDKTestAssembly a, WDKTestTaxonName tn, WDKTestAssemblySequence aseq
                      where a.taxon_id = tn.taxon_id
                      and a.na_sequence_id = aseq.assembly_na_sequence_id
                      group by a.na_sequence_id, tn.name) tmp
                where tmp.na_sequence_id = $$primaryKey$$ 
            </sql>
        </sqlQuery>

        <!-- Assembly-level attributes read straight from WDKTestAssembly.
             The numseqs textColumn embeds number_of_contained_sequences.
             NOTE(review): primaryKey is quoted here but unquoted in the other
             queries of this file; confirm which form is intended.
             NOTE(review): WDKTestTaxonName is joined but no column is selected
             from it; if a taxon_id can have several names this would return
             more than one row. Verify against the data. -->
        <sqlQuery name="AssemblyAttrs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="Assembly consistency" name="assembly_consistency"/>
            <column displayName="Contains mRNA" name="contains_mrna"/>
            <column displayName="Number contained sequences" name="number_of_contained_sequences"/>
            <textColumn displayName="Nice Num" name="numseqs" text="number of contained seqs is $$number_of_contained_sequences$$"/>
            <column displayName="Sequence" truncateToRef="8" name="sequence"/>
            <sql>
                select a.assembly_consistency, 
                       a.contains_mrna,
                       a.number_of_contained_sequences,
                       a.sequence
                from WDKTestAssembly a, WDKTestTaxonName tn 
                where a.na_sequence_id = '$$primaryKey$$' 
                and a.taxon_id = tn.taxon_id 
            </sql>
        </sqlQuery>
    </querySet>


    <!-- Queries that retrieve tables belonging to RNAs  -->
    
    <!-- Queries keyed on an RNA primary key that return rows about its
         constituent assembly sequences. -->
    <querySet name="RnaTables">
        <!-- Table query referenced by RnaRecordClass: one row per assembly
             sequence of the given assembly. The DoTS linkColumn shows the
             source_id column, which the SQL builds as 'DT.' followed by the
             assembly id. -->
        <sqlQuery name="ConstituentEsts" displayName="Constituent ESTs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="EST Id" name="est_id"/>
            <linkColumn displayName="DoTS RNA" name="DoTS" 
                        visible="$$source_id$$">
                 <url>
                    <![CDATA[@DOTS_URL@]]>
                 </url>
            </linkColumn>
            <column displayName="DoTS RNA Id" name="source_id" isInternal="true"/>
            <column displayName="Sequence Start" name="sequence_start"/>
            <column displayName="Sequence End" name="sequence_end"/>
            <column displayName="Quality Start" name="quality_start"/>
            <column displayName="Quality End" name="quality_end" />
            <sql>
                select aseq.na_sequence_id as est_id,
                       'DT.' || a.na_sequence_id as source_id, aseq.sequence_start,
                       aseq.sequence_end, aseq.quality_start, aseq.quality_end 
                from WDKTestAssemblySequence aseq, WDKTestAssembly a
                where a.na_sequence_id = $$primaryKey$$
                and a.na_sequence_id = aseq.assembly_na_sequence_id
            </sql>
        </sqlQuery>

        <!-- Returns the est_id of every EST joined to the given assembly.
             Referenced by the NestedRecordQuestions.ESTs question. Unlike
             ConstituentEsts, est_id here comes from WDKTestEst.est_id. -->
        <sqlQuery name="SimpleEstIds">
             <paramRef ref="params.primaryKey"/>
             <column displayName="EST Id" name="est_id"/>
             
             <sql>
                select est.est_id
                from WDKTestAssemblySequence aseq, WDKTestAssembly a, WDKTestEst est
                where a.na_sequence_id = $$primaryKey$$
                and a.na_sequence_id = aseq.assembly_na_sequence_id
                and est.na_sequence_id = aseq.na_sequence_id
             </sql>
        </sqlQuery>

    </querySet>
    

    <!-- Queries that retrieve attributes of ESTs -->
    
    <!-- Attribute queries for EstRecordClass. Each takes the primaryKey
         param, which the SQL compares against WDKTestEst.est_id. -->
    <querySet name="EstAttributes">

        <!-- Accession (as source_id) and na_sequence_id of the EST. The EST
             linkColumn shows source_id as its visible text. The same select
             appears twice, combined with UNION. -->
        <sqlQuery name="EstAttrs">
            <paramRef ref="params.primaryKey"/>
            <linkColumn displayName="EST" name="EST" 
                        visible="$$source_id$$">
                 <url>
                    <![CDATA[@EST_URL@]]>
                 </url>
            </linkColumn>
            <column displayName="AssemblySeqId" name="na_sequence_id" isInternal="true"/>
            <column displayName="Accession" name="source_id" isInternal="true"/>
            <sql>
                <!-- union with self for federation simulation -->
                (select e.accession as source_id, e.na_sequence_id
                from WDKTestEst e
                where e.est_id = $$primaryKey$$)
                  UNION
                (select e.accession as source_id, e.na_sequence_id
                from WDKTestEst e
                where e.est_id = $$primaryKey$$)
            </sql>
        </sqlQuery>

        <!-- Id of the assembly the EST is joined to through
             WDKTestAssemblySequence. Uses inner joins, so an EST without a
             matching assembly yields no row. -->
        <sqlQuery name="NullAttrs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="AssemblyId" name="assembly_id"/>
            <sql>
                select a.na_sequence_id as assembly_id
                from WDKTestEst e, WDKTestAssemblySequence aseq, WdkTestAssembly a
                where e.est_id = $$primaryKey$$ and e.na_sequence_id = aseq.na_sequence_id
                and aseq.assembly_na_sequence_id = a.na_sequence_id
            </sql>
        </sqlQuery>


        <!-- dbest_name and anatomy_id of the library the EST belongs to. -->
        <sqlQuery name="LibAttrs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="DBEST name" name="dbest_name"/>
            <column displayName="Anatomy Id" name="anatomy_id"/>
            <sql>
                select l.dbest_name, l.anatomy_id 
                from WDKTestEst e, WDKTestLibrary l 
                where e.est_id = $$primaryKey$$ 
                and e.library_id = l.library_id 
            </sql>
        </sqlQuery>
    </querySet>

    <!-- queries that return controlled vocabulary terms -->

    <!-- Vocabulary queries referenced by the flatVocabParam elements in the
         params set. Each returns two columns: term and internal. -->
    <querySet name="VocabQueries">
        <!-- Returns the taxon names in the fixed list below as term, with
             taxon_id as the internal value. Used by the ApiTaxon param. -->
        <sqlQuery name="ApiTaxon">
            <column name="term"/>
            <column name="internal"/>
            <sql>
                select distinct name as term, taxon_id as internal
                from WDKTestTaxonName
                where name in ('Neospora caninum', 'Plasmodium falciparum', 'Eimeria tenella',
                               'Plasmodium yoelii', 'Sarcocystis neurona', 'Toxoplasma gondii')
            </sql>
        </sqlQuery>

        <!-- Returns EST libraries whose dbest_name starts with 'N' as term,
             with library_id as the internal value. Used by the EstLib param. -->
        <sqlQuery name="EstLib">
            <column name="term"/>
            <column name="internal"/>
            <sql>
                select distinct dbest_name as term, library_id as internal
                from WDKTestLibrary where dbest_name like 'N%'
            </sql>
        </sqlQuery>
    </querySet>


    <!-- Queries that retrieve attributes or tables for data dumping records -->
    
    <!-- Queries behind the data-dump question and its record class: one id
         query, one attribute query and one table query, all over
         WDKTestAssembly. -->
    <querySet name="DataDumpQueries">

        <!-- Id query: every distinct na_sequence_id in WDKTestAssembly.
             Takes no parameters. -->
        <sqlQuery name="RnaGff" isCacheable="true">
            <column name="na_sequence_id"/>
            <sql>
                <!-- CDATA is not strictly required here (no angle brackets);
                     kept to match the other id queries -->
                <![CDATA[
                    select distinct a.na_sequence_id
                    from WDKTestAssembly a
                ]]>
            </sql>
        </sqlQuery>

        <!-- Attribute query for the RnaGff record class. Most columns are
             constants or derived from rownum; attr_id is the assembly id and
             attr_description embeds taxon_id. -->
        <sqlQuery name="RnaGffAttrs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="SeqId" name="seqid"/>
            <column displayName="Data Source" name="source"/>
            <column displayName="Feature Type" name="type"/>
            <column displayName="Start" name="fstart"/>
            <!-- fend is the end coordinate (fstart + 5 in the SQL below) -->
            <column displayName="End" name="fend"/>
            <column displayName="Score" name="score"/>
            <column displayName="Strand" name="strand"/>
            <column displayName="Reading Frame" name="phase"/>
            <column displayName="ID" name="attr_id"/>
            <column displayName="Name" name="attr_name"/>
            <column displayName="description" name="attr_description"/>
            <sql>
                select 'ChrNNN' as seqid,
                       'WDKToy' as source,
                       'toyRNA' as type,
                       10 * rownum as fstart,
                       10 * rownum + 5 as fend,
                       '.' as score, '.' as strand, '.' as phase,
                       a.na_sequence_id as attr_id,
                       'aToyRna' as attr_name,
                       'Rna of taxon ' || taxon_id as attr_description
                from WDKTestAssembly a
                where a.na_sequence_id = $$primaryKey$$
            </sql>
        </sqlQuery>

        <!-- Table query for the RnaGff record class: one row per assembly
             sequence of the given assembly, with attr_parent set to the
             assembly id and attr_description set to "start:end". -->
        <sqlQuery name="RnaEsts" displayName="Constituent ESTs">
            <paramRef ref="params.primaryKey"/>
            <column displayName="SeqId" name="seqid"/>
            <column displayName="Data Source" name="source"/>
            <column displayName="Feature Type" name="type"/>
            <column displayName="Start" name="fstart"/>
            <column displayName="End" name="fend"/>
            <column displayName="Score" name="score"/>
            <column displayName="Strand" name="strand"/>
            <column displayName="Reading Frame" name="phase"/>
            <column displayName="ID" name="attr_id"/>
            <column displayName="Name" name="attr_name"/>
            <column displayName="Parent" name="attr_parent"/>
            <column displayName="description" name="attr_description"/>
            <sql>
                select 'ChrNNN' as seqid,
                       'WDKToy' as source,
                       'toyRNA' as type,
                       10 * rownum as fstart,
                       10 * rownum + 5 as fend,
                       '.' as score, '.' as strand, '.' as phase,
                       aseq.na_sequence_id as attr_id,
                       'aToyEst' as attr_name,
                       a.na_sequence_id as attr_parent,
                       aseq.sequence_start || ':' || aseq.sequence_end as attr_description
                from WDKTestAssemblySequence aseq, WDKTestAssembly a
                where a.na_sequence_id = $$primaryKey$$
                and a.na_sequence_id = aseq.assembly_na_sequence_id
            </sql>
        </sqlQuery>

    </querySet>


    <!-- parameters used by queries -->

    <!-- Parameters referenced by the queries above as "params.NAME". -->
    <paramSet name="params">

        <!-- Key passed to the attribute and table queries. -->
        <stringParam
            name="primaryKey"
            prompt="Primary Key"
            help="primary key"/>

        <!-- Threshold for RnaIds.ByNumSeqs; regex accepts digits only. -->
        <stringParam
            name="NumSeqs"
            prompt="Number of Contained Sequences"
            help="number of seqs"
            regex="\d+"/>

        <!-- Input of RnaIds.EstParent. -->
        <stringParam
            name="na_sequence_id"
            prompt="NestedRecord Param"
            help="Only used in a nested record"/>

        <!-- Library-count threshold for RnaIds.ByDbESTLib; digits only. -->
        <stringParam
            name="NumEstLibs"
            prompt="Number of EST Libs"
            help="number of distinct libs"
            regex="\d+"
            sample="3"/>

        <!-- Consistency threshold for RnaIds.ByDbESTLib; the regex lists
             100, one digit, or two digits as alternatives. -->
        <stringParam
            name="AssemblyConsistency"
            prompt="Assembly consistency score"
            help="on the scale of 0-100"
            regex="100|\d|\d{2}"
            sample="95"/>

        <!-- Vocabulary params: choices come from the referenced VocabQueries
             query. Only ApiTaxon sets multiPick. -->
        <flatVocabParam
            name="ApiTaxon"
            prompt="Organism"
            help="Apicomplexon taxons"
            multiPick="true"
            queryRef="VocabQueries.ApiTaxon"/>

        <flatVocabParam
            name="EstLib"
            prompt="EST Library"
            help="Library from which EST originate"
            queryRef="VocabQueries.EstLib"/>

    </paramSet>

</wdkModel>
