# Namespace prefixes used by the patterns in this schema.
# NOTE(review): the GraphML specification documents "http://graphml.graphdrawing.org/xmlns"
# as its namespace; the URI below carries an extra "/graphml" segment. Confirm it matches
# what the GraphML producer actually emits.
namespace gml = "http://graphml.graphdrawing.org/xmlns/graphml"
namespace pm = "http://www.politicalmashup.nl"
namespace pmd = "http://www.politicalmashup.nl/docinfo"
# NOTE(review): unlike the pm/pmd URIs above, this one has no "//" after "http:".
# Namespace names are compared as literal strings, so instance documents must use
# exactly this value. Looks like a typo, but confirm against existing documents
# before changing it.
namespace pmds = "http:www.politicalmashup.nl/debate-summary"

include "toolbox.rnc"
include "DutchParlCommonStructure.rnc"

# Author: Johan van Doornik, Maarten Marx 
# Date: 2011-11-14
# Purpose: Schema to describe a debate summary.
# Status: 

# `Content` is bound to the debate-summary root pattern.
# (Presumably consumed by one of the included schemas -- not visible in this file.)
Content = DebateSummary

# Root element of a debate summary document.
DebateSummary =
    element pmds:debate-summary {
        DebateSummaryContent
    }

# Children of the root, in this fixed order: the document reference, the
# GraphML graph of the debate, and the summaries.
# DocRef is not defined in this file; presumably it comes from an include.
DebateSummaryContent =
    DocRef,
    GraphML,
    Summaries

# The GraphML graph of this debate.
# No RELAX NG schema for GraphML was available, so the content is delegated to
# the permissive `anythingGml` pattern for now.
GraphML =
    element gml:graphml {
        GraphMLContent
    }

GraphMLContent =
    anythingGml

# Permissive placeholder for GraphML content: any mix, in any order and to any
# nesting depth, of elements in the gml namespace, attributes, and text.
#
# Attributes are matched with the `*` name class rather than `gml:*`:
# GraphML writes its attributes (id, source, target, for, attr.name, ...)
# without a prefix, which puts them in no namespace, so a `gml:*` name class
# would reject them. `*` also admits foreign attributes such as
# xsi:schemaLocation on the graphml element.
anythingGml =
    (element gml:* { anythingGml }
     | attribute * { text }
     | text)*

# Container for the summaries. There are multiple summaries, distinguished by
# their level.
Summaries =
    element pmds:summaries {
        SummariesContent
    }

# At least one summary is required.
SummariesContent =
    Summary+

# A single summary.
Summary =
    element pmds:summary {
        SummaryContent
    }

# Content of one summary: two required attributes plus any number of clouds
# and dicts containers, interleaved in any order.
#
# pmds:level is one of three separate values:
#   all  - all text in the debate
#   node - all text from a node (person)
#   edge - all the 'communication' between two persons
# Each value is its own quoted literal joined by `|`; a single literal
# "all|node|edge" would only accept that exact seven-plus-character string.
#
# pmds:id is the id of the node or edge in the GraphML.
SummaryContent =
    attribute pmds:level { "all" | "node" | "edge" }
    & attribute pmds:id { text }
    & Clouds*
    & Dicts*

# Container for the word clouds of a summary.
Clouds =
    element pmds:clouds {
        CloudsContent
    }

# At least one cloud is required.
CloudsContent =
    Cloud+

# A cloud contains words with an assigned score, so that word clouds can be
# created from it.
Cloud =
    element pmds:cloud {
        CloudContent
    }

# Content of a cloud: the kind of words it holds, the scoring method used, and
# one or more words.
#   pmds:word-type  - grammatical category of the words in the cloud
#   pmds:cloud-type - scoring method; new methods may be added to this list
CloudContent =
    attribute pmds:word-type {
        "noun" | "adjective" | "verb" | "named-entity"
    }
    & attribute pmds:cloud-type {
        "parsimonious" | "frequency" | "tf-idf"
    }
    & Word+

# A single word entry, used inside clouds and dicts.
Word =
    element pmds:word {
        WordContent
    }

# Only the string attribute is required. Score, frequency, part-of-speech and
# normal form are optional, and any number of hits may follow.
WordContent =
    Score? &
    Frequency? &
    String &
    Pos? &
    NormalForm? &
    Hit*

# One occurrence of a word in a source text.
Hit =
    element pmds:hit {
        HitContent
    }

# Mixed content: a snippet containing the string, with the string highlighted
# (created using the kwic module from eXist).
#   Score?, Frequency?, Pos? - optional attributes
#   DocRef                   - required reference to the text the hit comes from
#                              (DocRef is defined outside this file)
#   pmds:h                   - zero or more highlighted spans, e.g.
#                              ...some text with the <h>matched word</h> in it...
HitContent =
    mixed {
        Score? &
        Frequency? &
        Pos? &
        DocRef &
        element pmds:h { text }*
    }

# Shared attribute patterns.

# Score according to the scoring method specified in cloud-type.
Score =
    attribute pmds:score { xsd:float }

# Number of occurrences.
Frequency =
    attribute pmds:frequency { xsd:integer }

# Part of speech.
Pos =
    attribute pmds:pos { xsd:string }

# The word itself.
String =
    attribute pmds:string { xsd:string }

# Link to a normalised representation (Wikipedia or the Political Mashup resolver).
NormalForm =
    attribute pmds:normal-form { text }

# Container for the dictionaries of a summary.
Dicts =
    element pmds:dicts {
        DictsContent
    }

# Zero or more dictionaries; an empty pmds:dicts element is valid.
DictsContent =
    Dict*

# A single dictionary.
Dict =
    element pmds:dict {
        DictContent
    }

# Content of a dictionary: its type plus one or more words.
#   raw        - dict contains the frequencies of all words, and the
#                part-of-speech tag of each word
#   lemmatised - similar to raw, but words are normalised using Frog
DictContent =
    attribute pmds:type {
        "raw" | "lemmatised"
    }
    & Word+