// JAPE grammar with two phases that annotate the head noun of a
// "<subject> ist/bezeichnet <article> ... <noun>" (is-a) construction.
//
//   Phase SimpleHearst            - rules that require an article (or "eine von"/"eine der").
//   Phase MissingOrShiftedArticle - fallback rule without a mandatory article; it is skipped
//                                   when the first phase already produced an "h" annotation.
//
// Each construct is kept on its own line so that "//" comments end where intended and
// do not swallow the definitions that follow them.

Phase: SimpleHearst
Input: Token
Options: control = appelt

// A single token tagged as article.
Macro: Article
(
  ({Token.category == "ART"})
)

// The linking verb of the pattern: lemma "sein" or "bezeichnen".
Macro: toBeVerb
(
  ({Token.lemma == "sein"}|{Token.lemma == "bezeichnen"})
)

// Any token whose lemma is not "sein".
// NOTE(review): "bezeichnen" is accepted by toBeVerb but is not excluded here - confirm
// that this asymmetry is intended.
Macro: NotToBeVerb
(
  {Token, !Token.lemma == "sein"}
)

// Tier 1: numbers, adjectives, "." / "-" and tokens tagged KON/KOUS/KOUI/KOKOM.
Macro: LHSHearstBody_tier1modifiers
(
  (
    {Token.kind == "number"}|
    {Token.category == "ADJA"}|
    {Token.category == "ADJD"}|
    {Token.string == "."}|
    {Token.string == "-"}|
    {Token.category == "KON"}|
    {Token.category == "KOUS"}|
    {Token.category == "KOUI"}|
    {Token.category == "KOKOM"}
  )
)

// Tier 2: same as tier 1 without the KON/KOUS/KOUI/KOKOM alternatives.
Macro: LHSHearstBody_tier2modifiers
(
  (
    {Token.kind == "number"}|
    {Token.category == "ADJA"}|
    {Token.category == "ADJD"}|
    {Token.string == "."}|
    {Token.string == "-"}
  )
)

// Tier 3: numbers, nouns (NE/NN), truncated words and the connectors "-", "&", "/".
// NOTE(review): this tier tests Token.category == "." whereas every other macro tests
// Token.string == "." - confirm which one is intended before changing it, because
// switching to Token.string would let the body run across a full stop.
Macro: LHSHearstBody_tier3modifiers
(
  (
    {Token.kind == "number"}|
    {Token.category == "NE"}|
    {Token.category == "NN"}|
    {Token.category == "."}|
    {Token.string == "-"}|
    {Token.category == "TRUNC"}|
    {Token.string == "&"}|
    {Token.string == "/"}
  )
)

//improves recall by 0.2 and decreases precision by 0.1
// An adjective followed by "und" or ",", optionally preceded by "und"/"," and optionally
// followed by a second adjective.
Macro: RepeatedAdjective
(
  ({Token.string == "und"}|{Token.string == ","})?
  ({Token.category == "ADJA"})
  ({Token.string == "und"}|{Token.string == ","})
  ({Token.category == "ADJA"})?
)

// Zero or more number tokens between a "startpunct" token and an "endpunct" token.
Macro: NumbersInBracketes
(
  ({Token.position == "startpunct"})
  ({Token.kind == "number"})*
  ({Token.position == "endpunct"})
)

// Everything that may stand between the article and the head noun: the three modifier
// tiers in order, each optionally surrounded by bracketed numbers.
Macro: LHSHearstBody
(
  (NumbersInBracketes)?
  (LHSHearstBody_tier1modifiers)*
  (NumbersInBracketes)?
  (LHSHearstBody_tier2modifiers)*
  (NumbersInBracketes)?
  (LHSHearstBody_tier3modifiers)*
  (NumbersInBracketes)?
)

// Optional number tokens followed by the token "m".
Macro: Length
(
  ({Token.kind == "number"})*
  ({Token.string == "m"})
)

// "von <number>+ bis <number>+".
Macro: FromToTimePeriod
(
  ({Token.string == "von"})
  ({Token.kind == "number"})+
  (
    ({Token.string == "bis"})
    ({Token.kind == "number"})+
  )
)

// An inserted prepositional phrase: optional article, one of von/mit/in/bei/aus/durch,
// optional article, optional modifiers, then one or more NE/NN/"." tokens.
Macro: FirstAdditionaInfoInserted
(
  (Article)?
  (
    {Token.string == "von"}|
    {Token.string == "mit"}|
    {Token.lemma == "in"}|
    {Token.lemma == "bei"}|
    {Token.lemma == "aus"}|
    {Token.lemma == "durch"}
  )
  (Article)?
  (
    {Token.kind == "number"}|
    {Token.category == "ADJA"}|
    {Token.category == "ADJD"}|
    {Token.category == "ADV"}|
    {Token.string == "."}|
    {Token.string == "-"}
  )*
  ({Token.category == "NE"}|{Token.category == "NN"}|{Token.string == "."})+
)

// Optional modifiers, one or more TRUNC tokens and a following "und".
Macro: SkipTruncated
(
  (
    {Token.category == "ADJA"}|
    {Token.category == "ADJD"}|
    {Token.category == "ADV"}|
    {Token.string == "."}|
    {Token.string == "-"}
  )*
  ({Token.category == "TRUNC"})+
  ({Token.string == "und"})
)

// Filler word that may directly follow the verb; currently only "sowohl".
Macro: MeaningLessWordsImmediatelyAfterToBe
(
  ({Token.string == "sowohl"})
)

// An inserted prepositional phrase followed by any number of bracketed number groups.
Macro: AdditionaGenericInfoInserted
(
  (FirstAdditionaInfoInserted)
  (NumbersInBracketes)*
)

// The head of the pattern: a single NN or NE token.
Macro: Head
(
  ({Token.category == "NN"}|{Token.category == "NE"})
)

// The literal token "Name".
Macro: NameOf
(
  ({Token.string == "Name"})
)

// The literal token "Titel".
Macro: TitleOf
(
  ({Token.string == "Titel"})
)

// The literal tokens "species" "of".
Macro: SpeciesOf
(
  ({Token.string == "species"})
  ({Token.string == "of"})
)

// The literal tokens "genus" "of".
Macro: GenusOf
(
  ({Token.string == "genus"})
  ({Token.string == "of"})
)

// Lemma "eine" followed by "von" or an article, then an optional adverb.
Macro: OneOf
(
  ({Token.lemma == "eine"})
  ({Token.string == "von"}|{Token.category == "ART"})
  ({Token.category == "ADV"})?
)

// The literal tokens "type" "of".
Macro: TypeOf
(
  ({Token.string == "type"})
  ({Token.string == "of"})
)

// Main rule: verb, then an article (optionally after an inserted phrase), optional
// fillers, the modifier body and finally the head noun.
// Creates "harticle" over the article part and "h" over the head, then calls
// ctx.endPhase() so that matching in this phase stops after the first hit.
Rule: HearstRule_Simple
Priority: 30
(
  (NotToBeVerb)*
  (toBeVerb)
  (MeaningLessWordsImmediatelyAfterToBe)?
)
(FromToTimePeriod)?
(
  ((AdditionaGenericInfoInserted) (Article)?)|(Article)
):hearstArticle
((NameOf)(Article)?)?
(Length)?
(SpeciesOf)?
(TitleOf)?
(GenusOf)?
(TypeOf)?
(SkipTruncated)?
(LHSHearstBody)
(RepeatedAdjective)*
//(Article)?
(Head) :hearstPattern
// Right context: the token after the head must not be tagged ADJA, or must match ".*er".
// NOTE(review): "=~" is a find-style regex match, so ".*er" accepts any token that
// contains "er"; if "ends in er" was meant, "==~" would be required - confirm first.
({Token.category != "ADJA"}|{Token.string =~ ".*er"})
-->
  :hearstArticle.harticle = {kind = "isApattern", rule = "HearstRule_Simple"},
  :hearstPattern.h = {kind = "isApattern", rule = "HearstRule_Simple", lemma= :hearstPattern.Token.lemma},
  {
    ctx.endPhase();
  }

// Variant for "eine von ..." / "eine der ..." constructions (see OneOf).
// Creates only the "h" annotation over the head, then ends the phase.
Rule: HearstRule_Plural
Priority: 30
(
  (NotToBeVerb)*
  (toBeVerb)
)
(Article)?
((OneOf)(Article)?)
(LHSHearstBody)
(RepeatedAdjective)*
(Head) :hearstPattern
// Same right-context test as in HearstRule_Simple.
({Token.category != "ADJA"}|{Token.string =~ ".*er"})
-->
  :hearstPattern.h = {kind = "isApattern", rule = "HearstRule_Plural", lemma= :hearstPattern.Token.lemma},
  {
    ctx.endPhase();
  }


Phase: MissingOrShiftedArticle
Input: Token h
Options: control = appelt

// Guard rule: matches a token sequence that contains an "h" annotation (i.e. one was
// created by the previous phase) and ends this phase without adding anything.
Rule: NegativeRuleQuitIfPreviousPhaseSucceeded
Priority: 10
({Token})*({h})({Token})*
-->
  {
    ctx.endPhase();
  }

// Fallback rule: same shape as HearstRule_Simple, but no article is required and the
// inserted prepositional phrase is optional.
Rule: HearstRule_MissingOrShiftedArticle
Priority: 30
(
  (NotToBeVerb)*
  (toBeVerb)
  (MeaningLessWordsImmediatelyAfterToBe)?
)
(Length)?
(FromToTimePeriod)?
(AdditionaGenericInfoInserted)?
(SpeciesOf)?
(TitleOf)?
(GenusOf)?
(TypeOf)?
(SkipTruncated)?
(LHSHearstBody)
(RepeatedAdjective)*
//(Article)?
(Head) :hearstPattern
// Same right-context test as in HearstRule_Simple.
({Token.category != "ADJA"}|{Token.string =~ ".*er"})
-->
  // NOTE(review): the "rule" feature value is "HearstRule_MissingArticle", which differs
  // from this rule's name; it is kept unchanged because consumers may rely on the value.
  :hearstPattern.h = {kind = "isApattern", rule = "HearstRule_MissingArticle", lemma= :hearstPattern.Token.lemma},
  {
    ctx.endPhase();
  }