Skip to content
This repository has been archived by the owner on Feb 15, 2024. It is now read-only.

Commit

Permalink
Merge branch 'annotate'
Browse files Browse the repository at this point in the history
  • Loading branch information
schmmd committed Oct 16, 2013
2 parents f915d40 + 34c33b2 commit 5534ea4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
12 changes: 12 additions & 0 deletions core/src/main/scala/edu/knowitall/tool/Format.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package edu.knowitall.tool

/** Converts a value of type `F` into a representation of type `T`.
  *
  * @tparam F the type of value being written
  * @tparam T the type of the written representation
  */
trait Writer[F, T] {
/** Produces the `T` representation of `from`. */
def write(from: F): T
}

/** Converts a representation of type `F` into a value of type `T`.
  *
  * @tparam F the type of the representation being read
  * @tparam T the type of value produced by reading
  */
trait Reader[F, T] {
/** Produces a `T` from the representation `from`. */
def read(from: F): T
}

/** A two-way conversion between `F` and `T`.
  *
  * Combines a `Writer[F, T]` (`write: F => T`) with a `Reader[T, F]`
  * (`read: T => F`); note the swapped type arguments on the reader side.
  * NOTE(review): implementations are presumably expected to satisfy
  * `read(write(x)) == x`, but nothing in this trait enforces it.
  *
  * @tparam F the in-memory type
  * @tparam T the serialized type
  */
trait Format[F, T]
extends Writer[F, T] with Reader[T, F]
21 changes: 19 additions & 2 deletions core/src/main/scala/edu/knowitall/tool/postag/PostaggedToken.scala
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package edu.knowitall
package tool.postag
package edu.knowitall.tool
package postag

import edu.knowitall.common.HashCodeHelper
import edu.knowitall.tool.tokenize.Token
Expand Down Expand Up @@ -78,4 +78,21 @@ object PostaggedToken {
/** Builds a [[PostaggedToken]] by attaching `postag` to an existing token.
  *
  * @param token the token whose string and offset are reused
  * @param postag the part-of-speech tag to attach
  * @return a postagged token with the same string and offset as `token`
  */
def apply(token: Token, postag: String): PostaggedToken = {
  val text = token.string
  val start = token.offset
  PostaggedToken(postag, text, start)
}

/** Extractor exposing a token as `(postag, string, offset)`.
  *
  * @param token the postagged token to deconstruct
  * @return always a `Some` holding the tag, the token string and the offset
  */
def unapply(token: PostaggedToken): Option[(String, String, Int)] = {
  val fields = (token.postag, token.string, token.offset)
  Some(fields)
}

/** Reads and writes a [[PostaggedToken]] as a single BRAT-style line.
  *
  * The line has the shape `<postag> <start> <end>\t<string>`: a header of
  * three space-separated fields, a tab, and then the token text.
  */
object bratFormat extends Format[PostaggedToken, String] {
  /** Renders `token` as `<postag> <start> <end>\t<string>`.
    *
    * @param token the token to serialize
    * @return the single-line representation of `token`
    */
  def write(token: PostaggedToken): String = {
    val header = s"${token.postag} ${token.offset} ${token.offsets.end}"
    Iterator(header, token.string).mkString("\t")
  }

  /** Parses a line produced by `write` back into a [[PostaggedToken]].
    *
    * @param string a line of the form `<postag> <start> <end>\t<string>`
    * @return the token described by the line
    * @throws MatchError if the line does not have exactly one tab separating
    *                    a three-field header from the token text
    * @throws NumberFormatException if the start offset is not an integer
    */
  def read(string: String): PostaggedToken = {
    string.split("\t") match {
      case Array(meat, token) =>
        meat.split("\\s+") match {
          // The header fields are (postag, start, end). The token text is the
          // part after the tab, so it must not be shadowed by a header field.
          // The end offset is not passed to the constructor, which only takes
          // a start offset (presumably the end is derived from it -- confirm).
          case Array(postag, start, _) => PostaggedToken(postag, token, start.toInt)
          case _ => throw new MatchError("Could not match BRAT PostaggedToken: " + string)
        }
      case _ => throw new MatchError("Could not match BRAT PostaggedToken: " + string)
    }
  }
}
}

0 comments on commit 5534ea4

Please sign in to comment.