Skip to content

Commit 96a6901

Browse files
committed
porting to factorie, more code
1 parent 4de4c27 commit 96a6901

File tree

4 files changed

+97
-4
lines changed

4 files changed

+97
-4
lines changed

src/main/scala/edu/umass/cs/iesl/apassos/Lecture1Chunker.scala

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ package edu.umass.cs.iesl.apassos
22

33
import cc.factorie.app.nlp._
44
import cc.factorie.app.nlp.pos.PennPosLabel
5-
import cc.factorie.{FeatureVectorVariable, CategoricalVectorDomain, LabeledCategoricalVariable, CategoricalDomain}
6-
import cc.factorie.optimize.{LinearMultiClassClassifier, OnlineLinearMultiClassTrainer}
75
import scala.annotation.tailrec
6+
import cc.factorie.variable.{FeatureVectorVariable, CategoricalVectorDomain, LabeledCategoricalVariable, CategoricalDomain}
7+
import cc.factorie.app.classify.{OnlineLinearMultiClassTrainer, LinearMultiClassClassifier}
8+
import cc.factorie.app.nlp.load.LoadOntonotes5
89

910
/**
1011
* User: apassos

src/main/scala/edu/umass/cs/iesl/apassos/Lecture1Demo.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ object Lecture1Demo {
77
val doc = new app.nlp.Document("All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.")
88
val pipeline = app.nlp.DocumentAnnotatorPipeline[app.nlp.ner.NerLabel,app.nlp.parse.ParseTree]
99
pipeline.process(doc)
10-
val printers = for (ann <- Seq(app.nlp.pos.POS1, app.nlp.ner.NER1, app.nlp.parse.DepParser1)) yield (t: app.nlp.Token) => ann.tokenAnnotationString(t)
10+
val printers = for (ann <- Seq(app.nlp.pos.POS1, app.nlp.ner.BasicConllNER, app.nlp.parse.TransitionParser)) yield (t: app.nlp.Token) => ann.tokenAnnotationString(t)
1111
println(doc.owplString(printers))
1212
}
1313
}

src/main/scala/edu/umass/cs/iesl/apassos/Lecture2POSFilters.scala

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package edu.umass.cs.iesl.apassos
22

3-
import cc.factorie.app.nlp.{TokenSpan, Token, LoadOntonotes5}
3+
import cc.factorie.app.nlp.{TokenSpan, Token}
44

55
import cc.factorie._
6+
import cc.factorie.app.nlp.load.LoadOntonotes5
7+
68
/**
79
* User: apassos
810
* Date: 9/23/13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package edu.umass.cs.iesl.apassos
2+
3+
import cc.factorie.app.nlp._
4+
import cc.factorie.app.nlp.load._
5+
import cc.factorie.app.nlp.mention.{MentionType, Mention, MentionList}
6+
import cc.factorie.app.nlp.coref.{EntityKey, ConllCorefLoader}
7+
import cc.factorie.app.nlp.hcoref.{EntityVariable, EntityRef}
8+
9+
/**
10+
* User: apassos
11+
* Date: 10/7/13
12+
* Time: 2:20 PM
13+
*/
14+
15+
16+
17+
/**
 * Demo: loads an ACE APF document with `LoadACE.fromApf`, converts its
 * `ACEMentionSpanList` into a `MentionList` of `Mention`s, and prints the
 * document text with the mentions marked inline.
 */
object Lecture5AceLoader {
  /** APF file read by `main` when no path is given on the command line. */
  private val DefaultApfPath =
    "/iesl/data/ldc/LDC2006T06/data/english/bc/fp1/cnn_cf_20030303.1900.00.apf.xml"

  /** Token strings that are printed without a space in front of them. */
  private val NoSpaceBefore = Set(".", ",", ";", "?")

  /** Number of printed characters after which the current output line is ended. */
  private val LineWidth = 70

  /**
   * Prints the tokens of `aceDoc` in order, marking the mentions found in its
   * `MentionList` attribute.
   *
   * A token that starts one or more mentions is preceded by `[` followed by the
   * mention type's category value and a space, once per mention. A token at
   * position `end - 1` of one or more mentions is followed by `](id)` per
   * mention, where `id` is an integer assigned to each distinct entity string
   * in order of first appearance. Output is wrapped once at least 70 characters
   * have been printed on the current line.
   *
   * @param aceDoc document whose `attr` holds a `MentionList`; each mention is
   *               expected to carry `MentionType` and `EntityRef` attributes
   */
  def printMentions(aceDoc: Document): Unit = {
    val mentionList = aceDoc.attr[MentionList]
    // Entity string -> small integer id, assigned on first sight.
    val entityIds = collection.mutable.HashMap[String, Int]()
    // Index mentions by (section, token position) of their first and last token.
    // `m.end - 1` is used as the last token, i.e. `end` is presumably exclusive.
    val startToMentionMap = mentionList.map(m => (m.section, m.start) -> m).groupBy(_._1)
    val endToMentionMap = mentionList.map(m => (m.section, m.end - 1) -> m).groupBy(_._1)
    var count = 0
    for (t <- aceDoc.tokens) {
      val key = (t.section, t.positionInSection)
      val out = new StringBuilder
      startToMentionMap.get(key).foreach { starting =>
        for ((_, m) <- starting) out ++= "[" + m.attr[MentionType].categoryValue + " "
      }
      out ++= t.string
      endToMentionMap.get(key).foreach { ending =>
        for ((_, m) <- ending) {
          // `entityIds.size` is evaluated before insertion, so ids count up from 0.
          val id = entityIds.getOrElseUpdate(m.attr[EntityRef].entity.string, entityIds.size)
          out ++= s"]($id)"
        }
      }
      if (t.hasNext && !NoSpaceBefore.contains(t.next.string)) out ++= " "
      count += out.length
      print(out.toString)
      if (count >= LineWidth) { count = 0; println() }
    }
    // Terminate the last, partially filled line.
    if (count > 0) println()
  }

  /**
   * Loads an APF file, builds the document's `MentionList` from its ACE mention
   * spans, and prints the annotated text.
   *
   * @param args optional: `args(0)` is the path of the APF file to load;
   *             defaults to the original hard-coded corpus location
   */
  def main(args: Array[String]): Unit = {
    val apfPath = args.headOption.getOrElse(DefaultApfPath)
    val aceDoc = LoadACE.fromApf(apfPath)
    val aceMentions = aceDoc.attr[ACEMentionSpanList]
    val mentionList = aceDoc.attr += new MentionList
    aceMentions.foreach { a =>
      // Fourth argument is passed as the span's last token offset (length - 1).
      val m = new Mention(a.section, a.start, a.length, a.length - 1)
      mentionList += m
      m.attr += new MentionType(m, a.attr[ACEMentionIdentifiers].mType)
      m.attr += a.attr[EntityRef]
    }
    printMentions(aceDoc)
  }
}
57+
58+
/**
 * Demo: loads CoNLL coreference data with `ConllCorefLoader.loadWithParse`
 * and prints the first document with its mentions marked inline.
 */
object Lecture5OntonotesLoader {
  /** Corpus file read by `main` when no path is given on the command line. */
  private val DefaultCorpusPath =
    "/iesl/canvas/mccallum/data/conll2011/conll-train-clean.txt"

  /** Token strings that are printed without a space in front of them. */
  private val NoSpaceBefore = Set(".", ",", ";", "?")

  /** Number of printed characters after which the current output line is ended. */
  private val LineWidth = 70

  /**
   * Extracts the part after the dash from an entity key name.
   *
   * @param name entity key name
   * @return the second piece when splitting `name` on `-` yields exactly two
   *         pieces, otherwise the empty string
   */
  def processEntity(name: String): String =
    name.split("-") match {
      case Array(_, suffix) => suffix
      case _                => ""
    }

  /**
   * Prints the tokens of `aceDoc` in order, marking the mentions found in its
   * `MentionList` attribute.
   *
   * A token that starts mentions is preceded by one `[` per mention. A token at
   * position `end - 1` of mentions is followed, per mention, by `]` and the
   * result of `processEntity` applied to the mention's `EntityKey` name.
   * Output is wrapped once at least 70 characters have been printed on the
   * current line.
   *
   * @param aceDoc document whose `attr` holds a `MentionList`; each mention is
   *               expected to carry an `EntityKey` attribute
   */
  def printMentions(aceDoc: Document): Unit = {
    val mentionList = aceDoc.attr[MentionList]
    // Index mentions by (section, token position) of their first and last token.
    // `m.end - 1` is used as the last token, i.e. `end` is presumably exclusive.
    val startToMentionMap = mentionList.map(m => (m.section, m.start) -> m).groupBy(_._1)
    val endToMentionMap = mentionList.map(m => (m.section, m.end - 1) -> m).groupBy(_._1)
    var count = 0
    for (t <- aceDoc.tokens) {
      val key = (t.section, t.positionInSection)
      val out = new StringBuilder
      // One opening bracket per mention starting at this token.
      startToMentionMap.get(key).foreach { starting =>
        for (_ <- starting) out ++= "["
      }
      out ++= t.string
      endToMentionMap.get(key).foreach { ending =>
        for ((_, m) <- ending) out ++= "]" + processEntity(m.attr[EntityKey].name)
      }
      if (t.hasNext && !NoSpaceBefore.contains(t.next.string)) out ++= " "
      count += out.length
      print(out.toString)
      if (count >= LineWidth) { count = 0; println() }
    }
    // Terminate the last, partially filled line.
    if (count > 0) println()
  }

  /**
   * Loads the corpus and prints the first document's annotated text.
   *
   * @param args optional: `args(0)` is the path of the corpus file to load;
   *             defaults to the original hard-coded corpus location
   */
  def main(args: Array[String]): Unit = {
    val ontonotesDoc = args.headOption.getOrElse(DefaultCorpusPath)
    val docs = ConllCorefLoader.loadWithParse(ontonotesDoc)
    // Avoid an opaque NoSuchElementException when nothing was loaded.
    docs.headOption match {
      case Some(firstDoc) => printMentions(firstDoc)
      case None           => Console.err.println(s"No documents loaded from $ontonotesDoc")
    }
  }
}

0 commit comments

Comments
 (0)