1 package eu.fbk.knowledgestore.populator.naf;
2
3 import java.io.File;
4 import java.io.Writer;
5 import java.util.Hashtable;
6 import java.util.LinkedList;
7
8 import org.openrdf.model.URI;
9 import org.openrdf.model.vocabulary.DCTERMS;
10 import org.slf4j.Logger;
11 import org.slf4j.LoggerFactory;
12
13 import eu.fbk.knowledgestore.data.Record;
14 import eu.fbk.knowledgestore.populator.naf.model.NAF;
15 import eu.fbk.knowledgestore.populator.naf.model.NafHeader;
16 import eu.fbk.knowledgestore.populator.naf.model.Terms;
17 import eu.fbk.knowledgestore.populator.naf.model.Text;
18 import eu.fbk.knowledgestore.vocabulary.NWR;
19
20 public class processNAFVariables {
21
22 NAF doc;
23 NafHeader nafHeader;
24 int mentionCounter = 0;
25 DCTERMS dct = new DCTERMS();
26 String nafPublicId;
27 URI NAF_file_id;
28 URI news_file_id;
29 String PREFIX = "http://www.newsreader-project.eu/data/cars";
30 Terms globalTerms;
31 Text globalText;
32 Hashtable<String, URI> nafLayerMapper = new Hashtable<String, URI>();
33 Hashtable<String, URI> entityTypeMapper = new Hashtable<String, URI>();
34 Hashtable<String, URI> timex3TypeMapper = new Hashtable<String, URI>();
35 Hashtable<String, URI> valueTypeMapper = new Hashtable<String, URI>();
36 Hashtable<String, URI> certaintyMapper = new Hashtable<String, URI>();
37 Hashtable<String, URI> factualityMapper = new Hashtable<String, URI>();
38 Hashtable<String, URI> polarityMapper = new Hashtable<String, URI>();
39 Hashtable<String, URI> partOfSpeechMapper = new Hashtable<String, URI>();
40 Hashtable<String, URI> eventClassMapper = new Hashtable<String, URI>();
41 Hashtable<String, URI> entityClassMapper = new Hashtable<String, URI>();
42 Hashtable<String, URI> timex3ModifierMapper = new Hashtable<String, URI>();
43 Hashtable<String, URI> funtionInDocumentMapper = new Hashtable<String, URI>();
44 Hashtable<String, URI> syntacticTypeMapper = new Hashtable<String, URI>();
45 Hashtable<String, URI> tenseMapper = new Hashtable<String, URI>();
46 Hashtable<String, URI> aspectMapper = new Hashtable<String, URI>();
47 Hashtable<String, URI> tLinkTypeMapper = new Hashtable<String, URI>();
48 Hashtable<String, URI> srlExternalRefResourceTypeMapper = new Hashtable<String, URI>();
49 Hashtable<String, Record> mentionListHash = new Hashtable<String, Record>();
50 Hashtable<String, Record> entityMentions = new Hashtable<String, Record>();
51 Logger logger = LoggerFactory.getLogger(nafPopulator.class);
52 Record newsFile2, nafFile2;
53 Writer out;
54 int entityMen = 0, corefMention = 0,corefMentionEvent = 0,corefMentionNotEvent = 0, timeMention = 0, srlMention = 0, entityMen2 = 0,
55 corefMention2 = 0, timeMention2 = 0, srlMention2 = 0, rolewithEntity = 0,
56 rolewithEntity2 = 0, rolewithoutEntity = 0, factualityMentions = 0,
57 factualityMentions2 = 0, roleMentions = 0;
58 int PER = 0, LOC = 0, ORG = 0, PRO = 0, fin = 0, mix = 0, no_mapping = 0, clinkMentions=0,tlinkMentions=0;
59 boolean logDebugActive = true, logErrorActive = true;
60 String rawText = "";
61 boolean storePartialInforInCaseOfError = false;
62 File filePath = null;
63 public int tlinkMentionsDiscarded=0;
64 public int clinkMentionsDiscarded=0;
65 public int tlinkMentionsEnriched=0;
66 processNAFVariables() {
67 PER = 0;
68 LOC = 0;
69 ORG = 0;
70 PRO = 0;
71 fin = 0;
72 mix = 0;
73 clinkMentions=0;
74 tlinkMentions=0;
75 tlinkMentionsDiscarded=0;
76 clinkMentionsDiscarded=0;
77 tlinkMentionsEnriched=0;
78 no_mapping = 0;
79 entityMen2 = 0;
80 corefMention2 = 0;
81 timeMention2 = 0;
82 srlMention2 = 0;
83 rolewithEntity2 = 0;
84 factualityMentions2 = 0;
85 entityMen = 0;
86 corefMention = 0;
87 corefMentionEvent = 0;
88 corefMentionNotEvent = 0;
89 timeMention = 0;
90 srlMention = 0;
91 rolewithEntity = 0;
92 rolewithoutEntity = 0;
93 factualityMentions = 0;
94 roleMentions = 0;
95 rawText = "";
96 nafLayerMapper = new Hashtable<String, URI>();
97 entityTypeMapper = new Hashtable<String, URI>();
98 timex3TypeMapper = new Hashtable<String, URI>();
99 valueTypeMapper = new Hashtable<String, URI>();
100 certaintyMapper = new Hashtable<String, URI>();
101 factualityMapper = new Hashtable<String, URI>();
102 polarityMapper = new Hashtable<String, URI>();
103 partOfSpeechMapper = new Hashtable<String, URI>();
104 eventClassMapper = new Hashtable<String, URI>();
105 entityClassMapper = new Hashtable<String, URI>();
106 timex3ModifierMapper = new Hashtable<String, URI>();
107 funtionInDocumentMapper = new Hashtable<String, URI>();
108 syntacticTypeMapper = new Hashtable<String, URI>();
109 tenseMapper = new Hashtable<String, URI>();
110 aspectMapper = new Hashtable<String, URI>();
111 tLinkTypeMapper = new Hashtable<String, URI>();
112 srlExternalRefResourceTypeMapper = new Hashtable<String, URI>();
113 mentionListHash = new Hashtable<String, Record>();
114 entityMentions = new Hashtable<String, Record>();
115 valueTypeMapper.put("", NWR.VALUE_PERCENT);
116 valueTypeMapper.put("", NWR.VALUE_MONEY);
117 valueTypeMapper.put("", NWR.VALUE_QUANTITY);
118
119 certaintyMapper.put("", NWR.CERTAIN);
120 certaintyMapper.put("", NWR.UNCERTAIN);
121
122 factualityMapper.put("", NWR.FACTUAL);
123 factualityMapper.put("", NWR.COUNTERFACTUAL);
124 factualityMapper.put("", NWR.NON_FACTUAL);
125
126 polarityMapper.put("", NWR.POLARITY_POS);
127 polarityMapper.put("", NWR.POLARITY_NEG);
128
129 partOfSpeechMapper.put("N", NWR.POS_NOUN);
130 partOfSpeechMapper.put("V", NWR.POS_VERB);
131 partOfSpeechMapper.put("", NWR.POS_OTHER);
132
133 eventClassMapper.put("cognition", NWR.EVENT_SPEECH_COGNITIVE);
134 eventClassMapper.put("cognitive", NWR.EVENT_SPEECH_COGNITIVE);
135 eventClassMapper.put("communication", NWR.EVENT_SPEECH_COGNITIVE);
136 eventClassMapper.put("grammatical", NWR.EVENT_GRAMMATICAL);
137 eventClassMapper.put("contextual", NWR.EVENT_OTHER);
138
139 timex3TypeMapper.put("DATE", NWR.TIMEX3_DATE);
140 timex3TypeMapper.put("TIME", NWR.TIMEX3_TIME);
141 timex3TypeMapper.put("DURATION", NWR.TIMEX3_DURATION);
142 timex3TypeMapper.put("SET", NWR.TIMEX3_SET);
143
144 entityClassMapper.put("", NWR.ENTITY_CLASS_SPC);
145 entityClassMapper.put("", NWR.ENTITY_CLASS_GEN);
146 entityClassMapper.put("", NWR.ENTITY_CLASS_USP);
147 entityClassMapper.put("", NWR.ENTITY_CLASS_NEG);
148
149 timex3ModifierMapper.put("", NWR.MOD_BEFORE);
150 timex3ModifierMapper.put("", NWR.MOD_ON_OR_BEFORE);
151 timex3ModifierMapper.put("", NWR.MOD_MID);
152 timex3ModifierMapper.put("", NWR.MOD_END);
153 timex3ModifierMapper.put("", NWR.MOD_AFTER);
154 timex3ModifierMapper.put("", NWR.MOD_ON_OR_AFTER);
155 timex3ModifierMapper.put("", NWR.MOD_LESS_THAN);
156 timex3ModifierMapper.put("", NWR.MOD_MORE_THAN);
157 timex3ModifierMapper.put("", NWR.MOD_EQUAL_OR_LESS);
158 timex3ModifierMapper.put("", NWR.MOD_EQUAL_OR_MORE);
159 timex3ModifierMapper.put("", NWR.MOD_START);
160 timex3ModifierMapper.put("", NWR.MOD_APPROX);
161
162 funtionInDocumentMapper.put("", NWR.FUNCTION_CREATION_TIME);
163 funtionInDocumentMapper.put("", NWR.FUNCTION_EXPIRATION_TIME);
164 funtionInDocumentMapper.put("", NWR.FUNCTION_MODIFICATION_TIME);
165 funtionInDocumentMapper.put("", NWR.FUNCTION_PUBLICATION_TIME);
166 funtionInDocumentMapper.put("", NWR.FUNCTION_RELEASE_TIME);
167 funtionInDocumentMapper.put("", NWR.FUNCTION_RECEPTION_TIME);
168 funtionInDocumentMapper.put("", NWR.FUNCTION_NONE);
169
170 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_NAM);
171 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_NOM);
172 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PRO);
173 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PTV);
174 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PRE);
175 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_HLS);
176 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_CONJ);
177 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_APP);
178 syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_ARC);
179
180 entityTypeMapper.put("per", NWR.ENTITY_TYPE_PER);
181 entityTypeMapper.put("loc", NWR.ENTITY_TYPE_LOC);
182 entityTypeMapper.put("org", NWR.ENTITY_TYPE_ORG);
183 entityTypeMapper.put("art", NWR.ENTITY_TYPE_PRO);
184 entityTypeMapper.put("pro", NWR.ENTITY_TYPE_PRO);
185 entityTypeMapper.put("fin", NWR.ENTITY_TYPE_FIN);
186 entityTypeMapper.put("mix", NWR.ENTITY_TYPE_MIX);
187
188 tenseMapper.put("", NWR.TENSE_FUTURE);
189 tenseMapper.put("", NWR.TENSE_PAST);
190 tenseMapper.put("", NWR.TENSE_PRESENT);
191 tenseMapper.put("", NWR.TENSE_INFINITIVE);
192 tenseMapper.put("", NWR.TENSE_PRESPART);
193 tenseMapper.put("", NWR.TENSE_PASTPART);
194 tenseMapper.put("", NWR.TENSE_NONE);
195
196 aspectMapper.put("", NWR.ASPECT_PROGRESSIVE);
197 aspectMapper.put("", NWR.ASPECT_PERFECTIVE);
198 aspectMapper.put("", NWR.ASPECT_PERFECTIVE_PROGRESSIVE);
199 aspectMapper.put("", NWR.ASPECT_NONE);
200
201 nafLayerMapper.put("raw", NWR.LAYER_RAW);
202 nafLayerMapper.put("text", NWR.LAYER_TEXT);
203 nafLayerMapper.put("terms", NWR.LAYER_TERMS);
204 nafLayerMapper.put("deps", NWR.LAYER_DEPS);
205 nafLayerMapper.put("chunks", NWR.LAYER_CHUNKS);
206 nafLayerMapper.put("entities", NWR.LAYER_ENTITIES);
207 nafLayerMapper.put("coreferences", NWR.LAYER_COREFERENCES);
208 nafLayerMapper.put("srl", NWR.LAYER_SRL);
209 nafLayerMapper.put("constituency", NWR.LAYER_CONSTITUENCY);
210 nafLayerMapper.put("timeExpressions", NWR.LAYER_TIME_EXPRESSIONS);
211 nafLayerMapper.put("factuality", NWR.LAYER_FACTUALITY);
212 nafLayerMapper.put("topics", NWR.LAYER_TOPICS);
213 nafLayerMapper.put("markables", NWR.LAYER_MARKABLES);
214 nafLayerMapper.put("factualities", NWR.LAYER_FACTUALITIES);
215 nafLayerMapper.put("opinions", NWR.LAYER_OPINIONS);
216 nafLayerMapper.put("temporalRelations", NWR.LAYER_TEMPORAL_RELATIONS);
217 nafLayerMapper.put("causalRelations", NWR.LAYER_CAUSAL_RELATIONS);
218 nafLayerMapper.put("vua-multiword-tagger", NWR.LAYER_VUA_MULTIWORD_TAGGER);
219 nafLayerMapper.put("vua-event-coref-intradoc-lemma-baseline", NWR.LAYER_VUA_EVENT_COREF_INTRADOC_LEMMA_BASELINE);
220
221
222 nafLayerMapper.put("coreference", NWR.LAYER_COREFERENCES);
223
224
225 nafLayerMapper.put("time_expressions", NWR.LAYER_TIME_EXPRESSIONS);
226 nafLayerMapper.put("timex3", NWR.LAYER_TIME_EXPRESSIONS);
227
228
229 tLinkTypeMapper.put("BEFORE", NWR.TLINK_BEFORE);
230 tLinkTypeMapper.put("AFTER", NWR.TLINK_AFTER);
231 tLinkTypeMapper.put("INCLUDES", NWR.TLINK_INCLUDES);
232 tLinkTypeMapper.put("MEASURE", NWR.TLINK_MEASURE);
233 tLinkTypeMapper.put("IS_INCLUDED", NWR.TLINK_IS_INCLUDED);
234 tLinkTypeMapper.put("SIMULTANEOUS", NWR.TLINK_SIMULTANEOUS);
235 tLinkTypeMapper.put("IAFTER", NWR.TLINK_IAFTER);
236 tLinkTypeMapper.put("IBEFORE", NWR.TLINK_IBEFORE);
237 tLinkTypeMapper.put("BEGINS", NWR.TLINK_BEGINS);
238 tLinkTypeMapper.put("ENDS", NWR.TLINK_ENDS);
239 tLinkTypeMapper.put("BEGUN_BY", NWR.TLINK_BEGUN_BY);
240 tLinkTypeMapper.put("ENDED_BY", NWR.TLINK_ENDED_BY);
241
242 srlExternalRefResourceTypeMapper.put("PropBank", NWR.PROPBANK_REF);
243 srlExternalRefResourceTypeMapper.put("VerbNet", NWR.VERBNET_REF);
244 srlExternalRefResourceTypeMapper.put("FrameNet", NWR.FRAMENET_REF);
245 srlExternalRefResourceTypeMapper.put("NomBank", NWR.NOMBANK_REF);
246 srlExternalRefResourceTypeMapper.put("ESO", NWR.ESO_REF);
247 }
248 }