1   package eu.fbk.knowledgestore.populator.naf;
2   
3   import java.io.File;
4   import java.io.Writer;
5   import java.util.Hashtable;
6   import java.util.LinkedList;
7   
8   import org.openrdf.model.URI;
9   import org.openrdf.model.vocabulary.DCTERMS;
10  import org.slf4j.Logger;
11  import org.slf4j.LoggerFactory;
12  
13  import eu.fbk.knowledgestore.data.Record;
14  import eu.fbk.knowledgestore.populator.naf.model.NAF;
15  import eu.fbk.knowledgestore.populator.naf.model.NafHeader;
16  import eu.fbk.knowledgestore.populator.naf.model.Terms;
17  import eu.fbk.knowledgestore.populator.naf.model.Text;
18  import eu.fbk.knowledgestore.vocabulary.NWR;
19  
20  public class processNAFVariables {
21  
22       NAF doc;
23       NafHeader nafHeader;
24       int mentionCounter = 0;
25       DCTERMS dct = new DCTERMS();
26       String nafPublicId;
27       URI NAF_file_id;
28       URI news_file_id;
29       String PREFIX = "http://www.newsreader-project.eu/data/cars";
30       Terms globalTerms;
31       Text globalText;
32       Hashtable<String, URI> nafLayerMapper = new Hashtable<String, URI>();
33       Hashtable<String, URI> entityTypeMapper = new Hashtable<String, URI>();
34       Hashtable<String, URI> timex3TypeMapper = new Hashtable<String, URI>();
35       Hashtable<String, URI> valueTypeMapper = new Hashtable<String, URI>();
36       Hashtable<String, URI> certaintyMapper = new Hashtable<String, URI>();
37       Hashtable<String, URI> factualityMapper = new Hashtable<String, URI>();
38       Hashtable<String, URI> polarityMapper = new Hashtable<String, URI>();
39       Hashtable<String, URI> partOfSpeechMapper = new Hashtable<String, URI>();
40       Hashtable<String, URI> eventClassMapper = new Hashtable<String, URI>();
41       Hashtable<String, URI> entityClassMapper = new Hashtable<String, URI>();
42       Hashtable<String, URI> timex3ModifierMapper = new Hashtable<String, URI>();
43       Hashtable<String, URI> funtionInDocumentMapper = new Hashtable<String, URI>();
44       Hashtable<String, URI> syntacticTypeMapper = new Hashtable<String, URI>();
45       Hashtable<String, URI> tenseMapper = new Hashtable<String, URI>();
46       Hashtable<String, URI> aspectMapper = new Hashtable<String, URI>();
47       Hashtable<String, URI> tLinkTypeMapper = new Hashtable<String, URI>();
48       Hashtable<String, URI> srlExternalRefResourceTypeMapper = new Hashtable<String, URI>();
49       Hashtable<String, Record> mentionListHash = new Hashtable<String, Record>();
50       Hashtable<String, Record> entityMentions = new Hashtable<String, Record>();
51      Logger logger = LoggerFactory.getLogger(nafPopulator.class);
52       Record newsFile2, nafFile2;
53       Writer out;
54       int entityMen = 0, corefMention = 0,corefMentionEvent = 0,corefMentionNotEvent = 0, timeMention = 0, srlMention = 0, entityMen2 = 0,
55              corefMention2 = 0, timeMention2 = 0, srlMention2 = 0, rolewithEntity = 0,
56              rolewithEntity2 = 0, rolewithoutEntity = 0, factualityMentions = 0,
57              factualityMentions2 = 0, roleMentions = 0;
58       int PER = 0, LOC = 0, ORG = 0, PRO = 0, fin = 0, mix = 0, no_mapping = 0, clinkMentions=0,tlinkMentions=0;
59       boolean logDebugActive = true, logErrorActive = true;
60       String rawText = "";
61       boolean storePartialInforInCaseOfError = false;
62       File filePath = null;
63  	public int tlinkMentionsDiscarded=0;
64  	public int clinkMentionsDiscarded=0;
65  	public int tlinkMentionsEnriched=0;
66         processNAFVariables() {
67       	PER = 0;
68           LOC = 0;
69           ORG = 0;
70           PRO = 0;
71           fin = 0;
72           mix = 0;
73           clinkMentions=0;
74           tlinkMentions=0;
75           tlinkMentionsDiscarded=0;
76           clinkMentionsDiscarded=0;
77           tlinkMentionsEnriched=0;
78           no_mapping = 0;
79           entityMen2 = 0;
80           corefMention2 = 0;
81           timeMention2 = 0;
82           srlMention2 = 0;
83           rolewithEntity2 = 0;
84           factualityMentions2 = 0;
85           entityMen = 0;
86           corefMention = 0;
87           corefMentionEvent = 0;
88           corefMentionNotEvent = 0;
89           timeMention = 0;
90           srlMention = 0;
91           rolewithEntity = 0;
92           rolewithoutEntity = 0;
93           factualityMentions = 0;
94           roleMentions = 0;
95           rawText = "";
96           nafLayerMapper = new Hashtable<String, URI>();
97           entityTypeMapper = new Hashtable<String, URI>();
98           timex3TypeMapper = new Hashtable<String, URI>();
99           valueTypeMapper = new Hashtable<String, URI>();
100          certaintyMapper = new Hashtable<String, URI>();
101          factualityMapper = new Hashtable<String, URI>();
102          polarityMapper = new Hashtable<String, URI>();
103          partOfSpeechMapper = new Hashtable<String, URI>();
104          eventClassMapper = new Hashtable<String, URI>();
105          entityClassMapper = new Hashtable<String, URI>();
106          timex3ModifierMapper = new Hashtable<String, URI>();
107          funtionInDocumentMapper = new Hashtable<String, URI>();
108          syntacticTypeMapper = new Hashtable<String, URI>();
109          tenseMapper = new Hashtable<String, URI>();
110          aspectMapper = new Hashtable<String, URI>();
111          tLinkTypeMapper = new Hashtable<String, URI>();
112          srlExternalRefResourceTypeMapper = new Hashtable<String, URI>();
113          mentionListHash = new Hashtable<String, Record>();
114          entityMentions = new Hashtable<String, Record>();
115          valueTypeMapper.put("", NWR.VALUE_PERCENT);
116          valueTypeMapper.put("", NWR.VALUE_MONEY);
117          valueTypeMapper.put("", NWR.VALUE_QUANTITY);
118 
119          certaintyMapper.put("", NWR.CERTAIN);
120          certaintyMapper.put("", NWR.UNCERTAIN);
121 
122          factualityMapper.put("", NWR.FACTUAL);
123          factualityMapper.put("", NWR.COUNTERFACTUAL);
124          factualityMapper.put("", NWR.NON_FACTUAL);
125 
126          polarityMapper.put("", NWR.POLARITY_POS);
127          polarityMapper.put("", NWR.POLARITY_NEG);
128 
129          partOfSpeechMapper.put("N", NWR.POS_NOUN);
130          partOfSpeechMapper.put("V", NWR.POS_VERB);
131          partOfSpeechMapper.put("", NWR.POS_OTHER);
132 
133          eventClassMapper.put("cognition", NWR.EVENT_SPEECH_COGNITIVE);
134          eventClassMapper.put("cognitive", NWR.EVENT_SPEECH_COGNITIVE);
135          eventClassMapper.put("communication", NWR.EVENT_SPEECH_COGNITIVE);
136          eventClassMapper.put("grammatical", NWR.EVENT_GRAMMATICAL);
137          eventClassMapper.put("contextual", NWR.EVENT_OTHER);
138 
139          timex3TypeMapper.put("DATE", NWR.TIMEX3_DATE);
140          timex3TypeMapper.put("TIME", NWR.TIMEX3_TIME);
141          timex3TypeMapper.put("DURATION", NWR.TIMEX3_DURATION);
142          timex3TypeMapper.put("SET", NWR.TIMEX3_SET);
143 
144          entityClassMapper.put("", NWR.ENTITY_CLASS_SPC);
145          entityClassMapper.put("", NWR.ENTITY_CLASS_GEN);
146          entityClassMapper.put("", NWR.ENTITY_CLASS_USP);
147          entityClassMapper.put("", NWR.ENTITY_CLASS_NEG);
148 
149          timex3ModifierMapper.put("", NWR.MOD_BEFORE);
150          timex3ModifierMapper.put("", NWR.MOD_ON_OR_BEFORE);
151          timex3ModifierMapper.put("", NWR.MOD_MID);
152          timex3ModifierMapper.put("", NWR.MOD_END);
153          timex3ModifierMapper.put("", NWR.MOD_AFTER);
154          timex3ModifierMapper.put("", NWR.MOD_ON_OR_AFTER);
155          timex3ModifierMapper.put("", NWR.MOD_LESS_THAN);
156          timex3ModifierMapper.put("", NWR.MOD_MORE_THAN);
157          timex3ModifierMapper.put("", NWR.MOD_EQUAL_OR_LESS);
158          timex3ModifierMapper.put("", NWR.MOD_EQUAL_OR_MORE);
159          timex3ModifierMapper.put("", NWR.MOD_START);
160          timex3ModifierMapper.put("", NWR.MOD_APPROX);
161 
162          funtionInDocumentMapper.put("", NWR.FUNCTION_CREATION_TIME);
163          funtionInDocumentMapper.put("", NWR.FUNCTION_EXPIRATION_TIME);
164          funtionInDocumentMapper.put("", NWR.FUNCTION_MODIFICATION_TIME);
165          funtionInDocumentMapper.put("", NWR.FUNCTION_PUBLICATION_TIME);
166          funtionInDocumentMapper.put("", NWR.FUNCTION_RELEASE_TIME);
167          funtionInDocumentMapper.put("", NWR.FUNCTION_RECEPTION_TIME);
168          funtionInDocumentMapper.put("", NWR.FUNCTION_NONE);
169 
170          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_NAM);
171          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_NOM);
172          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PRO);
173          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PTV);
174          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_PRE);
175          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_HLS);
176          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_CONJ);
177          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_APP);
178          syntacticTypeMapper.put("", NWR.SYNTACTIC_TYPE_ARC);
179 
180          entityTypeMapper.put("per", NWR.ENTITY_TYPE_PER);
181          entityTypeMapper.put("loc", NWR.ENTITY_TYPE_LOC);
182          entityTypeMapper.put("org", NWR.ENTITY_TYPE_ORG);
183          entityTypeMapper.put("art", NWR.ENTITY_TYPE_PRO);
184          entityTypeMapper.put("pro", NWR.ENTITY_TYPE_PRO);
185          entityTypeMapper.put("fin", NWR.ENTITY_TYPE_FIN);
186          entityTypeMapper.put("mix", NWR.ENTITY_TYPE_MIX);
187 
188          tenseMapper.put("", NWR.TENSE_FUTURE);
189          tenseMapper.put("", NWR.TENSE_PAST);
190          tenseMapper.put("", NWR.TENSE_PRESENT);
191          tenseMapper.put("", NWR.TENSE_INFINITIVE);
192          tenseMapper.put("", NWR.TENSE_PRESPART);
193          tenseMapper.put("", NWR.TENSE_PASTPART);
194          tenseMapper.put("", NWR.TENSE_NONE);
195 
196          aspectMapper.put("", NWR.ASPECT_PROGRESSIVE);
197          aspectMapper.put("", NWR.ASPECT_PERFECTIVE);
198          aspectMapper.put("", NWR.ASPECT_PERFECTIVE_PROGRESSIVE);
199          aspectMapper.put("", NWR.ASPECT_NONE);
200 
201          nafLayerMapper.put("raw", NWR.LAYER_RAW);
202          nafLayerMapper.put("text", NWR.LAYER_TEXT);
203          nafLayerMapper.put("terms", NWR.LAYER_TERMS);
204          nafLayerMapper.put("deps", NWR.LAYER_DEPS);
205          nafLayerMapper.put("chunks", NWR.LAYER_CHUNKS);
206          nafLayerMapper.put("entities", NWR.LAYER_ENTITIES);
207          nafLayerMapper.put("coreferences", NWR.LAYER_COREFERENCES);
208          nafLayerMapper.put("srl", NWR.LAYER_SRL);
209          nafLayerMapper.put("constituency", NWR.LAYER_CONSTITUENCY);
210          nafLayerMapper.put("timeExpressions", NWR.LAYER_TIME_EXPRESSIONS);
211          nafLayerMapper.put("factuality", NWR.LAYER_FACTUALITY);
212          nafLayerMapper.put("topics", NWR.LAYER_TOPICS);
213          nafLayerMapper.put("markables", NWR.LAYER_MARKABLES);
214          nafLayerMapper.put("factualities", NWR.LAYER_FACTUALITIES);
215          nafLayerMapper.put("opinions",  NWR.LAYER_OPINIONS); 
216          nafLayerMapper.put("temporalRelations",  NWR.LAYER_TEMPORAL_RELATIONS); 
217          nafLayerMapper.put("causalRelations",  NWR.LAYER_CAUSAL_RELATIONS); 
218          nafLayerMapper.put("vua-multiword-tagger",  NWR.LAYER_VUA_MULTIWORD_TAGGER); 
219          nafLayerMapper.put("vua-event-coref-intradoc-lemma-baseline",  NWR.LAYER_VUA_EVENT_COREF_INTRADOC_LEMMA_BASELINE);
220 
221  	// patch for wrong value layer="coreference"
222          nafLayerMapper.put("coreference", NWR.LAYER_COREFERENCES);
223 
224  	// patch for wrong value layer="time_expressions" or layer="timex3"
225          nafLayerMapper.put("time_expressions", NWR.LAYER_TIME_EXPRESSIONS);
226          nafLayerMapper.put("timex3", NWR.LAYER_TIME_EXPRESSIONS);
227 
228 
229          tLinkTypeMapper.put("BEFORE", NWR.TLINK_BEFORE);
230          tLinkTypeMapper.put("AFTER", NWR.TLINK_AFTER);
231          tLinkTypeMapper.put("INCLUDES", NWR.TLINK_INCLUDES);
232          tLinkTypeMapper.put("MEASURE", NWR.TLINK_MEASURE);
233          tLinkTypeMapper.put("IS_INCLUDED", NWR.TLINK_IS_INCLUDED);
234          tLinkTypeMapper.put("SIMULTANEOUS", NWR.TLINK_SIMULTANEOUS);
235          tLinkTypeMapper.put("IAFTER", NWR.TLINK_IAFTER);
236          tLinkTypeMapper.put("IBEFORE", NWR.TLINK_IBEFORE);
237          tLinkTypeMapper.put("BEGINS", NWR.TLINK_BEGINS);
238          tLinkTypeMapper.put("ENDS", NWR.TLINK_ENDS);
239          tLinkTypeMapper.put("BEGUN_BY", NWR.TLINK_BEGUN_BY);
240          tLinkTypeMapper.put("ENDED_BY", NWR.TLINK_ENDED_BY);
241 
242          srlExternalRefResourceTypeMapper.put("PropBank", NWR.PROPBANK_REF);
243          srlExternalRefResourceTypeMapper.put("VerbNet", NWR.VERBNET_REF);
244          srlExternalRefResourceTypeMapper.put("FrameNet", NWR.FRAMENET_REF);
245          srlExternalRefResourceTypeMapper.put("NomBank", NWR.NOMBANK_REF);
246          srlExternalRefResourceTypeMapper.put("ESO", NWR.ESO_REF);
247      }
248 }