1 package eu.fbk.knowledgestore.data;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.Closeable;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.FileNotFoundException;
8 import java.io.IOException;
9 import java.io.InputStream;
10 import java.io.InputStreamReader;
11 import java.io.OutputStream;
12 import java.io.Reader;
13 import java.net.URL;
14 import java.net.URLConnection;
15 import java.nio.ByteBuffer;
16 import java.nio.CharBuffer;
17 import java.nio.charset.Charset;
18 import java.nio.charset.CharsetEncoder;
19 import java.nio.charset.CoderResult;
20 import java.nio.charset.CodingErrorAction;
21 import java.util.Date;
22
23 import com.google.common.base.Charsets;
24 import com.google.common.base.Preconditions;
25 import com.google.common.base.Throwables;
26 import com.google.common.io.ByteStreams;
27 import com.google.common.io.CharSource;
28 import com.google.common.io.CharStreams;
29 import com.google.common.net.MediaType;
30
31 import org.openrdf.model.URI;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34
35 import eu.fbk.knowledgestore.vocabulary.KS;
36 import eu.fbk.knowledgestore.vocabulary.NFO;
37 import eu.fbk.knowledgestore.vocabulary.NIE;
38 import eu.fbk.rdfpro.util.IO;
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95 public final class Representation implements Closeable {
96
/** Logger used to report failures raised while closing the wrapped data. */
private static final Logger LOGGER = LoggerFactory.getLogger(Representation.class);

/** The wrapped data; the factory methods of this class supply an InputStream or a Reader. */
private final Closeable data;

/** The metadata record attached to this representation. */
private final Record metadata;

/**
 * Creates a representation wrapping the supplied data, with a fresh metadata record obtained
 * from {@code Record.create(null, KS.REPRESENTATION)}.
 *
 * @param data
 *            the data to wrap, not null
 * @throws NullPointerException
 *             if {@code data} is null
 */
private Representation(final Closeable data) {
    Preconditions.checkNotNull(data);
    this.data = data;
    this.metadata = Record.create(null, KS.REPRESENTATION);
}
107
108 private Charset getCharset() {
109 final String mimeType = this.metadata.getUnique(NIE.MIME_TYPE, String.class);
110 if (mimeType == null) {
111 return Charsets.UTF_8;
112 }
113 try {
114 return MediaType.parse(mimeType).charset().or(Charsets.UTF_8);
115 } catch (final Throwable ex) {
116 throw new IllegalArgumentException("Invalid mime type in metadata: " + mimeType, ex);
117 }
118 }
119
120 @Override
121 protected void finalize() throws Throwable {
122 try {
123 close();
124 } finally {
125 super.finalize();
126 }
127 }
128
129
130
131
132
133
134
135
136
137
138 public static Representation create(final InputStream stream) {
139 return new Representation(stream);
140 }
141
142
143
144
145
146
147
148
149
150
151
152 public static Representation create(final byte[] bytes) {
153 final Representation representation = new Representation(new ByteArrayInputStream(bytes));
154 representation.metadata.set(NFO.FILE_SIZE, (long) bytes.length);
155 return representation;
156 }
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173 public static Representation create(final File file, final boolean autoDecompress)
174 throws IllegalArgumentException {
175 try {
176 String name = file.getName();
177 final Representation representation;
178 if (autoDecompress) {
179 byte[] bytes = ByteStreams.toByteArray(IO.read(file.getAbsolutePath()));
180 representation = new Representation(new ByteArrayInputStream(bytes));
181 if (name.endsWith(".gz") || name.endsWith(".xz") || name.endsWith(".7z")) {
182 name = name.substring(0, name.length() - 3);
183 } else if (name.endsWith(".bz2") || name.endsWith(".lz4")) {
184 name = name.substring(0, name.length() - 4);
185 }
186 } else {
187 representation = new Representation(IO.buffer(new FileInputStream(file)));
188 }
189 representation.metadata.set(NFO.FILE_SIZE, file.length());
190 representation.metadata.set(NFO.FILE_NAME, name);
191 representation.metadata.set(NFO.FILE_LAST_MODIFIED, new Date(file.lastModified()));
192 representation.metadata.set(NIE.MIME_TYPE, Data.extensionToMimeType(name));
193 return representation;
194 } catch (final FileNotFoundException ex) {
195 throw new IllegalArgumentException("Not a file: " + file.getAbsolutePath());
196 } catch (final IOException e) {
197 throw new IllegalArgumentException("IOException on file: " + file.getAbsolutePath());
198 }
199 }
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216 public static Representation create(final URL url) throws IllegalArgumentException {
217
218
219 URLConnection connection;
220 InputStream stream;
221 try {
222 connection = url.openConnection();
223 connection.connect();
224 stream = connection.getInputStream();
225 } catch (final IOException ex) {
226 throw new IllegalArgumentException("Cannot acquire a connection to URL " + url, ex);
227 }
228
229
230 final Representation representation = new Representation(stream);
231
232 try {
233
234 final long lastModified = connection.getLastModified();
235 if (lastModified != 0) {
236 representation.metadata.set(NFO.FILE_LAST_MODIFIED, new Date(lastModified));
237 }
238
239
240 String mimeType = connection.getContentType();
241 if (mimeType == null) {
242 mimeType = Data.extensionToMimeType(url.getFile());
243 }
244 representation.metadata.set(NIE.MIME_TYPE, mimeType);
245
246
247 final int length = connection.getContentLength();
248 if (length >= 0) {
249 representation.metadata.set(NFO.FILE_SIZE, length);
250 }
251
252
253 String filename = null;
254 final String disposition = connection.getHeaderField("Content-Disposition");
255 if (disposition != null && disposition.contains("filename")) {
256 final int start = Math.max(disposition.indexOf('\"'), disposition.indexOf('\''));
257 if (start > 0) {
258 final int end = Math.max(disposition.lastIndexOf('\"'),
259 disposition.lastIndexOf('\''));
260 if (end > 0) {
261 filename = disposition.substring(start + 1, end);
262 }
263 }
264 }
265 if (filename == null) {
266 final String path = url.getPath();
267 final int index = path.lastIndexOf('/');
268 if (index >= 0) {
269 filename = path.substring(index + 1);
270 }
271
272 }
273 representation.metadata.set(NFO.FILE_NAME, filename);
274
275
276 final String md5 = connection.getHeaderField("Content-MD5");
277 if (md5 != null) {
278 final Record hash = Record.create();
279 hash.set(NFO.HASH_ALGORITHM, "MD5");
280 hash.set(NFO.HASH_VALUE, md5);
281 representation.metadata.set(NFO.HAS_HASH, hash);
282 }
283
284
285 return representation;
286
287 } catch (final Throwable ex) {
288
289 try {
290 connection.getInputStream().close();
291 } catch (final Throwable ex2) {
292
293 }
294 throw Throwables.propagate(ex);
295 }
296 }
297
298
299
300
301
302
303
304
305
306
307
308
309
310 public static Representation create(final Reader reader) {
311 Preconditions.checkNotNull(reader);
312 return new Representation(reader);
313 }
314
315
316
317
318
319
320
321
322
323
324
325 public static Representation create(final CharSequence sequence) {
326 try {
327 return new Representation(CharSource.wrap(sequence).openStream());
328 } catch (final IOException ex) {
329 throw new Error("Unexpected exception (!): " + ex.getMessage(), ex);
330 }
331 }
332
333
334
335
336
337
338 public Record getMetadata() {
339 return this.metadata;
340 }
341
342
343
344
345
346
347
348
349 public InputStream getInputStream() {
350 if (this.data instanceof InputStream) {
351 return (InputStream) this.data;
352 } else {
353 final Reader reader = (Reader) this.data;
354 return new ReaderInputStream(reader, getCharset());
355 }
356 }
357
358
359
360
361
362
363
364
365 public Reader getReader() {
366 if (this.data instanceof Reader) {
367 return (Reader) this.data;
368 } else {
369 final InputStream stream = (InputStream) this.data;
370 return new InputStreamReader(stream, getCharset());
371 }
372 }
373
374
375
376
377
378
379
380
381
382
383
384
385 public byte[] writeToByteArray() throws IOException {
386 final InputStream stream = getInputStream();
387 try {
388 return ByteStreams.toByteArray(stream);
389 } finally {
390 stream.close();
391 }
392 }
393
394
395
396
397
398
399
400
401
402
403
404
405 public String writeToString() throws IOException {
406 final Reader reader = getReader();
407 try {
408 return CharStreams.toString(reader);
409 } finally {
410 reader.close();
411 }
412 }
413
414
415
416
417
418
419
420
421
422
423
424
425
426 public void writeTo(final OutputStream sink) throws IOException {
427 final InputStream in = getInputStream();
428 try {
429 ByteStreams.copy(in, sink);
430 } finally {
431 in.close();
432 }
433 }
434
435
436
437
438
439
440
441
442
443
444
445
446
447 public void writeTo(final Appendable sink) throws IOException {
448 final Reader reader = getReader();
449 try {
450 CharStreams.copy(reader, sink);
451 } finally {
452 reader.close();
453 }
454 }
455
456 @Override
457 public void close() {
458 try {
459 this.data.close();
460 } catch (final Exception ex) {
461 LOGGER.warn("Exception caught while closing representation", ex);
462 }
463 }
464
465
466
467
468 @Override
469 public String toString() {
470 final String file = this.metadata.getUnique(NFO.FILE_NAME, String.class, "unnamed file");
471 final String type = this.metadata.getUnique(NIE.MIME_TYPE, String.class, "unknown type");
472 final long size = this.metadata.getUnique(NFO.FILE_SIZE, Long.class, -1L);
473 return file + ", " + type + ", " + (size >= 0 ? size + " bytes" : "unknown size");
474 }
475
476
477 private class ReaderInputStream extends InputStream {
478
479 private static final int BUFFER_SIZE = 1024;
480
481 private final Reader reader;
482
483 private final CharsetEncoder enc;
484
485 private final CharBuffer encIn;
486
487 private final ByteBuffer encOut;
488
489 private CoderResult lastCoderResult;
490
491 private boolean eof;
492
493 ReaderInputStream(final Reader reader, final Charset charset) {
494
495 this.reader = reader;
496 this.enc = charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE)
497 .onUnmappableCharacter(CodingErrorAction.REPLACE);
498 this.encIn = CharBuffer.allocate(BUFFER_SIZE);
499 this.encIn.flip();
500 this.encOut = ByteBuffer.allocate(128);
501 this.encOut.flip();
502 }
503
504 private void fillBuffer() throws IOException {
505
506 if (!this.eof && (this.lastCoderResult == null || this.lastCoderResult.isUnderflow())) {
507 this.encIn.compact();
508 final int p = this.encIn.position();
509 final int c = this.reader.read(this.encIn.array(), p, this.encIn.remaining());
510 if (c == -1) {
511 this.eof = true;
512 } else {
513 this.encIn.position(p + c);
514 }
515 this.encIn.flip();
516 }
517
518 this.encOut.compact();
519 this.lastCoderResult = this.enc.encode(this.encIn, this.encOut, this.eof);
520 this.encOut.flip();
521 }
522
523 @Override
524 public int read(final byte[] b, final int offset, final int length) throws IOException {
525
526 Preconditions.checkNotNull(b);
527 Preconditions.checkPositionIndex(offset, b.length);
528 Preconditions.checkPositionIndex(offset + length, b.length);
529
530 int read = 0;
531 int o = offset;
532 int l = length;
533
534 while (l > 0) {
535 if (this.encOut.hasRemaining()) {
536 final int c = Math.min(this.encOut.remaining(), l);
537 this.encOut.get(b, o, c);
538 o += c;
539 l -= c;
540 read += c;
541 } else {
542 fillBuffer();
543 if (this.eof && !this.encOut.hasRemaining()) {
544 break;
545 }
546 }
547 }
548
549 return read > 0 || !this.eof ? read : l == 0 ? 0 : -1;
550 }
551
552 @Override
553 public int read() throws IOException {
554
555 for (;;) {
556 if (this.encOut.hasRemaining()) {
557 return this.encOut.get() & 0xFF;
558 } else {
559 fillBuffer();
560 if (this.eof && !this.encOut.hasRemaining()) {
561 return -1;
562 }
563 }
564 }
565 }
566
567 @Override
568 public void close() throws IOException {
569 this.reader.close();
570 }
571
572 }
573
574 }