hunt.stream.DataInput source code

1 
2 /*
3  * Hunt - A refined core library for D programming language.
4  *
5  * Copyright (C) 2018-2019 HuntLabs
6  *
7  * Website: https://www.huntlabs.net/
8  *
9  * Licensed under the Apache-2.0 License.
10  *
11  */
12 
13 module hunt.stream.DataInput;
14 /**
15  * The {@code DataInput} interface provides
16  * for reading bytes from a binary stream and
17  * reconstructing from them data in any of
18  * the Java primitive types. There is also
19  * a
20  * facility for reconstructing a {@code string}
21  * from data in
22  * <a href="#modified-utf-8">modified UTF-8</a>
23  * format.
24  * <p>
25  * It is generally true of all the reading
26  * routines in this interface that if end of
27  * file is reached before the desired number
28  * of bytes has been read, an {@code EOFException}
29  * (which is a kind of {@code IOException})
30  * is thrown. If any byte cannot be read for
31  * any reason other than end of file, an {@code IOException}
32  * other than {@code EOFException} is
33  * thrown. In particular, an {@code IOException}
34  * may be thrown if the input stream has been
35  * closed.
36  *
37  * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
38  * <p>
39  * Implementations of the DataInput and DataOutput interfaces represent
40  * Unicode strings in a format that is a slight modification of UTF-8.
41  * (For information regarding the standard UTF-8 format, see section
42  * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
43  * 4.0</i>)
44  *
45  * <ul>
46  * <li>Characters in the range {@code '\u0001'} to
47  *         {@code '\u007F'} are represented by a single byte.
48  * <li>The null character {@code '\u0000'} and characters
49  *         in the range {@code '\u0080'} to {@code '\u07FF'} are
50  *         represented by a pair of bytes.
51  * <li>Characters in the range {@code '\u0800'}
52  *         to {@code '\uFFFF'} are represented by three bytes.
53  * </ul>
54  *
55  *   <table class="plain" style="margin-left:2em;">
56  *     <caption>Encoding of UTF-8 values</caption>
57  *     <thead>
58  *     <tr>
59  *       <th scope="col" rowspan="2">Value</th>
60  *       <th scope="col" rowspan="2">Byte</th>
61  *       <th scope="col" colspan="8" id="bit_a">Bit Values</th>
62  *     </tr>
63  *     <tr>
64  *       <!-- Value -->
65  *       <!-- Byte -->
66  *       <th scope="col" style="width:3em"> 7 </th>
67  *       <th scope="col" style="width:3em"> 6 </th>
68  *       <th scope="col" style="width:3em"> 5 </th>
69  *       <th scope="col" style="width:3em"> 4 </th>
70  *       <th scope="col" style="width:3em"> 3 </th>
71  *       <th scope="col" style="width:3em"> 2 </th>
72  *       <th scope="col" style="width:3em"> 1 </th>
73  *       <th scope="col" style="width:3em"> 0 </th>
74  *     </thead>
75  *     <tbody>
76  *     <tr>
77  *       <th scope="row" style="text-align:left; font-weight:normal">
78  *         {@code \u0001} to {@code \u007F} </th>
79  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
80  *       <td style="text-align:center">0
81  *       <td colspan="7" style="text-align:right; padding-right:6em">bits 6-0
82  *     </tr>
83  *     <tr>
84  *       <th scope="row" rowspan="2" style="text-align:left; font-weight:normal">
85  *           {@code \u0000},<br>
86  *           {@code \u0080} to {@code \u07FF} </th>
87  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
88  *       <td style="text-align:center">1
89  *       <td style="text-align:center">1
90  *       <td style="text-align:center">0
91  *       <td colspan="5" style="text-align:right; padding-right:6em">bits 10-6
92  *     </tr>
93  *     <tr>
94  *       <!-- (value) -->
95  *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
96  *       <td style="text-align:center">1
97  *       <td style="text-align:center">0
98  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
99  *     </tr>
100  *     <tr>
101  *       <th scope="row" rowspan="3" style="text-align:left; font-weight:normal">
102  *         {@code \u0800} to {@code \uFFFF} </th>
103  *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
104  *       <td style="text-align:center">1
105  *       <td style="text-align:center">1
106  *       <td style="text-align:center">1
107  *       <td style="text-align:center">0
108  *       <td colspan="4" style="text-align:right; padding-right:6em">bits 15-12
109  *     </tr>
110  *     <tr>
111  *       <!-- (value) -->
112  *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
113  *       <td style="text-align:center">1
114  *       <td style="text-align:center">0
115  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 11-6
116  *     </tr>
117  *     <tr>
118  *       <!-- (value) -->
119  *       <th scope="row" style="font-weight:normal; text-align:center"> 3 </th>
120  *       <td style="text-align:center">1
121  *       <td style="text-align:center">0
122  *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
123  *     </tr>
124  *     </tbody>
125  *   </table>
126  *
127  * <p>
128  * The differences between this format and the
129  * standard UTF-8 format are the following:
130  * <ul>
131  * <li>The null byte {@code '\u0000'} is encoded in 2-byte format
132  *     rather than 1-byte, so that the encoded strings never have
133  *     embedded nulls.
134  * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
135  * <li>Supplementary characters (code points above U+FFFF)
136  *     are represented in the form of surrogate pairs.
137  * </ul>
138  * @author  Frank Yellin
139  * @see     java.io.DataInputStream
140  * @see     java.io.DataOutput
141  */
142 public
143 interface DataInput {
144     /**
145      * Reads some bytes from an input
146      * stream and stores them into the buffer
147      * array {@code b}. The number of bytes
148      * read is equal
149      * to the length of {@code b}.
150      * <p>
151      * This method blocks until one of the
152      * following conditions occurs:
153      * <ul>
154      * <li>{@code b.length}
155      * bytes of input data are available, in which
156      * case a normal return is made.
157      *
158      * <li>End of
159      * file is detected, in which case an {@code EOFException}
160      * is thrown.
161      *
162      * <li>An I/O error occurs, in
163      * which case an {@code IOException} other
164      * than {@code EOFException} is thrown.
165      * </ul>
166      * <p>
167      * If {@code b} is {@code null},
168      * a {@code NullPointerException} is thrown.
169      * If {@code b.length} is zero, then
170      * no bytes are read. Otherwise, the first
171      * byte read is stored into element {@code b[0]},
172      * the next one into {@code b[1]}, and
173      * so on.
174      * If an exception is thrown from
175      * this method, then it may be that some but
176      * not all bytes of {@code b} have been
177      * updated with data from the input stream.
178      *
179      * @param   b   the buffer into which the data is read.
180      * @throws  NullPointerException if {@code b} is {@code null}.
181      * @throws  EOFException  if this stream reaches the end before reading
182      *          all the bytes.
183      * @throws  IOException   if an I/O error occurs.
184      */
185     void readFully(byte[] b);
186 
187     /**
188      *
189      * Reads {@code len}
190      * bytes from
191      * an input stream.
192      * <p>
193      * This method
194      * blocks until one of the following conditions
195      * occurs:
196      * <ul>
197      * <li>{@code len} bytes
198      * of input data are available, in which case
199      * a normal return is made.
200      *
201      * <li>End of file
202      * is detected, in which case an {@code EOFException}
203      * is thrown.
204      *
205      * <li>An I/O error occurs, in
206      * which case an {@code IOException} other
207      * than {@code EOFException} is thrown.
208      * </ul>
209      * <p>
210      * If {@code b} is {@code null},
211      * a {@code NullPointerException} is thrown.
212      * If {@code off} is negative, or {@code len}
213      * is negative, or {@code off+len} is
214      * greater than the length of the array {@code b},
215      * then an {@code IndexOutOfBoundsException}
216      * is thrown.
217      * If {@code len} is zero,
218      * then no bytes are read. Otherwise, the first
219      * byte read is stored into element {@code b[off]},
220      * the next one into {@code b[off+1]},
221      * and so on. The number of bytes read is,
222      * at most, equal to {@code len}.
223      *
224      * @param   b    the buffer into which the data is read.
225      * @param   off  an int specifying the offset in the data array {@code b}.
226      * @param   len  an int specifying the number of bytes to read.
227      * @throws  NullPointerException if {@code b} is {@code null}.
228      * @throws  IndexOutOfBoundsException if {@code off} is negative,
229      *          {@code len} is negative, or {@code len} is greater than
230      *          {@code b.length - off}.
231      * @throws  EOFException  if this stream reaches the end before reading
232      *          all the bytes.
233      * @throws  IOException   if an I/O error occurs.
234      */
235     void readFully(byte[] b,  int off, int len) ;
236 
237     /**
238      * Makes an attempt to skip over
239      * {@code n} bytes
240      * of data from the input
241      * stream, discarding the skipped bytes. However,
242      * it may skip
243      * over some smaller number of
244      * bytes, possibly zero. This may result from
245      * any of a
246      * number of conditions; reaching
247      * end of file before {@code n} bytes
248      * have been skipped is
249      * only one possibility.
250      * This method never throws an {@code EOFException}.
251      * The actual
252      * number of bytes skipped is returned.
253      *
254      * @param      n   the number of bytes to be skipped.
255      * @return     the number of bytes actually skipped.
256      * @exception  IOException   if an I/O error occurs.
257      */
258     int skipBytes(int n) ;
259 
260     /**
261      * Reads one input byte and returns
262      * {@code true} if that byte is nonzero,
263      * {@code false} if that byte is zero.
264      * This method is suitable for reading
265      * the byte written by the {@code writeBoolean}
266      * method of interface {@code DataOutput}.
267      *
268      * @return     the {@code bool} value read.
269      * @exception  EOFException  if this stream reaches the end before reading
270      *               all the bytes.
271      * @exception  IOException   if an I/O error occurs.
272      */
273     bool readBoolean() ;
274 
275     /**
276      * Reads and returns one input byte.
277      * The byte is treated as a signed value in
278      * the range {@code -128} through {@code 127},
279      * inclusive.
280      * This method is suitable for
281      * reading the byte written by the {@code writeByte}
282      * method of interface {@code DataOutput}.
283      *
284      * @return     the 8-bit value read.
285      * @exception  EOFException  if this stream reaches the end before reading
286      *               all the bytes.
287      * @exception  IOException   if an I/O error occurs.
288      */
289     byte readByte() ;
290 
291     /**
292      * Reads one input byte, zero-extends
293      * it to type {@code int}, and returns
294      * the result, which is therefore in the range
295      * {@code 0}
296      * through {@code 255}.
297      * This method is suitable for reading
298      * the byte written by the {@code writeByte}
299      * method of interface {@code DataOutput}
300      * if the argument to {@code writeByte}
301      * was intended to be a value in the range
302      * {@code 0} through {@code 255}.
303      *
304      * @return     the unsigned 8-bit value read.
305      * @exception  EOFException  if this stream reaches the end before reading
306      *               all the bytes.
307      * @exception  IOException   if an I/O error occurs.
308      */
309     int readUnsignedByte() ;
310 
311     /**
312      * Reads two input bytes and returns
313      * a {@code short} value. Let {@code a}
314      * be the first byte read and {@code b}
315      * be the second byte. The value
316      * returned
317      * is:
318      * <pre>{@code (short)((a << 8) | (b & 0xff))
319      * }</pre>
320      * This method
321      * is suitable for reading the bytes written
322      * by the {@code writeShort} method of
323      * interface {@code DataOutput}.
324      *
325      * @return     the 16-bit value read.
326      * @exception  EOFException  if this stream reaches the end before reading
327      *               all the bytes.
328      * @exception  IOException   if an I/O error occurs.
329      */
330     short readShort() ;
331 
332     /**
333      * Reads two input bytes and returns
334      * an {@code int} value in the range {@code 0}
335      * through {@code 65535}. Let {@code a}
336      * be the first byte read and
337      * {@code b}
338      * be the second byte. The value returned is:
339      * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
340      * }</pre>
341      * This method is suitable for reading the bytes
342      * written by the {@code writeShort} method
343      * of interface {@code DataOutput}  if
344      * the argument to {@code writeShort}
345      * was intended to be a value in the range
346      * {@code 0} through {@code 65535}.
347      *
348      * @return     the unsigned 16-bit value read.
349      * @exception  EOFException  if this stream reaches the end before reading
350      *               all the bytes.
351      * @exception  IOException   if an I/O error occurs.
352      */
353     int readUnsignedShort() ;
354 
355     /**
356      * Reads two input bytes and returns a {@code char} value.
357      * Let {@code a}
358      * be the first byte read and {@code b}
359      * be the second byte. The value
360      * returned is:
361      * <pre>{@code (char)((a << 8) | (b & 0xff))
362      * }</pre>
363      * This method
364      * is suitable for reading bytes written by
365      * the {@code writeChar} method of interface
366      * {@code DataOutput}.
367      * <p>NOTE(review): D's {@code char} is an 8-bit code unit, so the 16-bit value described above (wording inherited from the Java API) may not fit; confirm against implementers.
368      * @return     the {@code char} value read.
369      * @exception  EOFException  if this stream reaches the end before reading
370      *               all the bytes.
371      * @exception  IOException   if an I/O error occurs.
372      */
373     char readChar() ;
374 
375     /**
376      * Reads four input bytes and returns an
377      * {@code int} value. Let {@code a-d}
378      * be the first through fourth bytes read. The value returned is:
379      * <pre>{@code
380      * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
381      *  ((c & 0xff) <<  8) | (d & 0xff))
382      * }</pre>
383      * This method is suitable
384      * for reading bytes written by the {@code writeInt}
385      * method of interface {@code DataOutput}.
386      *
387      * @return     the {@code int} value read.
388      * @exception  EOFException  if this stream reaches the end before reading
389      *               all the bytes.
390      * @exception  IOException   if an I/O error occurs.
391      */
392     int readInt() ;
393 
394     /**
395      * Reads eight input bytes and returns
396      * a {@code long} value. Let {@code a-h}
397      * be the first through eighth bytes read.
398      * The value returned is:
399      * <pre>{@code
400      * (((long)(a & 0xff) << 56) |
401      *  ((long)(b & 0xff) << 48) |
402      *  ((long)(c & 0xff) << 40) |
403      *  ((long)(d & 0xff) << 32) |
404      *  ((long)(e & 0xff) << 24) |
405      *  ((long)(f & 0xff) << 16) |
406      *  ((long)(g & 0xff) <<  8) |
407      *  ((long)(h & 0xff)))
408      * }</pre>
409      * <p>
410      * This method is suitable
411      * for reading bytes written by the {@code writeLong}
412      * method of interface {@code DataOutput}.
413      *
414      * @return     the {@code long} value read.
415      * @exception  EOFException  if this stream reaches the end before reading
416      *               all the bytes.
417      * @exception  IOException   if an I/O error occurs.
418      */
419     long readLong() ;
420 
421     /**
422      * Reads four input bytes and returns
423      * a {@code float} value. It does this
424      * by first constructing an {@code int}
425      * value in exactly the manner
426      * of the {@code readInt}
427      * method, then converting this {@code int}
428      * value to a {@code float} in
429      * exactly the manner of the method {@code Float.intBitsToFloat}.
430      * This method is suitable for reading
431      * bytes written by the {@code writeFloat}
432      * method of interface {@code DataOutput}.
433      *
434      * @return     the {@code float} value read.
435      * @exception  EOFException  if this stream reaches the end before reading
436      *               all the bytes.
437      * @exception  IOException   if an I/O error occurs.
438      */
439     float readFloat() ;
440 
441     /**
442      * Reads eight input bytes and returns
443      * a {@code double} value. It does this
444      * by first constructing a {@code long}
445      * value in exactly the manner
446      * of the {@code readLong}
447      * method, then converting this {@code long}
448      * value to a {@code double} in exactly
449      * the manner of the method {@code Double.longBitsToDouble}.
450      * This method is suitable for reading
451      * bytes written by the {@code writeDouble}
452      * method of interface {@code DataOutput}.
453      *
454      * @return     the {@code double} value read.
455      * @exception  EOFException  if this stream reaches the end before reading
456      *               all the bytes.
457      * @exception  IOException   if an I/O error occurs.
458      */
459     double readDouble() ;
460 
461     /**
462      * Reads the next line of text from the input stream.
463      * It reads successive bytes, converting
464      * each byte separately into a character,
465      * until it encounters a line terminator or
466      * end of
467      * file; the characters read are then
468      * returned as a {@code string}. Note
469      * that because this
470      * method processes bytes,
471      * it does not support input of the full Unicode
472      * character set.
473      * <p>
474      * If end of file is encountered
475      * before even one byte can be read, then {@code null}
476      * is returned. Otherwise, each byte that is
477      * read is converted to type {@code char}
478      * by zero-extension. If the character {@code '\n'}
479      * is encountered, it is discarded and reading
480      * ceases. If the character {@code '\r'}
481      * is encountered, it is discarded and, if
482      * the following byte converts to the
483      * character {@code '\n'}, then that is
484      * discarded also; reading then ceases. If
485      * end of file is encountered before either
486      * of the characters {@code '\n'} and
487      * {@code '\r'} is encountered, reading
488      * ceases. Once reading has ceased, a {@code string}
489      * is returned that contains all the characters
490      * read and not discarded, taken in order.
491      * Note that every character in this string
492      * will have a value less than {@code \u0100},
493      * that is, {@code (char)256}.
494      *
495      * @return the next line of text from the input stream,
496      *         or {@code null} if the end of file is
497      *         encountered before a byte can be read.
498      * @exception  IOException  if an I/O error occurs.
499      */
500     string readLine() ;
501 
502     /**
503      * Reads in a string that has been encoded using a
504      * <a href="#modified-utf-8">modified UTF-8</a>
505      * format.
506      * The general contract of {@code readUTF}
507      * is that it reads a representation of a Unicode
508      * character string encoded in modified
509      * UTF-8 format; this string of characters
510      * is then returned as a {@code string}.
511      * <p>
512      * First, two bytes are read and used to
513      * construct an unsigned 16-bit integer in
514      * exactly the manner of the {@code readUnsignedShort}
515      * method. This integer value is called the
516      * <i>UTF length</i> and specifies the number
517      * of additional bytes to be read. These bytes
518      * are then converted to characters by considering
519      * them in groups. The length of each group
520      * is computed from the value of the first
521      * byte of the group. The byte following a
522      * group, if any, is the first byte of the
523      * next group.
524      * <p>
525      * If the first byte of a group
526      * matches the bit pattern {@code 0xxxxxxx}
527      * (where {@code x} means "may be {@code 0}
528      * or {@code 1}"), then the group consists
529      * of just that byte. The byte is zero-extended
530      * to form a character.
531      * <p>
532      * If the first byte
533      * of a group matches the bit pattern {@code 110xxxxx},
534      * then the group consists of that byte {@code a}
535      * and a second byte {@code b}. If there
536      * is no byte {@code b} (because byte
537      * {@code a} was the last of the bytes
538      * to be read), or if byte {@code b} does
539      * not match the bit pattern {@code 10xxxxxx},
540      * then a {@code UTFDataFormatException}
541      * is thrown. Otherwise, the group is converted
542      * to the character:
543      * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
544      * }</pre>
545      * If the first byte of a group
546      * matches the bit pattern {@code 1110xxxx},
547      * then the group consists of that byte {@code a}
548      * and two more bytes {@code b} and {@code c}.
549      * If there is no byte {@code c} (because
550      * byte {@code a} was one of the last
551      * two of the bytes to be read), or either
552      * byte {@code b} or byte {@code c}
553      * does not match the bit pattern {@code 10xxxxxx},
554      * then a {@code UTFDataFormatException}
555      * is thrown. Otherwise, the group is converted
556      * to the character:
557      * <pre>{@code
558      * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
559      * }</pre>
560      * If the first byte of a group matches the
561      * pattern {@code 1111xxxx} or the pattern
562      * {@code 10xxxxxx}, then a {@code UTFDataFormatException}
563      * is thrown.
564      * <p>
565      * If end of file is encountered
566      * at any time during this entire process,
567      * then an {@code EOFException} is thrown.
568      * <p>
569      * After every group has been converted to
570      * a character by this process, the characters
571      * are gathered, in the same order in which
572      * their corresponding groups were read from
573      * the input stream, to form a {@code string},
574      * which is returned.
575      * <p>
576      * The {@code writeUTF}
577      * method of interface {@code DataOutput}
578      * may be used to write data that is suitable
579      * for reading by this method.
580      * @return     a Unicode string.
581      * @exception  EOFException            if this stream reaches the end
582      *               before reading all the bytes.
583      * @exception  IOException             if an I/O error occurs.
584      * @exception  UTFDataFormatException  if the bytes do not represent a
585      *               valid modified UTF-8 encoding of a string.
586      */
587     string readUTF() ;
588 }