openconnect-unknown/lzs.c

1 /*
2  * OpenConnect (SSL + DTLS) VPN client
3  *
4  * Copyright © 2008-2015 Intel Corporation.
5  *
6  * Author: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public License
10  * version 2.1, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  */
17 
18 #include <config.h>
19 
20 #include <errno.h>
21 #include <string.h>
22 #include <stdint.h>
23 
24 #include "openconnect-internal.h"
25 
26 #define GET_BITS(bits)							\
27 do {									\
28 	/* Strictly speaking, this check ought to be on			\
29 	 * (srclen < 1 + (bits_left < bits)). However, when bits == 9	\
30 	 * the (bits_left < bits) comparison is always true so it	\
31 	 * always comes out as (srclen < 2).				\
32 	 * And bits is only anything *other* than 9 when we're reading	\
33 	 * reading part of a match encoding. And in that case, there	\
34 	 * damn well ought to be an end marker (7 more bits) after	\
35 	 * what we're reading now, so it's perfectly OK to use		\
36 	 * (srclen < 2) in that case too. And a *lot* cheaper. */	\
37 	if (srclen < 2)							\
38 		return -EINVAL;						\
39 	/* Explicit comparison with 8 to optimise it into a tautology	\
40 	 * in the the bits == 9 case, because the compiler doesn't	\
41 	 * know that bits_left can never be larger than 8. */		\
42 	if (bits >= 8 || bits >= bits_left) {				\
43 		/* We need *all* the bits that are left in the current	\
44 		 * byte. Take them and bump the input pointer. */	\
45 		data = (src[0] << (bits - bits_left)) & ((1 << bits) - 1); \
46 		src++;							\
47 		srclen--;						\
48 		bits_left += 8 - bits;					\
49 		if (bits > 8 || bits_left < 8) {			\
50 			/* We need bits from the next byte too... */	\
51 			data |= src[0] >> bits_left;			\
52 			/* ...if we used *all* of them then (which can	\
53 			 * only happen if bits > 8), then bump the	\
54 			 * input pointer again so we never leave	\
55 			 * bits_left == 0. */				\
56 			if (bits > 8 && !bits_left) {			\
57 				bits_left = 8;				\
58 				src++;					\
59 				srclen--;				\
60 			}						\
61 		}							\
62 	} else {							\
63 		/* We need fewer bits than are left in the current byte */ \
64 		data = (src[0] >> (bits_left - bits)) & ((1ULL << bits) - 1); \
65 		bits_left -= bits;					\
66 	}								\
67 } while (0)
68 
69 int lzs_decompress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
70 {
71 	int outlen = 0;
72 	int bits_left = 8; /* Bits left in the current byte at *src */
73 	uint32_t data;
74 	uint16_t offset, length;
75 
76 	while (1) {
77 		/* Get 9 bits, which is the minimum and a common case */
78 		GET_BITS(9);
79 
80 		/* 0bbbbbbbb is a literal byte. The loop gives a hint to
81 		 * the compiler that we expect to see a few of these. */
82 		while (data < 0x100) {
83 			if (outlen == dstlen)
84 				return -EFBIG;
85 			dst[outlen++] = data;
86 			GET_BITS(9);
87 		}
88 
89 		/* 110000000 is the end marker */
90 		if (data == 0x180)
91 			return outlen;
92 
93 		/* 11bbbbbbb is a 7-bit offset */
94 		offset = data & 0x7f;
95 
96 		/* 10bbbbbbbbbbb is an 11-bit offset, so get the next 4 bits */
97 		if (data < 0x180) {
98 			GET_BITS(4);
99 
100 			offset <<= 4;
101 			offset |= data;
102 		}
103 
104 		/* This is a compressed sequence; now get the length */
105 		GET_BITS(2);
106 		if (data != 3) {
107 			/* 00, 01, 10 ==> 2, 3, 4 */
108 			length = data + 2;
109 		} else {
110 			GET_BITS(2);
111 			if (data != 3) {
112 				/* 1100, 1101, 1110 => 5, 6, 7 */
113 				length = data + 5;
114 			} else {
115 				/* For each 1111 prefix add 15 to the length. Then add
116 				   the value of final nybble. */
117 				length = 8;
118 
119 				while (1) {
120 					GET_BITS(4);
121 					if (data != 15) {
122 						length += data;
123 						break;
124 					}
125 					length += 15;
126 				}
127 			}
128 		}
129 		if (offset > outlen)
130 			return -EINVAL;
131 		if (length + outlen > dstlen)
132 			return -EFBIG;
133 
134 		while (length) {
135 			dst[outlen] = dst[outlen - offset];
136 			outlen++;
137 			length--;
138 		}
139 	}
140 	return -EINVAL;
141 }
142 
143 #define PUT_BITS(nr, bits)					\
144 do {								\
145 	outbits <<= (nr);					\
146 	outbits |= (bits);					\
147 	nr_outbits += (nr);					\
148 	if ((nr) > 8) {						\
149 		nr_outbits -= 8;				\
150 		if (outpos == dstlen)				\
151 			return -EFBIG;				\
152 		dst[outpos++] = outbits >> nr_outbits;		\
153 	}							\
154 	if (nr_outbits >= 8) {					\
155 		nr_outbits -= 8;				\
156 		if (outpos == dstlen)				\
157 			return -EFBIG;				\
158 		dst[outpos++] = outbits >> nr_outbits;		\
159 	}							\
160 } while (0)
161 
162 /*
163  * Much of the compression algorithm used here is based very loosely on ideas
164  * from isdn_lzscomp.c by Andre Beck: http://micky.ibh.de/~beck/stuff/lzs4i4l/
165  */
166 int lzs_compress(unsigned char *dst, int dstlen, const unsigned char *src, int srclen)
167 {
168 	int length, offset;
169 	int inpos = 0, outpos = 0;
170 	uint16_t longest_match_len;
171 	uint16_t hofs, longest_match_ofs;
172 	uint16_t hash;
173 	uint32_t outbits = 0;
174 	int nr_outbits = 0;
175 
176 	/*
177 	 * This is theoretically a hash. But RAM is cheap and just loading the
178 	 * 16-bit value and using it as a hash is *much* faster.
179 	 */
180 #define HASH_BITS 16
181 #define HASH_TABLE_SIZE (1ULL << HASH_BITS)
182 #define HASH(p) (((struct oc_packed_uint16_t *)(p))->d)
183 
184 	/*
185 	 * There are two data structures for tracking the history. The first
186 	 * is the true hash table, an array indexed by the hash value described
187 	 * above. It yields the offset in the input buffer at which the given
188 	 * hash was most recently seen. We use INVALID_OFS (0xffff) for none
189 	 * since we know IP packets are limited to 64KiB and we can never be
190 	 * *starting* a match at the penultimate byte of the packet.
191 	 */
192 #define INVALID_OFS 0xffff
193 	uint16_t hash_table[HASH_TABLE_SIZE]; /* Buffer offset for first match */
194 
195 	/*
196 	 * The second data structure allows us to find the previous occurrences
197 	 * of the same hash value. It is a ring buffer containing links only for
198 	 * the latest MAX_HISTORY bytes of the input. The lookup for a given
199 	 * offset will yield the previous offset at which the same data hash
200 	 * value was found.
201 	 */
202 #define MAX_HISTORY (1<<11) /* Highest offset LZS can represent is 11 bits */
203 	uint16_t hash_chain[MAX_HISTORY];
204 
205 	/* Just in case anyone tries to use this in a more general-purpose
206 	 * scenario... */
207 	if (srclen > INVALID_OFS + 1)
208 		return -EFBIG;
209 
210 	/* No need to initialise hash_chain since we can only ever follow
211 	 * links to it that have already been initialised. */
212 	memset(hash_table, 0xff, sizeof(hash_table));
213 
214 	while (inpos < srclen - 2) {
215 		hash = HASH(src + inpos);
216 		hofs = hash_table[hash];
217 
218 		hash_chain[inpos & (MAX_HISTORY - 1)] = hofs;
219 		hash_table[hash] = inpos;
220 
221 		if (hofs == INVALID_OFS || hofs + MAX_HISTORY <= inpos) {
222 			PUT_BITS(9, src[inpos]);
223 			inpos++;
224 			continue;
225 		}
226 
227 		/* Since the hash is 16-bits, we *know* the first two bytes match */
228 		longest_match_len = 2;
229 		longest_match_ofs = hofs;
230 
231 		for (; hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos;
232 		     hofs = hash_chain[hofs & (MAX_HISTORY - 1)]) {
233 
234 			/* We only get here if longest_match_len is >= 2. We need to find
235 			   a match of longest_match_len + 1 for it to be interesting. */
236 			if (!memcmp(src + hofs + 2, src + inpos + 2, longest_match_len - 1)) {
237 				longest_match_ofs = hofs;
238 
239 				do {
240 					longest_match_len++;
241 
242 					/* If we cannot *have* a longer match because we're at the
243 					 * end of the input, stop looking */
244 					if (longest_match_len + inpos == srclen)
245 						goto got_match;
246 
247 				} while (src[longest_match_len + inpos] == src[longest_match_len + hofs]);
248 			}
249 
250 			/* Typical compressor tuning would have a break out of the loop
251 			   here depending on the number of potential match locations we've
252 			   tried, or a value of longest_match_len that's considered "good
253 			   enough" so we stop looking for something better. We could also
254 			   do a hybrid where we count the total bytes compared, so 5
255 			   attempts to find a match better than 10 bytes is worth the same
256 			   as 10 attempts to find a match better than 5 bytes. Or
257 			   something. Anyway, we currently don't give up until we run out
258 			   of reachable history — maximal compression. */
259 		}
260 	got_match:
261 		/* Output offset, as 7-bit or 11-bit as appropriate */
262 		offset = inpos - longest_match_ofs;
263 		length = longest_match_len;
264 
265 		if (offset < 0x80)
266 			PUT_BITS(9, 0x180 | offset);
267 		else
268 			PUT_BITS(13, 0x1000 | offset);
269 
270 		/* Output length */
271 		if (length < 5)
272 			PUT_BITS(2, length - 2);
273 		else if (length < 8)
274 			PUT_BITS(4, length + 7);
275 		else {
276 			length += 7;
277 			while (length >= 30) {
278 				PUT_BITS(8, 0xff);
279 				length -= 30;
280 			}
281 			if (length >= 15)
282 				PUT_BITS(8, 0xf0 + length - 15);
283 			else
284 				PUT_BITS(4, length);
285 		}
286 
287 		/* If we're already done, don't bother updating the hash tables. */
288 		if (inpos + longest_match_len >= srclen - 2) {
289 			inpos += longest_match_len;
290 			break;
291 		}
292 
293 		/* We already added the first byte to the hash tables. Add the rest. */
294 		inpos++;
295 		while (--longest_match_len) {
296 			hash = HASH(src + inpos);
297 			hash_chain[inpos & (MAX_HISTORY - 1)] = hash_table[hash];
298 			hash_table[hash] = inpos++;
299 		}
300 	}
301 
302 	/* Special cases at the end */
303 	if (inpos == srclen - 2) {
304 		hash = HASH(src + inpos);
305 		hofs = hash_table[hash];
306 
307 		if (hofs != INVALID_OFS && hofs + MAX_HISTORY > inpos) {
308 			offset = inpos - hofs;
309 
310 			if (offset < 0x80)
311 				PUT_BITS(9, 0x180 | offset);
312 			else
313 				PUT_BITS(13, 0x1000 | offset);
314 
315 			/* The length is 2 bytes */
316 			PUT_BITS(2, 0);
317 		} else {
318 			PUT_BITS(9, src[inpos]);
319 			PUT_BITS(9, src[inpos + 1]);
320 		}
321 	} else if (inpos == srclen - 1) {
322 		PUT_BITS(9, src[inpos]);
323 	}
324 
325 	/* End marker, with 7 trailing zero bits to ensure that it's flushed. */
326 	PUT_BITS(16, 0xc000);
327 
328 	return outpos;
329 }