web_arg_encode.anubis
9.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
*Project* The Anubis Project
*Title* Encoding data for web argument values.
*Copyright* Copyright (c) Alain Prouté 2002.
*Author* Alain Prouté
*Overview*
This file contains encoding and decoding functions which allow any serializable datum
to be used as the value of a web argument. The datum is serialized, and the result of
the serialization (a byte array) is encoded in such a way that it can safely be used as
the value of a web argument. The encoding process is similar to the standard 'base64'
process, but nevertheless different, because base64 is not suitable for that purpose.
   public define String
      web_arg_encode
        (
          $T datum
        ).
   public define Maybe($T)
      web_arg_decode
        (
          String encoded_value
        ).
Of course, since the type of the datum is not available from 'encoded_value', a term
like 'web_arg_decode(my_string)' must generally be explicitly typed, like this:
(Maybe(MyType))web_arg_decode(my_string)
These functions are used for example in 'anubis/library/web/kernel.anubis'.
--- That's all for the public part. ---------------------------------------------------
read tools/basis.anubis
Our algorithms are copy-pasted from 'base64.anubis' and slightly modified. The point is
twofold:
(1) base64 encoding inserts carriage return (CR) and line feed (LF) characters every
76 characters, but CR and LF are not suitable in the value of a web argument,
(2) the base64 alphabet uses '+' and '/', which are also not suitable in the value of
a web argument, because they have special meanings.
Hence, we just have to modify the base64 algorithms, so as not to generate any CR or
LF, and use '-' and '_' instead of '+' and '/'. Also, we do not use padding characters
'=', which are needless (as remarked in 'anubis/library/tools/base64.anubis').
*** Encoding. *************************************************************************
Translate an index into a wa64 character.
define Word8
   truncate_to_word8
     (
       Word32 n
     ) =
   // Keeps only one byte of 'n': the first component of the Word32, then the first
   // component of that Word16. This is the exact inverse of the widening done by
   // 'to_word32' in this file, so a value below 256 comes back unchanged.
   if n is word32(low_half,_) then
   if low_half is word16(low_byte,_) then
      low_byte.
define Word8
   wa64_alphabet
     (
       Word32 index     // expected to satisfy 0 <= index < 64
     ) =
   // Maps a 6-bit value to its character in the wa64 alphabet:
   //     0..25  gives 'A'..'Z'
   //    26..51  gives 'a'..'z'
   //    52..61  gives '0'..'9'
   //    62      gives '-'
   //    63      gives '_'
   // Any index outside 0..63 ends in 'should_not_happen'.
   // The ranges are disjoint, so the two single-character cases can be tested first.
   if index = 62 then '-' else
   if index = 63 then '_' else
   if index -< 0 then should_not_happen(0) else
   if index -< 26 then truncate_to_word8(index+'A') else
   if index -< 52 then truncate_to_word8(index-26+'a') else
   if index -< 62 then truncate_to_word8(index-52+'0') else
      should_not_happen(0).
Transform a group of 3 bytes into a group of 4 wa64 letters.
define Word32
   to_word32
     (
       Word8 x
     ) =
   // Widens a byte to 32 bits: 'x' becomes the first component of a Word16 whose other
   // component is 0, and that Word16 becomes the first component of a Word32 whose
   // other component is 0.
   with half = word16(x,0),
      word32(half,0).
define (Word8,Word8,Word8,Word8)
   transform_group
     (
       Word8 byte1,
       Word8 byte2,
       Word8 byte3
     ) =
   // Splits the 24 bits of (byte1,byte2,byte3) into four 6-bit values and returns the
   // corresponding four wa64 characters, most significant bits first.
   with w1 = to_word32(byte1),
        w2 = to_word32(byte2),
        w3 = to_word32(byte3),
      (
        wa64_alphabet(w1>>2),                      // top 6 bits of byte1
        wa64_alphabet(((w1&3)<<4)|(w2>>4)),        // low 2 bits of byte1, top 4 of byte2
        wa64_alphabet(((w2&15)<<2)|(w3>>6)),       // low 4 bits of byte2, top 2 of byte3
        wa64_alphabet(w3&63)                       // low 6 bits of byte3
      ).
Transform a group of two bytes.
define ByteArray
   two_mod_three
     (
       ByteArray result,
       Int       result_index,
       Word8     byte1,
       Word8     byte2
     ) =
   // Encodes a final, incomplete group of two bytes as three wa64 characters written at
   // result_index, result_index+1 and result_index+2, followed by a terminating 0.
   // No '=' padding is produced. Returns 'result'.
   with n1 = to_word32(byte1),
        n2 = to_word32(byte2),
      forget(put(result,result_index  ,wa64_alphabet(n1>>2)));
      forget(put(result,result_index+1,wa64_alphabet(((n1&3)<<4)|(n2>>4))));
      forget(put(result,result_index+2,wa64_alphabet((n2&15)<<2)));
      // The terminator goes right after the third character. It used to be written at
      // result_index+4, which was only correct because the caller zero-fills the buffer.
      forget(put(result,result_index+3,0));
      result.
Transform a 'group of one byte'.
define ByteArray
   one_mod_three
     (
       ByteArray result,
       Int       result_index,
       Word8     byte1
     ) =
   // Encodes a final, incomplete group of one byte as two wa64 characters written at
   // result_index and result_index+1, followed by a terminating 0.
   // No '=' padding is produced. Returns 'result'.
   with n1 = to_word32(byte1),
      forget(put(result,result_index  ,wa64_alphabet(n1>>2)));
      forget(put(result,result_index+1,wa64_alphabet((n1&3)<<4)));
      // The terminator goes right after the second character. It used to be written at
      // result_index+4, which was only correct because the caller zero-fills the buffer.
      forget(put(result,result_index+2,0));
      result.
define ByteArray
   wa64_encode
     (
       ByteArray ba,
       Int       ba_index,          // read position in 'ba'
       ByteArray result,
       Int       result_index       // write position in 'result'
     ) =
   // Encodes 'ba' from 'ba_index' onwards into 'result' from 'result_index' onwards,
   // three input bytes (four output characters) per recursive step. A trailing group
   // of one or two bytes is handed to 'one_mod_three' or 'two_mod_three'. When the
   // input ends exactly on a group boundary a terminating 0 is written. Returns 'result'.
   if nth(ba_index,ba) is
     {
       failure then                 // input exhausted on a group boundary
         (
           forget(put(result,result_index,0));
           result
         ),
       success(first) then
         if nth(ba_index+1,ba) is
           {
             failure then one_mod_three(result,result_index,first),
             success(second) then
               if nth(ba_index+2,ba) is
                 {
                   failure then two_mod_three(result,result_index,first,second),
                   success(third) then
                     if transform_group(first,second,third) is (q1,q2,q3,q4) then
                       (
                         forget(put(result,result_index  ,q1));
                         forget(put(result,result_index+1,q2));
                         forget(put(result,result_index+2,q3));
                         forget(put(result,result_index+3,q4));
                         wa64_encode(ba,ba_index+3,result,result_index+4)
                       )
                 }
           }
     }.
define Int
   Int x / Int y
     =
   // Quotient-only division on Int, built on the division that yields
   // Maybe((Int,Int)). The second component of the pair is ignored. When that
   // division fails (presumably for y = 0; confirm against the library) the result is 0.
   if (Maybe((Int,Int)))(x/y) is
     {
       failure          then 0,
       success(outcome) then if outcome is (quotient,remainder) then quotient
     }.
define ByteArray
   wa64_encode
     (
       ByteArray ba
     ) =
   // Encodes the whole of 'ba' into a freshly allocated, zero-filled byte array.
   // The buffer is sized generously: 57 input bytes encode to exactly 76 characters,
   // so one block of 76 is reserved per started run of 57 bytes, plus 10 spare bytes.
   with input_length = (Int)length(ba),
        blocks       = (Int)((input_length/57)+1),
        capacity     = (Int)((blocks*76)+10),
        buffer       = constant_byte_array(capacity,0),
      wa64_encode(ba,0,buffer,0).
public define String
   web_arg_encode
     (
       $T datum
     ) =
   // Serializes 'datum', encodes the resulting bytes with the wa64 alphabet and
   // returns the encoded bytes as a String.
   with raw     = (ByteArray)serialize(datum),
        encoded = (ByteArray)wa64_encode(raw),
      to_string(encoded).
*** Decoding. *************************************************************************
See the comments in 'anubis/library/tools/base64.anubis'.
Checking if a character belongs to the wa64 alphabet. If true, the function returns the
index of the character in the alphabet.
define Maybe(Word32)
   is_wa64_char
     (
       Word8 c
     ) =
   // Inverse of 'wa64_alphabet': yields success(index) when 'c' belongs to the wa64
   // alphabet, and failure for any other byte. The character classes are disjoint,
   // so the order of the tests does not matter.
   if c = '-' then success(62) else
   if c = '_' then success(63) else
   if ('0' +=< c & c +=< '9') then success(to_Word32(c - '0' + 52)) else
   if ('a' +=< c & c +=< 'z') then success(to_Word32(c - 'a' + 26)) else
   if ('A' +=< c & c +=< 'Z') then success(to_Word32(c - 'A')) else
      failure.
Getting the next wa64 character from the input. The function returns the next position
for reading. The function does not return the character itself, but its index in the
alphabet.
define Maybe((Int,       // position to read from next
              Word32))   // index of the character just read in the wa64 alphabet
   get_next_character
     (
       ByteArray ba,
       Int       n
     ) =
   // Reads the byte at position 'n' of 'ba'. Gives failure when there is no byte at
   // that position or when the byte is not a wa64 character; otherwise gives the
   // following position together with the character's alphabet index.
   if nth(n,ba) is
     {
       failure     then failure,
       success(ch) then
         if is_wa64_char(ch) is
           {
             failure        then failure,
             success(value) then success((n+1,value))
           }
     }.
Translating a group of characters into a group of bytes.
type TranslateGroupResult2:
   // Outcome of decoding one group of up to four wa64 characters.
   // Only 'three_bytes' carries a new reading position, because it is the only case
   // after which 'translate_groups' goes on reading.
   three_bytes (Int new_pos, Word8 b1, Word8 b2, Word8 b3),   // complete group of 4 characters
   two_bytes ( Word8 b1, Word8 b2 ),                          // final group of 3 characters
   one_byte ( Word8 b1 ),                                     // final group of 2 characters
   zero_bytes,                                                // no character could be read
   error.                                                     // a single character, which cannot encode a byte
define TranslateGroupResult2
   translate_group
     (
       ByteArray ba,
       Int       n
     ) =
   // Decodes the group of wa64 characters starting at position 'n' of 'ba'.
   // Reading stops at the end of 'ba' or at the first byte outside the alphabet:
   //    0 characters read  gives zero_bytes
   //    1 character read   gives error
   //    2 characters read  gives one_byte
   //    3 characters read  gives two_bytes
   //    4 characters read  gives three_bytes, with the position after the group.
   if get_next_character(ba,n) is
     {
       failure then zero_bytes,
       success(r1) then if r1 is (pos1,v1) then
         if get_next_character(ba,pos1) is
           {
             failure then error,
             success(r2) then if r2 is (pos2,v2) then
               with first = truncate_to_word8((v1<<2)|(v2>>4)),
               if get_next_character(ba,pos2) is
                 {
                   failure then              // padding characters are not checked
                     one_byte(first),
                   success(r3) then if r3 is (pos3,v3) then
                     with second = truncate_to_word8(((v2&15)<<4)|(v3>>2)),
                     if get_next_character(ba,pos3) is
                       {
                         failure then
                           two_bytes(first,second),
                         success(r4) then if r4 is (pos4,v4) then
                           three_bytes(pos4,
                                       first,
                                       second,
                                       truncate_to_word8(((v3&3)<<6)|v4))
                       }
                 }
           }
     }.
define Int        // size of the decoded array of bytes
   translate_groups
     (
       ByteArray source,
       Int       n,          // read position in 'source'
       ByteArray target,
       Int       m           // write position in 'target'
     ) =
   // Decodes 'source' group by group into 'target' and returns the number of bytes
   // written. Decoding continues only after a complete group. An incomplete group,
   // the end of the input or an 'error' group all stop it, and in the 'error' case
   // the count reached so far is returned without any error being signalled.
   if translate_group(source,n) is
     {
       three_bytes(next,x1,x2,x3) then
         (
           forget(put(target,m  ,x1));
           forget(put(target,m+1,x2));
           forget(put(target,m+2,x3));
           translate_groups(source,next,target,m+3)
         ),
       two_bytes(x1,x2) then
         (
           forget(put(target,m  ,x1));
           forget(put(target,m+1,x2));
           m+2
         ),
       one_byte(x1) then
         (
           forget(put(target,m,x1));
           m+1
         ),
       zero_bytes then m,
       error      then m
     }.
define ByteArray
   wa64_decode
     (
       ByteArray ba
     ) =
   // Decodes the wa64 text held in 'ba'. The work buffer has the same length as the
   // input, which is always enough since four characters decode to at most three
   // bytes. Its initial '0' filling is cut away by the final 'truncate', which is
   // given the number of bytes actually decoded.
   with input_length = (Int)length(ba),
        buffer       = constant_byte_array(input_length,'0'),
        decoded_size = (Int)translate_groups(ba,0,buffer,0),
      truncate(buffer,decoded_size);
      buffer.
public define Maybe($T)
   web_arg_decode
     (
       String encoded_datum
     ) =
   // Reverses 'web_arg_encode': turns the text into bytes, decodes them from wa64 and
   // unserializes the result. The caller normally has to type the call explicitly,
   // since the type of the datum cannot be recovered from the text alone.
   with text_bytes = (ByteArray)to_byte_array(encoded_datum),
        raw        = (ByteArray)wa64_decode(text_bytes),
      (Maybe($T))unserialize(raw).