web_arg_encode.anubis 9.7 KB

 *Project*                             The Anubis Project
   
 *Title*                      Encoding data for web argument values.
   
 *Copyright*                     Copyright (c) Alain Prouté 2002. 


 *Author*       Alain Prouté

   
   
 *Overview*
   This file contains encoding and decoding functions which make it possible to use any
   serializable datum as the value of a web argument.  The datum is serialized, and the
   result of serialization (a byte array) is encoded in such a way that it can safely be
   used as the value of a web argument.  The encoding process is similar to the standard
   'base64' process, but nevertheless different, because base64 is not suitable for that
   purpose.
   
   
public define String
  web_arg_encode           // serializes 'datum' and encodes the bytes as a web argument value
    (
      $T datum             // any serializable datum
    ). 
   
public define Maybe($T)
  web_arg_decode           // decodes the string into bytes and unserializes them
    (
      String encoded_value // a string as produced by 'web_arg_encode'
    ). 
   
   Of course, since  the type of the  datum is not available from  'encoded_value', a term
   like 'web_arg_decode(my_string)' must generally be explicitly typed, like this:
   
                       (Maybe(MyType))web_arg_decode(my_string)
   
   
   These functions are used for example in 'anubis/library/web/kernel.anubis'. 
   
   
   
   
   
   --- That's all for the public part. ---------------------------------------------------

read tools/basis.anubis   
   
   Our algorithms are copy-pasted from 'base64.anubis' and slightly modified. The point is
   twofold:
   
     (1) base64 encoding inserts carriage return  (CR) and line feed (LF) characters every
   76 characters, but CR and LF are not suitable in the value of a web argument, 
   
     (2) the base64 alphabet uses '+' and '/', which are also not suitable in the value of
   a web argument, because they have special meanings.
   
   Hence, we just  have to modify the base64  algorithms, so as not to generate  any CR or
   LF, and use '-' and '_' instead of  '+' and '/'. Also, we do not use padding characters
   '=', which are needless (as remarked in 'anubis/library/tools/base64.anubis').
   
   
   
   *** Encoding. *************************************************************************
   
   Translate an index into a wa64 character. 
   
define Word8
   truncate_to_word8        // extracts a single byte from a 32 bits word
     (
       Word32 n
     ) =
   if n is word32(w16,_)            // keep the first 16 bits component of the word
   then if w16 is word16(w8,_)      // keep the first 8 bits component of that half
        then w8.    
   
define Word8
  wa64_alphabet             // maps an alphabet index to the corresponding wa64 letter
    (
      Word32 index          // the index is assumed to be >= 0 and < 64
    ) =
  if index -< 0       then should_not_happen(0)
  else if index -< 26 then truncate_to_word8(index+'A')         // indices 0 to 25: upper case letters
  else if index -< 52 then truncate_to_word8(index-26+'a')      // indices 26 to 51: lower case letters
  else if index -< 62 then truncate_to_word8(index-52+'0')      // indices 52 to 61: digits
  else if index =  62 then '-'                                  // replaces the '+' of base64
  else if index =  63 then '_'                                  // replaces the '/' of base64
  else should_not_happen(0). 
   
   
   
   Transform a group of 3 bytes into a group of 4 wa64 letters. 
   
   
define Word32
   to_word32                // widens a byte to a 32 bits word, all other components being 0
     (
       Word8 x
     ) =
   word32(
           word16(x,0),     // the byte becomes the first component of a 16 bits word
           0
         ). 
   
define (Word8,Word8,Word8,Word8)
  transform_group           // turns 3 bytes (24 bits) into 4 wa64 letters (6 bits each)
    (
      Word8 byte1, 
      Word8 byte2, 
      Word8 byte3
    ) =
  with w1 = to_word32(byte1), 
       w2 = to_word32(byte2), 
       w3 = to_word32(byte3), 
  (
    wa64_alphabet(w1>>2),                   // 6 high bits of byte1
    wa64_alphabet(((w1&3)<<4)|(w2>>4)),     // 2 low bits of byte1, then 4 high bits of byte2
    wa64_alphabet(((w2&15)<<2)|(w3>>6)),    // 4 low bits of byte2, then 2 high bits of byte3
    wa64_alphabet(w3&63)                    // 6 low bits of byte3
  ).
   
   
   Transform a group of two bytes. 
   
define ByteArray  
  two_mod_three             // encodes a final group of only two bytes as three wa64 letters
    (
      ByteArray result,     // output buffer, written in place and returned
      Int result_index,     // position in 'result' of the first letter to write
      Word8 byte1, 
      Word8 byte2
    ) =
  with n1 = to_word32(byte1), 
       n2 = to_word32(byte2), 
  forget(put(result,result_index  ,wa64_alphabet(n1>>2))); 
  forget(put(result,result_index+1,wa64_alphabet(((n1&3)<<4)|(n2>>4)))); 
  forget(put(result,result_index+2,wa64_alphabet((n2&15)<<2)));      // 4 low bits of byte2, completed with 0 bits
  // wa64 has no '=' padding, so the terminating zero goes right after the third letter
  // (it was previously written at result_index+4, leaving slot +3 to the initial zero fill)
  forget(put(result,result_index+3,0)); 
  result. 

   
   
   Transform a 'group of one byte'. 
   
define ByteArray
  one_mod_three             // encodes a final group of only one byte as two wa64 letters
    (
      ByteArray result,     // output buffer, written in place and returned
      Int result_index,     // position in 'result' of the first letter to write
      Word8 byte1
    ) =
  with n1 = to_word32(byte1), 
  forget(put(result,result_index  ,wa64_alphabet(n1>>2))); 
  forget(put(result,result_index+1,wa64_alphabet((n1&3)<<4)));       // 2 low bits of byte1, completed with 0 bits
  // wa64 has no '=' padding, so the terminating zero goes right after the second letter
  // (it was previously written at result_index+4, leaving slots +2 and +3 to the initial zero fill)
  forget(put(result,result_index+2,0));
  result. 
   
  
      
define ByteArray
  wa64_encode                 // encodes 'ba' from 'ba_index' on, one group of 3 bytes per recursive call
    (
      ByteArray ba,
      Int ba_index,           // index into byte array
      ByteArray result,       // output buffer, written in place and returned
      Int result_index        // position in 'result' of the next letter to write
    ) = 
  if nth(ba_index,ba) is 
    {
      failure then forget(put(result,result_index,0)); result,      // no new block of 3 bytes
      success(byte1) then 
        if nth(ba_index+1,ba) is 
          {
            failure then one_mod_three(result,result_index,byte1),  // the input ends with a single byte
            success(byte2) then 
              if nth(ba_index+2,ba) is 
                {
                  failure then two_mod_three(result,result_index,byte1,byte2),  // the input ends with two bytes
                  success(byte3) then 
                  if transform_group(byte1,byte2,byte3) is (c1,c2,c3,c4) then 
                    (
                      // a complete group: write its 4 letters, then go on with the next group
                      forget(put(result,result_index,c1)); 
                      forget(put(result,result_index+1,c2)); 
                      forget(put(result,result_index+2,c3)); 
                      forget(put(result,result_index+3,c4)); 
                      wa64_encode(ba,
                                  ba_index+3,
                                  result,
                                  result_index+4)
                    )
                }
          }
    }.
    

define Int
   Int x / Int y            // quotient of x by y as a plain Int, or 0 when the division fails
     =
   if (Maybe((Int,Int)))(x/y) is 
     {
       failure       then 0, 
       success(pair) then if pair is (quotient,_) then quotient      // second component is ignored
     }.
   
define ByteArray
  wa64_encode               // encodes a whole byte array into a fresh zero filled buffer
    (
      ByteArray ba
    ) =
  with input_length = (Int)length(ba), 
       buffer_size  = (Int)((((input_length/57)+1)*76)+10),     // same sizing formula as before
  wa64_encode(ba,
              0,
              constant_byte_array(buffer_size,0),
              0). 


public define String
  web_arg_encode            // serializes the datum, encodes the bytes in wa64, returns them as a string
    (
      $T datum
    ) =
  with serialized = (ByteArray)serialize(datum), 
       encoded    = (ByteArray)wa64_encode(serialized), 
  to_string(encoded). 
   
   
   
   
   
   *** Decoding. *************************************************************************

   See the comments in 'anubis/library/tools/base64.anubis'. 

   Checking if a character belongs to the wa64 alphabet. If true, the function returns the
   index of the character in the alphabet.
   
define Maybe(Word32)
  is_wa64_char              // yields the alphabet index of 'c', or failure if 'c' is not a wa64 letter
    (
      Word8 c
    ) =
  if ('A' +=< c & c +=< 'Z')      then success(to_Word32(c - 'A'))           // indices 0 to 25
  else if ('a' +=< c & c +=< 'z') then success(to_Word32(c - 'a' + 26))      // indices 26 to 51
  else if ('0' +=< c & c +=< '9') then success(to_Word32(c - '0' + 52))      // indices 52 to 61
  else if c = '-'                 then success(62)
  else if c = '_'                 then success(63)
  else failure. 
   
   
   
   Getting the next wa64 character from  the input. The function returns the next position
   for reading.  The function  does not return the character itself, but  its index in the
   alphabet.
   
define Maybe((Int,           // next position for reading
              Word32))       // index of character in wa64 alphabet
  get_next_character
    (
      ByteArray ba, 
      Int n                  // position of the character to read
    ) =
  if nth(n,ba) is 
    {
      failure     then failure,                     // nothing to read at position n
      success(ch) then
        if is_wa64_char(ch) is
          {
            failure        then failure,            // the character is not in the wa64 alphabet
            success(index) then success((n+1,index))
          }
    }. 
    

   Translating a group of characters into a group of bytes. 
   
type TranslateGroupResult2:     // outcome of decoding one group of at most 4 wa64 letters
  three_bytes  (Int new_pos, Word8 b1, Word8 b2, Word8 b3),  // 4 letters were read; new_pos is the next reading position
  two_bytes    (               Word8 b1, Word8 b2         ),  // only 3 letters could be read
  one_byte     (               Word8 b1                  ),   // only 2 letters could be read
  zero_bytes,                                                 // no letter could be read
  // error: only 1 letter could be read
  error.   
   
   
define TranslateGroupResult2
  translate_group           // reads up to 4 wa64 letters from position 'n' and rebuilds the bytes they encode
    (
      ByteArray ba, 
      Int n                 // position in 'ba' of the first letter of the group
    ) =
  if get_next_character(ba,n) is 
    {
      failure then zero_bytes,      // end of data, or a character which is not a wa64 letter
      success(p1) then if p1 is (n1,i1) then 
        if get_next_character(ba,n1) is
          {
            failure then error,     // a single letter is not enough to rebuild a byte
            success(p2) then if p2 is (n2,i2) then 
              if get_next_character(ba,n2) is 
                {
                  failure then // only 2 letters: one byte (wa64 has no padding characters to check)
                    one_byte(truncate_to_word8((i1<<2)|(i2>>4))), 
                  success(p3) then if p3 is (n3,i3) then 
                    if get_next_character(ba,n3) is 
                      {
                        failure then // only 3 letters: two bytes
                          two_bytes(truncate_to_word8((i1<<2)|(i2>>4)),
                                    truncate_to_word8(((i2&15)<<4)|(i3>>2))),
                        success(p4) then if p4 is (n4,i4) then // a complete group: three bytes
                          three_bytes(n4,truncate_to_word8((i1<<2)|(i2>>4)),
                                         truncate_to_word8(((i2&15)<<4)|(i3>>2)),
                                         truncate_to_word8(((i3&3)<<6)|i4))
                      }
                }
          }
    }.
   
   
define Int                  // returns the size of the decoded array of bytes
  translate_groups
    (
      ByteArray source, 
      Int n,                // position in source
      ByteArray target,
      Int m                 // position in target
    ) = 
  if translate_group(source,n) is 
    {
      // a complete group: store its 3 bytes and go on with the next group
      three_bytes(next_pos,x1,x2,x3) then 
        forget(put(target,m,x1)); 
        forget(put(target,m+1,x2)); 
        forget(put(target,m+2,x3)); 
        translate_groups(source,next_pos,target,m+3), 
   
      // an incomplete group is always the last one: store its bytes and stop
      two_bytes(x1,x2) then 
        forget(put(target,m,x1)); 
        forget(put(target,m+1,x2));
        m+2, 
           
      one_byte(x1) then 
        forget(put(target,m,x1));
        m+1,
   
      // nothing more to decode
      zero_bytes then 
        m, 
   
      // an error also just stops the decoding at the current size
      error then
        m
    }. 

   
define ByteArray
  wa64_decode               // decodes wa64 text into the bytes it represents
    ( 
      ByteArray ba
    ) =
  with source_length = (Int)length(ba), 
       decoded       = constant_byte_array(source_length,'0'),      // scratch buffer as long as the input
    truncate(decoded,
             translate_groups(ba,0,decoded,0));                     // cut down to the number of bytes written
    decoded. 
   
public define Maybe($T)
  web_arg_decode            // decodes the wa64 text into bytes, then unserializes them
    (
      String encoded_datum
    ) =
  with encoded_bytes = (ByteArray)to_byte_array(encoded_datum), 
       decoded_bytes = (ByteArray)wa64_decode(encoded_bytes), 
  (Maybe($T))unserialize(decoded_bytes).