dlm.c 12.6 KB
/* dlm.c *********************************************************************************

                                      Anubis Compiler
                      Generating Dynamically Loadable Modules (dlm).  

*****************************************************************************************/

#include <time.h>
#include <sys/stat.h>   
#include "cipher.h"
#include "compil.h"
#include "bytecode.h"
#include "minver.h"


/* A 'dlm' is constructed  for each source file. It has the same  name as the source file,
   but with the extension '.dlm' appended.  Each dlm has a 'boundary', which consists in a
   formal representation of  everything needed to dynamically link the  dlm into a running
   program. The boundary contains definitions of types needed in this module. The boundary
   also  contains one 'boundary  item' for  each datum  which is  publicly defined  in the
   source file.
   
   Each boundary item contains the following informations:
   
     - name of the datum (null terminated string)
     - type of the datum (see below how types are represented)
     - address (U32) within the code of the dlm file, of code producing the datum. 
   
   A null byte delimits the list of boundary items. 
   
   Format of a (non encrypted) dlm files:
   
   size (bytes)           content
   ---------------------------------------------------------------------------------------
   20                     sha1 hash of what follows
   1                      '0' meaning non encrypted
   4                      condensed version number as computed by 'condensate_version'
   4                      date as computed by 'time(NULL)'
   4                      flags
   ?                      type definitions
   ?                      list of boundary items
   4                      size of code
   ?                      code (as computed by 'translate_dynamic_code') 
   ---------------------------------------------------------------------------------------
     
   The dlm may  be encrypted. In that case  the dlm file has the following  format.  Let M
   represent  the content  of the  non  encrypted module  (as described  above). Then  the
   encrypted module is as follows.
   
   size (bytes)           content
   ---------------------------------------------------------------------------------------
   20                     sha1 hash of what follows
   1                      byte identifying algorithms used (for example RSA and Blowfish)
   128                    session key encrypted using the public key of the recipient
   ?                      M encrypted using the session key
   ---------------------------------------------------------------------------------------
   
   Now we must describe how type definitions are recorded within the dlm, and how types of
   data are represented. The list of type definitions is delimited by a zero byte. 
   
   Each type definition is as follows:
   
     - name of type (null delimited string),
     - alternatives,
     - null byte.
   
   Each alternative is as follows:
   
     - name of alternative (null delimited string),
     - types of components (names of components are useless), 
     - null byte.
   
   Types are recorded as follows:
     
     - Primitive types:  one byte between 1 and  80, 
   
     - Functional types:  81 followed by the  arity (one byte) and  the representations of
       the target and source types,
   
     - Address types: one  byte between 82 and 100, followed by  the representation of the
       content type.

     - table types: 101 followed by a type representation.
   
     - C structure types: a byte between 102 and 160. 
   
     - Defined  types:  255 followed  by  the  index  of the  type  in  the list  of  type
       definitions (one short integer, i.e. 2 bytes).
   

   What about type  schemes ? There is  no type scheme in the  type definitions. Actually,
   each  definition defines  a type  instance.  For this  reason, type  names may  include
   parentheses. For example, 'List(String)' is a regular type name. 

   */
   
  
#define type_constructs_list\
   item(defined)\
   item(functional)\
   item(table)\
   C_struct_list\
   primitive_types_list\
   item(type_Listener)\
   address_types_list\
   
typedef enum {
#define item(n)    type_construct_##n,
   type_constructs_list
#undef  item   
   type_construct_dummy
} TYPE_CONSTRUCT;   
   
   
int next_temp_implem_id = 0;    

Expr abs_implem_from_implem(Expr implem)
{
  int i; 

  for (i = 0; i < next_implem; i++)
    if (equal(implem,implems[i].implem))
      return abs_implem_from_id(i);
   
  internal_error("Cannot find id of implementation",implem); return nil;
}
   
   
Expr comps_abs_implems(Expr comps)   
{
  Expr result = nil; 
  while(consp(comps))
    {
      result = cons(abs_implem_from_id(integer_value(car(car(comps)))),result);
      comps = cdr(comps); 
    }
   
  return reverse(result); 
}
   
   
Expr alts_abs_implems(Expr alts_geoms)
{
  Expr result = nil; 
  while(consp(alts_geoms))
    {
      result = cons(cons(new_integer(length(cdr(car(alts_geoms)))),
                         comps_abs_implems(cdr(car(alts_geoms)))),
                    result); 
      alts_geoms = cdr(alts_geoms); 
    }
  return reverse(result); 
}
   
Expr tuple_abs_implems_from_implems(Expr implems)
{
  Expr result = nil;
  while (consp(implems))
    {
      result = cons(abs_implem_from_implem(car(implems)),result); 
      implems = cdr(implems);
    }
  return reverse(result); 
}
   
Expr abs_implem_from_id(int implem_id)   
{
  Expr impl; 

  //printf("%d %d %d\n",next_implem,implem_id,implems[implem_id].temp_implem_id);
   
  /* if we have a non zero temp implem id, return this (already_seen . id) */ 
  if (implems[implem_id].temp_implem_id != 0) 
    return cons(already_seen,new_integer(implems[implem_id].temp_implem_id)); 
   
  /* otherwise, mark the implementation as already seen. */
  implems[implem_id].temp_implem_id = ++next_temp_implem_id; 
   
  //debug(new_integer(next_temp_implem_id));
   
  impl = implems[implem_id].implem; 

  //debug(implem_id);
  //debug(implems[implem_id].type);
  //debug(impl); 
   
  if (consp(impl))
    switch (car(impl))
      {
      case small_type:
      case mixed_type:
      case large_type:
        return mcons3(defined_type,second(impl),alts_abs_implems(cdr3(impl)));

      case functype:
        return mcons3(functype,
                      tuple_abs_implems_from_implems(second(impl)),
                      abs_implem_from_implem(cdr2(impl))); 
   
      case type_struct_ptr:
        return impl; 
   
#define item(n)   case n: return cons(car(impl),abs_implem_from_implem(cdr(impl))); 
   address_types_list
#undef item
   
     default: internal_error("Cannot recognize implementation",impl); return nil; 
      }
  else
    return impl; 
}

   
void reset_temp_implem_ids(void)   
{
  int i; 
   
  /* reinitialize temporary implem ids */ 
  next_temp_implem_id = 0; 
   
  for (i = 0; i < next_implem; i++)
    implems[i].temp_implem_id = 0;        /* means not used */  
}
   
int absolute_implem_size(Expr impl)
{
  if(consp(impl)) return absolute_implem_size(car(impl))
                    + absolute_implem_size(cdr(impl));
  return 4; 
}

   
void write_absolute_implem(Expr impl, U8 **dest)
{
  if (consp(impl))
    {
      write_absolute_implem(car(impl),dest);
      write_absolute_implem(cdr(impl),dest);
    }
  else
    {
      *((U32 *)(*dest)) = impl; 
      *dest += 4; 
    }
}

   
#if 0   
void dump_dlm(int iid)
{
  Expr already_appended = nil;
  U8 *translation; 
  U8 *checksum; 
  FILE *f;
  FILE *symfile;
  int i; 
  U32 k; 
  U32 offa; 
  char *module_ext = "dlm";
  char *opname = string_content(car(operations[compiled_ops[iid].op_id].names));
  char *buf = (char *)mallocz(1000); 
  Expr start_label = new_addr_name(); 
  Expr initialization_address_value = new_addr_name();
  Expr variables_deletion_address_value = new_addr_name();
  Expr init_ret_point = new_addr_name();
  Expr sym_code = compiled_ops[iid].offline_code; 
   
  //debug(sym_code); 
  //debug(start_label); 

  sym_code = nil;
   
  /* Complete the code with all functions directly or indirectly called by this one.
     'complete_dynamic_code' is defined in 'vminstr.c'. */
  sym_code = complete_dynamic_code(sym_code,
                                   &already_appended,
                                   initialization_address_value);
   
  /* translate offline symbolic code */ 
  translation = translate_dynamic_code(sym_code,
           start_label, 
           compiled_ops[iid].sha1_digest,
                                       initialization_address_value); 

  /* compute checksum */ 
  checksum = sha1(translation,4+4+4+4+4+20+(*((U32 *)(translation+4+4+4))));  /* volatile */ 

  /* dump symbolic code */
  if (symcode_option)
    {
      struct tm *ta;

      ta = localtime((time_t *)(translation+4)); 

      assert(offsets != NULL);
      assert(offsets_size == new_addr_count); 
      sprintf(buf,"%s.sc",opname);
      symfile = fopenz(buf,"wt"); 
      fprintf(symfile,
       "\n        This file was generated by the Anubis compiler (version 1.%d)\n\n",
       min_version); 
      fprintf(symfile,
              "        Symbolic code for module '%s.%s'\n\n",opname,module_ext); 
      fprintf(symfile,
             "               time stamp: %u (%.4d/%.2d/%.2d %.2d:%.2d:%.2d)\n"
             "                using SSL: %s\n"
             "             size of code: %u bytes\n"
             " starting point at offset: %u (executing '%s' from file '%s' at line %u)\n"
             "    module identification: %s\n",
             *((U32 *)(translation+4)),
      (ta->tm_year)+1900,
             (ta->tm_mon)+1,
             ta->tm_mday,
             ta->tm_hour,
             ta->tm_min,
             ta->tm_sec,
             ((*((U32 *)(translation+4+4))) & mf_using_ssl) ? "yes" : "no",
             *((U32 *)(translation+4+4+4)),
             *((U32 *)(translation+4+4+4+4)),
       opname, 
              string_content(operations[compiled_ops[iid].op_id].file_name),
       integer_value(operations[compiled_ops[iid].op_id].line),
      sha1_to_ascii(translation+4+4+4+4+4));
      fprintf(symfile,
             "          module checksum: %s\n\n",
      sha1_to_ascii(checksum)); 
      fprintf(symfile,
              " offsets | symbolic code\n"
              "---------|--------------------------------------------------------");         
      offa = 0; 
      print_symbolic_code(symfile,sym_code,&offa,
          initialization_address_value,variables_deletion_address_value);
      fclose(symfile); 
    }

  /* create a shell for executing the module */ 
  if (my_shells_directory != NULL && compiled_ops[iid].global == op_adm)
    {
      sprintf(buf,"%s/%s", my_shells_directory, opname); 
#ifdef WIN32
   strcat(buf,".bat");
#endif
      shell_file = fopenz(buf,"wt"); 
      fprintf(shell_file,"anbexec %s/modules/%s.adm --pdir:%s $*\n",
              my_anubis_directory,opname,my_anubis_directory); 
      fclose(shell_file); 
      chmod(buf,S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP); 
    }

  if (make_dep) make_dep_file(opname,module_ext); 
   
  /* open target file 'opname.adm' = 'Anubis Dynamic Module' or 
                      'opname.awp' = 'Anubis Web Page' */ 
  if (compiled_ops[iid].global == op_awp)
    sprintf(buf,"%s/%s.%s",awp_directory,opname,module_ext);
  else if (compiled_ops[iid].global == op_ssl_awp)
    sprintf(buf,"%s/%s.%s",ssl_awp_directory,opname,module_ext);
  else if (library_directory == NULL)
    sprintf(buf,"%s.%s",opname,module_ext);
  else if (compiled_ops[iid].global == op_adm)
    sprintf(buf,"%s/modules/%s.%s",my_anubis_directory,opname,module_ext);
  f = fopenz(buf,"wb");

  /* dump information */ 
  for (k = 0; k < 4+4+4+4+4+20 + (*((U32 *)(translation+4+4+4))); k++)
    putc(translation[k],f); 

  /* including checksum */ 
  for (k = 0; k < 20; k++)
    putc(checksum[k],f); 

  fclose(f); 

  if (verbose)
    {
      printf("\nModule '%s' has been written to disk."
             "\n|             version: %u.%u.%u"
             "\n|                date: %u"
             "\n|           using SSL: %s"
             "\n|        size of code: %u"
             "\n|      starting point: %u"
             "\n|      identification: %s",
      buf,              
             get_maj_version(*((U32 *)translation)),
             get_min_version(*((U32 *)translation)),
             get_rel_version(*((U32 *)translation)),
             *((U32 *)(translation+4)),
             ((*((U32 *)(translation+4+4))) & mf_using_ssl) ? "yes" : "no", 
             *((U32 *)(translation+4+4+4)),
             *((U32 *)(translation+4+4+4+4)),
      sha1_to_ascii(translation+4+4+4+4+4));
      printf("\n|            checksum: %s"
             "\n|__________________\n",
      sha1_to_ascii(checksum)); 
    }
  freez(buf);   
}
#endif