Commit 1bebbd884602f36679085c058dd7c9b670dcd49d

Authored by Alain Prouté
1 parent fc95b240

*** empty log message ***

anubis_dev/compiler/proj/linux/Makefile
... ... @@ -47,8 +47,9 @@ all: myanubis anubis_npd
47 47 #
48 48 #
49 49 myanubis: $(main_objs) $(special_objs:.o=.my.o) $(cipher_objs)
50   - gcc -o $(BINDIR)/myanubis $(main_objs) $(special_objs:.o=.my.o) $(cipher_objs)\
  50 + gcc -o myanubis $(main_objs) $(special_objs:.o=.my.o) $(cipher_objs)\
51 51 -lm -lstdc++ -lpthread
  52 + cp myanubis $(BINDIR)
52 53 # $(strip) $(BINDIR)/myanubis
53 54  
54 55 #
... ... @@ -56,10 +57,12 @@ myanubis: $(main_objs) $(special_objs:.o=.my.o) $(cipher_objs)
56 57 #
57 58 anubis_npd: dependancies_npd $(main_objs) $(special_objs:.o=.npd.o) $(cipher_objs)\
58 59 $(SRCDIR)/predef_npd.aux $(DEVDIR)/include/constructors.h
59   - gcc -o $(BINDIR)/anubis_npd $(main_objs) $(special_objs:.o=.npd.o) $(cipher_objs)\
  60 + gcc -o anubis_npd $(main_objs) $(special_objs:.o=.npd.o) $(cipher_objs)\
60 61 -lm -lstdc++ -lpthread
61   - $(strip) $(BINDIR)/anubis_npd
62   - cp $(BINDIR)/anubis_npd $(BINDIR)/anubis
  62 + $(strip) anubis_npd
  63 + cp anubis_npd anubis
  64 + cp anubis_npd $(HOME)/anubis_distrib/linux_install/bin/anubis
  65 + cp anubis_npd $(BINDIR)/anubis
63 66  
64 67  
65 68  
... ...
anubis_dev/compiler/src/expr.cpp
... ... @@ -1028,7 +1028,8 @@ void _debug_msg(char *msg)
1028 1028 void err_line_col(Expr lc, char *error_name)
1029 1029 {
1030 1030 String completePath;
1031   - completePath.Printf("%s%c%s", include_dir[include_stack_ptr],FILE_SEPARATOR,file_in(lc));
  1031 + //completePath.Printf("%s%c%s", include_dir[include_stack_ptr],FILE_SEPARATOR,file_in(lc));
  1032 + completePath.Printf("%s", file_in(lc));
1032 1033 fprintf(errfile,
1033 1034 msgtext_line_col[0],
1034 1035 completePath.Cstr(),
... ...
anubis_dev/compiler/src/output.cpp
... ... @@ -258,7 +258,7 @@ static void dump_dynamic_compiled_op(int iid) /* compiled op id */
258 258  
259 259 if (1 || verbose)
260 260 {
261   - snprintf(buf,995,"%s.%s",opname,module_ext);
  261 + snprintf(buf,995,"%s/modules/%s.%s",my_anubis_directory,opname,module_ext);
262 262 printf("Constructing module '%s' ...",buf);
263 263 fflush(stdout);
264 264 }
... ...
anubis_dev/include/minver.h
1 1 #define min_version (6)
2   -#define rel_version (5)
  2 +#define rel_version (6)
3 3  
... ...
anubis_dev/vm/proj/linux/Makefile
... ... @@ -129,11 +129,13 @@ $(SRCDIR)/lex.yy.c: $(SRCDIR)/lexer.y $(SRCDIR)/grammar.tab.h
129 129 # Linking anbexec
130 130 #
131 131 anbexec: $(main_objs) $(special_objs) $(cipher_objs) $(graph_objs) $(sqlite3_objs)
132   - gcc -o $(HOME)/bin/anbexec $(main_objs) $(special_objs) $(cipher_objs) $(graph_objs)\
  132 + gcc -o anbexec $(main_objs) $(special_objs) $(cipher_objs) $(graph_objs)\
133 133 $(sqlite3_objs)\
134 134 -L/usr/X11R6/lib\
135 135 -ljpeg -lssl -lcrypto -lm -lX11 -ldl -lstdc++ -lpthread
136   - strip $(HOME)/bin/anbexec
  136 + strip anbexec
  137 + cp anbexec $(HOME)/anubis_distrib/linux_install/bin
  138 + cp anbexec $(HOME)/bin/anbexec
137 139  
138 140  
139 141 clean:
... ...
anubis_dev/vm/src/anbexec.cpp
... ... @@ -225,7 +225,7 @@ void syntax(void)
225 225 {
226 226 LOGINFO("anbexec (Personal Edition) version 1.%d.%d\n"
227 227 "Usage: anbexec <module> [arguments] [options]\n"
228   - " <module> is the name of an '.adm' file.\n"
  228 + " <module> is the name of an '.adm' file (cannot begin by '-').\n"
229 229 " [arguments] are the arguments to be passed to the module.\n"
230 230 " Options are:\n"
231 231 " --verbose\n"
... ... @@ -363,49 +363,6 @@ int load_module(struct Exec_Mod_struct *mod, char *filename)
363 363 /* read code into both arrays */
364 364 for (k = 0; k < size; k++)
365 365 (all+4+4+4+4+4+20)[k] = code[k] = getc(fp);
366   -
367   -#ifdef _CRYPTADM_
368   - /* Now it's time to decrypt the code if the adm has been encrypted. Recall (see
369   - compiler/src/output.c) that only the code is encrypted, but that the value of 'size',
370   - got above, is the length of the code padded by zeros and with an extra byte (Hence,
371   - 'size' is 1 modulo 8). This is what we need, because 'blowfish_decrypt_text' (see
372   - cipher/blowfish.c) accepts as its 'length' (actually 'len') argument the length of
373   - the encrypted text including padding zeros and extra byte. 'blowfish_decrypt_text'
374   - returns the length of the decrypted text, not including padding zeros nor extra
375   - byte. */
376   -
377   - /* 'size' must be 1 mod 8 */
378   - if ((size&7) != 1)
379   - {
380   - return 2; /* module is corrupted */
381   - }
382   -
383   - /* decrypt both arrays */
384   - decrypted_code_length_1 = blowfish_decrypt_text((unsigned char *)adm_encryption_key,
385   - code,
386   - size);
387   - decrypted_code_length_2 = blowfish_decrypt_text((unsigned char *)adm_encryption_key,
388   - all+4+4+4+4+4+20,
389   - size);
390   -
391   - if (decrypted_code_length_1 != decrypted_code_length_2)
392   - {
393   - fprintf(stderr,"'anbexec' is corrupted.\n");
394   - my_exit(1);
395   - }
396   -
397   - /* update 'size' after decryption. */
398   - size = decrypted_code_length_1+1;
399   -
400   - //printf("anbexec (line 354): (size&7) = %d\n", (size&7));
401   -
402   - /*
403   - if ((size&7) == 0)
404   - {
405   - return 2;
406   - }
407   - */
408   -#endif
409 366  
410 367  
411 368 /* compute the checksum */
... ... @@ -903,22 +860,68 @@ void LoadConfigurationFile(IniFile *configFile)
903 860  
904 861  
905 862 /* 'main' function, executing a module */
906   -
907 863 int main(int argc, char **argv)
908 864 {
909 865 int i;
910   -// int mid;
911   - U32 args = 0; /* (List(String))[ ] */
  866 + /* The list of arguments to the module given on the command line must become an Anubis
  867 + datum of type 'List(String)'. We prepare this list in the variable 'args', beginning
  868 + by an empty list. Since 'List(String)' is a mixed type, and the empty list is the
  869 + first alternative (without component), the empty list is just represented by the
  870 + unsigned integer 0. */
  871 + U32 args = 0;
  872 +
  873 +
  874 + /* check somes sizes (used only by us) */
  875 + assert(sizeof(double) <= 8);
  876 +#ifdef WIN32
  877 + assert(sizeof(long) == 4);
  878 +#endif
  879 +
  880 +
  881 + /* anbexec *MUST* receive the name of the module as the *FIRST* argument. This name
  882 + cannot begin by '-'. If it is not the case, we just recall the syntax and exit. */
  883 + if (argc < 2 || argv[1][0] == '-')
  884 + {
  885 + syntax();
  886 + my_exit(1);
  887 + }
  888 + else
  889 + {
  890 + /* Otherwise, we got the name of the module (with or without ".adm"). */
  891 + modname = argv[1];
  892 + }
  893 +
  894 +
  895 +
  896 + /* initialize some 'constants'. The system needs to have a permanent empty string and a
  897 + permanent empty byte array at hand. These data are made permanent by putting 0 in the
  898 + reference counter. */
  899 + if ((anubis_empty_string = (U32)malloc(5)) == (U32)NULL ||
  900 + (anubis_empty_byte_array = (U32)malloc(8)) == (U32)NULL)
  901 + {
  902 + LOGERROR("Not enough memory (3).\n");
  903 + my_exit(1);
  904 + }
  905 + for (i = 0; i < 5; i++) ((U8 *)anubis_empty_string)[i] = 0;
  906 + for (i = 0; i < 8; i++) ((U8 *)anubis_empty_byte_array)[i] = 0;
  907 +
  908 +
  909 +
  910 + /* initialise the input descriptors (by which anbexec must be woken up) */
  911 + FD_ZERO(&descriptors_waited_for_input);
  912 +
912 913  
913   - //if (argc >= 2) printf("argv[1] = %s\n",argv[1]);
914 914  
915 915 //here we make all host OS dependencies init, before entering in kernel land
916 916 kernelInit();
917 917 anubisUserDirectory = GetUserDir();
918 918 anubisUserDirectory << "/my_anubis";
919 919  
  920 +
  921 +
920 922 #ifdef record_allocations
921   - if ((segs_descrs = (struct SegmentDescription *)malloc(sizeof(struct SegmentDescription)*max_seg_descr)) == NULL)
  923 + if ((segs_descrs =
  924 + (struct SegmentDescription *)malloc(sizeof(struct SegmentDescription)*max_seg_descr)) == NULL)
922 925 {s
923 926 LOGERROR("Not enough memory (2).");
924 927 my_exit(1);
... ... @@ -933,27 +936,16 @@ int main(int argc, char **argv)
933 936 }
934 937 #endif
935 938  
936   - /* initialize some 'constants' */
937   - if ((anubis_empty_string = (U32)malloc(5)) == (U32)NULL ||
938   - (anubis_empty_byte_array = (U32)malloc(8)) == (U32)NULL)
939   - {
940   - LOGERROR("Not enough memory (3).\n");
941   - my_exit(1);
942   - }
943   -
944   - for (i = 0; i < 5; i++) ((U8 *)anubis_empty_string)[i] = 0;
945   - for (i = 0; i < 8; i++) ((U8 *)anubis_empty_byte_array)[i] = 0;
946   -
947   - /* initialise the input descriptors (by which anbexec must be woken up) */
948   - FD_ZERO(&descriptors_waited_for_input);
949 939  
950   - assert(sizeof(double) <= 8);
951   -#ifdef WIN32
952   - assert(sizeof(long) == 4);
953   -#endif
  940 +
954 941  
955 942  
956   - /* read options and module name */
  943 + /* read options and arguments to the module. Recall that the name of the module is
  944 + already read as argv[1]. Hence, we read all other argv[i] but only for i >= 2 (i.e. i
  945 + > 1). We read them in reverse order (beginning by the last one), so that the list of
  946 + arguments to the module will be in the right order (no need to reverse it, since each
  947 + argument read is put in front of the list, hence the last one read is the head of
  948 + list, as required). */
957 949 for (i = argc-1; i > 1; i--)
958 950 {
959 951 if (!strcmp(argv[i],"--debug_mem")) debug_mem = 1;
... ... @@ -987,19 +979,22 @@ int main(int argc, char **argv)
987 979 end_debug = atoi(argv[i]+12); }
988 980 #endif
989 981 else if (argv[i][0] == '-' && argv[i][1] == '-')
990   - /* Note: argv[i] has always at least 2 characters, including the trailing 0 */
991   - {
992   - LOGERROR("Unknown option: %s\n",argv[i]);
993   - my_exit(1);
994   - }
  982 + /* Note: argv[i] has always at least 2 characters, including the trailing 0 */
  983 + {
  984 + LOGERROR("Unknown option: %s\n",argv[i]);
  985 + my_exit(1);
  986 + }
995 987 else
996   - {
997   - /* record an argument in the list of arguments */
998   - args = anubis_cons(anubis_string(argv[i],TheAnubisAllocator),args,1,TheAnubisAllocator);
999   -// LOGINFO("arg %d [%s]\n",i, argv[i]);
1000   - }
  988 + {
  989 + /* record an argument in the list of arguments */
  990 + args = anubis_cons(anubis_string(argv[i],TheAnubisAllocator),args,1,TheAnubisAllocator);
  991 + // LOGINFO("arg %d [%s]\n",i, argv[i]);
  992 + }
1001 993 }
1002 994  
  995 +
  996 +
  997 +
1003 998 #ifdef debug_vm
1004 999 if (start_end_debug && end_debug <= start_debug)
1005 1000 {
... ... @@ -1012,6 +1007,10 @@ int main(int argc, char **argv)
1012 1007 #endif
1013 1008  
1014 1009  
  1010 +
  1011 +#ifdef WIN32
  1012 + Si tu veux bien mettre un commentaire pour expliquer ce que font 'config' et 'InitCheck'. Merci.
  1013 +#endif
1015 1014 IniFile config( anubisUserDirectory + "/anubis.conf");
1016 1015 if(config.InitCheck() != B_NO_ERROR)
1017 1016 {
... ... @@ -1023,59 +1022,17 @@ int main(int argc, char **argv)
1023 1022 config.ReadString("PATH", "MY_ANUBIS", "", my_anubis_directory);
1024 1023  
1025 1024  
1026   - modulePaths.AddPath(my_anubis_directory + "/modules/");
1027   - modulePaths.AddPath("/."); /* big astuce */
1028   -
1029 1025 if (my_anubis_directory.Length() == 0)
1030 1026 {
1031   - LOGERROR("No --pdir:<path> option, and the anubis.conf variable 'MY_ANUBIS' has not been defined.\n");
1032   - my_exit(1);
1033   - }
1034   -
1035   -#ifdef _CRYPTADM_
1036   - /* If adms are encrypted, we need to read the djed, and to compute the encryption key. */
1037   -
1038   - if ((djed = (char *)malloc(30+my_anubis_directory.Length())) == NULL)
1039   - {
1040   - fprintf(stderr,"Not enough memory (4).\n");
1041   - my_exit(1);
1042   - }
1043   - sprintf(djed,"%s/Anubis_1_%d.djed",my_anubis_directory.Cstr(),min_version);
1044   -
1045   - /* read djed and construct 'djed+ms' */
1046   - if ((fp = fopen(djed,"rt")) == NULL)
1047   - {
1048   - fprintf(stderr,"Djed not found.\n");
  1027 + LOGERROR(
  1028 + "No --pdir:<path> option, and the 'anubis.conf' variable 'MY_ANUBIS' has not been defined.\n");
1049 1029 my_exit(1);
1050 1030 }
1051 1031  
1052   - /* concatenate djed and magic string (djed+ms)*/
1053   - {
1054   - U8 *ms = (unsigned char *)"Mavslrqcmadsrdpas";
1055   - U32 n;
1056   - U32 i, j;
1057   - int c;
1058   - n = get_file_size(fp);
1059   - if ((id_card_ms = (U8 *)malloc(n+strlen((const char*)ms)+1)) == NULL)
1060   - {
1061   - LOGERROR("Not enough memory (5).\n");
1062   - my_exit(1);
1063   - }
1064   - i = 0;
1065   - while ((c = getc(fp)) != EOF && i < n)
1066   - id_card_ms[i++] = (U8)c;
1067   - fclose(fp);
1068   - i = n; j = 0;
1069   - while(i < n + strlen((const char*)ms)) id_card_ms[i++] = ms[j++];
1070   -
1071   - /* compute encryption key as to_ascii(sha1(id_card+ms)) */
1072   - sprintf(adm_encryption_key,sha1_to_ascii(sha1((char *)id_card_ms,n+strlen((const char*)ms))));
1073   - free(id_card_ms);
1074   -
1075   - }
1076   -#endif
  1032 + modulePaths.AddPath(my_anubis_directory + "/modules/");
  1033 + modulePaths.AddPath("/."); /* big astuce */
1077 1034  
1078   -
  1035 +
1079 1036 if (anubis_directory.Length() == 0)
1080 1037 config.ReadString("PATH", "ANUBIS", ".", anubis_directory);
1081 1038  
... ... @@ -1100,11 +1057,6 @@ int main(int argc, char **argv)
1100 1057  
1101 1058 srand(time(0));
1102 1059  
1103   - if (argc < 2)
1104   - syntax();
1105   - else
1106   - modname = argv[1];
1107   -
1108 1060 if (signal(SIGPIPE,handle_broken_pipe) == SIG_ERR)
1109 1061 {
1110 1062 LOGERROR("Cannot trap SIGPIPE signal.\n");
... ... @@ -1134,8 +1086,10 @@ int main(int argc, char **argv)
1134 1086 my_exit(1);
1135 1087 }
1136 1088  
1137   - /* load the module */
1138   - //printf("modname = %s\n",modname);
  1089 +
  1090 +
  1091 +
  1092 + /* load (and relocate) the module */
1139 1093 switch (load_module(&the_module,modname))
1140 1094 {
1141 1095 case 0: LOGERROR("Cannot find file '%s'.\n",modname); my_exit(1); break;
... ... @@ -1146,16 +1100,16 @@ int main(int argc, char **argv)
1146 1100 case 5: /* bad signature */
1147 1101 {
1148 1102 LOGERROR(
1149   - "Bad Module signature for [%s] \nmaybe too old version, recompile and try again...", modname);
  1103 + "Bad Module signature for [%s] \n recompile and try again...\n", modname);
1150 1104 my_exit(1);
1151 1105 break;
1152 1106 }
1153 1107 default: assert(0);
1154 1108 }
1155   - //dump_module();
1156   - //exit(1);
1157   - //get_module_check_sum();
1158 1109  
  1110 +
  1111 +
  1112 + /* We have some initializations to perform depending on the flags in the module. */
1159 1113 #ifdef _WITH_SSL_
1160 1114 /* initialize SSL if needed */
1161 1115 if (the_module.flags & mf_using_ssl)
... ... @@ -1195,13 +1149,10 @@ int main(int argc, char **argv)
1195 1149 #endif
1196 1150  
1197 1151  
1198   - /* initialize all virtual machines as 'not_used' */
1199   -// for (i = 0; i < max_machine; i++)
1200   -// machines[i].status = machine_not_used;
1201   -
1202 1152 AnubisProcess * currentProcess;
1203   - /* create a new Anubis Process, the first */
1204   - currentProcess = TheAnubisProcessList->CreateAnubisProcess(the_module.starting_point, the_module.byte_code, the_module.code_size, TheAnubisAllocator);
  1153 + /* create a new Anubis Process, the first one. */
  1154 + currentProcess = TheAnubisProcessList->CreateAnubisProcess(the_module.starting_point,
  1155 + the_module.byte_code, the_module.code_size, TheAnubisAllocator);
1205 1156  
1206 1157 if (currentProcess)
1207 1158 {
... ... @@ -1215,6 +1166,8 @@ int main(int argc, char **argv)
1215 1166 /* run the virtual machines */
1216 1167 schedul();
1217 1168  
  1169 +
  1170 +
1218 1171 #ifdef _WITH_GRAPHISM_
1219 1172 /* free graphical resources if needed */
1220 1173 if (the_module.flags & mf_using_graphism)
... ... @@ -1223,25 +1176,9 @@ int main(int argc, char **argv)
1223 1176 }
1224 1177 #endif
1225 1178  
  1179 + /* check if we must restart */
1226 1180 if(must_restart_flag)
1227   - {
1228   -#ifdef comment_c_etait_avant
1229   - pid_t new_PID;
1230   - new_PID = fork();
1231   - if (new_PID == -1)
1232   - {
1233   - }
1234   - else if (new_PID == 0)
1235   - {
1236   - /* new process */
1237   - execvp(argv[0],argv);
1238   - }
1239   - else
1240   - {
1241   - /* old process */
1242   - }
1243   -#endif
1244   -
  1181 + {
1245 1182 execvp(argv[0],argv); /* never returns */
1246 1183 }
1247 1184  
... ...
anubis_distrib/library/web/http_server.anubis 0 → 100644
  1 +
  2 + The Anubis Project.
  3 + A HTTP/HTTPS Server
  4 +
  5 + Copyright (c) Alain Proute' 2003.
  6 + All rights reserved.
  7 +
  8 +
  9 + In this file a HTTP/HTTPS server is defined. It answers HTTP/HTTPS requests, sends
  10 + files (images or any other kind of file), constructs HTML pages on the fly using
  11 + information received from the client (when the URI ends with '.awp'), handles uploading
  12 + of files, redirections and virtual hosts. It is multitasking by itself, and can handle
  13 + any number of clients simultaneously.
  14 +
  15 +
  16 + * * * The '.awp' extension.
  17 +
  18 + '.awp' (for 'Anubis Web Page') is a virtual extension. No file actually has this
  19 + extension. However, when the server receives a request whose URI (Uniform Resource
  20 + Identifier) ends with '.awp', it understands that, instead of sending a file, it has to
  21 + construct an HTML page on the fly. From this point of view, '.awp' is similar to '.php'
  22 + or '.asp' in some other systems.
  23 +
  24 + However, your client never has to write this extension in his browser. Indeed, assuming
  25 + that the Internet domain name of your site is 'www.our-business.com', the client just
  26 + has to enter this:
  27 +
  28 + http://www.our-business.com
  29 +
  30 + in the address field of his browser. Because you will have set the following
  31 + redirection (see below for details):
  32 +
  33 + redirect("/", "www.our-business.com", "/homepage.awp")
  34 +
  35 + the server will understand that '/homepage.awp' is actually requested. As a
  36 + consequence, since the extension is '.awp', the server will construct an HTML page on
  37 + the fly.
  38 +
  39 + Now, if your client clicks on one of your hypertext links, he may again be directed to
  40 + an '.awp' URI, simply because you will have put the extension '.awp' explicitly in the
  41 + link.
  42 +
  43 + All other extensions ('.jpg', '.gif', '.pdf', etc...) are treated as file extensions,
  44 + and the files (if found) are sent 'as is' to the client. However, for security reasons,
  45 + the file is sent only if the following conditions are satisfied:
  46 +
  47 + 1. the file is (on the server's disk) within the subtree whose root is the 'public
  48 + directory' (see below).
  49 +
  50 + 2. the MIME type associated to the extension can be recognized. This is the case only
  51 + if this type is recorded in 'web/mime.anubis' (see and customize this file).
  52 +
  53 +
  54 +
  55 +
  56 + * * * HTTP headers, web arguments and web pages.
  57 +
  58 + Each HTTP request which arrives on the server contains a request line followed by a
  59 + series of HTTP headers. Each HTTP header is a pair '(name,value)' assigning a value to
  60 + a name. The type 'HTTP_header' is defined in 'web/common.anubis'.
  61 +
  62 + The request may also have a 'body'. The body contains either 'web arguments' or
  63 + uploaded files (or both). The request line itself may also contain web arguments.
  64 + Like HTTP headers, 'web arguments' are pairs '(name,value)', but the difference is that
  65 + these pairs are generated by the page within which the client clicks, while HTTP
  66 + headers are generated by the browser itself. The type 'Web_arg' is defined in
  67 + 'web/common.anubis'. It has two alternatives, one for ordinary web arguments (pairs)
  68 + and one for uploaded files.
  69 +
  70 + The type 'Web_page' describes pages as they may appear in the client's browser's
  71 + window. It is defined in 'web/html.anubis'.
  72 +
  73 +read web/common.anubis
  74 +read web/html.anubis
  75 +
  76 +
  77 +
  78 +
  79 +
  80 + * * * Redirections and virtual hosts.
  81 +
  82 + The server can handle virtual hosts; in other words, you may have several sites on the
  83 + same server, with the same IP address and same port number. This is achieved through
  84 + redirections, using the 'Host' HTTP header. The type 'Redirection' is defined in
  85 + 'web/common.anubis'.
  86 +
  87 + When you start your server, you provide a list of redirections. You should at least
  88 + put something like:
  89 +
  90 + redirect("/", "www.our-business.com", "/homepage.awp")
  91 +
  92 + in order to have a convenient entry point to your site. However, you may also handle
  93 + several sites with the same server, putting the following in your list:
  94 +
  95 + redirect("/","www.our-business-1.com","/homepage_1.awp"),
  96 + redirect("/","www.our-business-2.com","/homepage_2.awp"),
  97 + redirect("/","www.our-business-3.com","/homepage_3.awp"),
  98 +
  99 + and you will certainly imagine other applications of this principle.
  100 +
  101 +
  102 +
  103 +
  104 + * * * HTTP/HTTPS server description.
  105 +
  106 + Your HTTP server itself is described by the following type.
  107 +
  108 +public type HTTP_ServerDescription:
  109 + http_server_description(Int32 ip_address,
  110 + Int32 ip_port,
  111 + List(Redirection) redirections,
  112 + String server_directory,
  113 + List(String) journal_extensions,
  114 + List(String) journal_headers,
  115 + (Int32,
  116 + String,
  117 + List(HTTP_header),
  118 + List(Web_arg),
  119 + Server) -> (String,
  120 + String,
  121 + Web_page) awp_handler,
  122 + (One) -> One service).
  123 +
  124 +
  125 +
  126 + For an HTTPS server, there are some more components.
  127 +
  128 +public type HTTPS_ServerDescription:
  129 + https_server_description(Int32 ip_address,
  130 + Int32 ip_port,
  131 + String server_common_name,
  132 + String authorization_secret,
  133 + List(Redirection) redirections,
  134 + String server_directory,
  135 + List(String) journal_extensions,
  136 + List(String) journal_headers,
  137 + (Int32 ip_address,
  138 + String uri,
  139 + List(HTTP_header),
  140 + List(Web_arg),
  141 + Server) -> (String,
  142 + String,
  143 + Web_page) awp_handler,
  144 + (One) -> One service).
  145 +
  146 +
  147 +
  148 + Explanations of the components of these types:
  149 +
  150 +
  151 + 'ip_address' is the TCP/IP address on which the server will listen. If you put 0, the
  152 + server will listen on all available TCP/IP interfaces. Otherwise, you may want to use
  153 + the function 'ip_address' defined in 'tools/basis.anubis', for constructing your IP
  154 + address from a quadruplet of Int8. For example, if you want to start a server only
  155 + reachable from your local area network, put the local address of the server.
  156 +
  157 +
  158 + 'ip_port' is the port on which the server listens. Normally, it is 80 for HTTP and 443
  159 + for HTTPS, but any other value works.
  160 +
  161 +
  162 + 'server_common_name' is the name of the server as declared on your SSL server
  163 + certificate. It is something like 'www.our-business.com', not a numerical address.
  164 +
  165 +
  166 + 'authorization_secret' is any string which should be impossible to guess, and that you
  167 + keep secret. It is used to forge authorizations for private file download. You must be
  168 + very careful that this string (which should be long enough, and chosen with the same
  169 + care as for a password) remains absolutely secret. Since you don't have to remember
  170 + it, you may just use at least 20 characters typed at random on the keyboard, including
  171 + also decimal digits. Do not hesitate to choose a new string if you have any doubt about
  172 + its secrecy. Of course, this string is not sent to the outside, but used to construct
  173 + SHA1 hashes.
  174 +
  175 +
  176 + 'redirections' is the list of redirections, already explained above.
  177 +
  178 +
  179 + 'server_directory' is the directory where the server finds and puts its data. The
  180 + server creates the following subdirectories within the server directory:
  181 +
  182 + public/ containing the public files
  183 + journal/ containing the journal files
  184 + private_download/ containing temporary files to be privately downloaded
  185 + upload_temporary/ containing temporary uploaded files
  186 +
  187 + You put your public files directly in 'public/'. The server will never send a file
  188 + which is not in the subtree whose root is 'public/'. However, the directory
  189 + 'private_download/' may also contain files which are sent to clients, but they are sent
  190 + only if the client is authorized to download the file. See the details below.
  191 +
  192 + The directory 'journal/' contains the journal files constructed by the server. Their
  193 + names are of the form:
  194 +
  195 + yyyy_mm_dd_hh
  196 +
  197 + where 'yyyy' is the year, 'mm' the month, 'dd' the day, and 'hh' the hour to which the
  198 + journal refers. The next components allow you to decide what will actually go into the
  199 + journal files (and on the console of the server).
  200 +
  201 + The directory 'upload_temporary/' contains files which have just been uploaded, but
  202 + under a temporary file name. See the type 'Web_arg' in 'web/common.anubis' for
  203 + explanations.
  204 +
  205 +
  206 + 'journal_extensions' is the list of the extensions of the URI you want to store in the
  207 + journal. A possible choice is:
  208 +
  209 + [ ".awp", ".gif", ".jpg", ".png" ]
  210 +
  211 + At least, you should put '.awp'. Whenever the URI in the request ends with one of these
  212 + extensions the request line is stored in the journal (and shown on the console of the
  213 + server).
  214 +
  215 +
  216 + 'journal_headers' is the list of HTTP headers you want to store in the journal (and
  217 + show on the console of the server) for requests which are stored in the journal. A
  218 + possible choice is:
  219 +
  220 + [ "user-agent", "host" ]
  221 +
  222 +
  223 + 'awp_handler' is the function which handles the requests (only those whose URI ends with
  224 + '.awp', because other requests are handled just by sending a file). Its type is:
  225 +
  226 + (Int32,String,List(HTTP_header),List(Web_arg),Server) -> (String,String,Web_page)
  227 +
  228 + So, it takes 5 arguments. The fifth is of type 'Server'; the first 4 are:
  229 +
  230 + Int32 ip_address, // IP address of client
  231 + String uri, // URI requested by the client
  232 + List(HTTP_header) headers, // list of HTTP headers sent by the client
  233 + List(Web_arg) web_args, // list of web arguments sent by the client
  234 +
  235 + and returns a triplet of type (String,String,Web_page). This function is called each
  236 + time a new '.awp' request arrives. It receives the IP address of the client, the
  237 + (redirected) URI requested by the client, the list of HTTP headers sent by the client,
  238 + and the list of web arguments sent by the client. From these data the function is
  239 + supposed to construct the answer to be sent to the client. This answer is of type
  240 + 'Web_page' (see 'web/html.anubis'). But it also returns two strings which are the
  241 + so-called 'c_ticket' and 's_ticket'. If you use 'web/kernel.anubis', the values of the
  242 + two tickets are produced by the function 'tickets_and_web_page' to be found there.
  243 + Otherwise, just put empty strings "" for the two tickets.
  244 +
  245 +
  246 + 'service' is a function of type (One) -> One which is executed every second in a
  247 + separate virtual machine. This function may perform any repetitive task.
  248 +
  249 +
  250 +
  251 +
  252 +
  253 +
  254 + * * * Starting your HTTP/HTTPS servers.
  255 +
  256 + Well, when your HTTP server descriptions are ready, you can start your servers with:
  257 +
  258 +public define One
  259 + start_http_server
  260 + (
  261 + HTTP_ServerDescription description
  262 + ).
  263 +
  264 +public define One
  265 + start_https_server
  266 + (
  267 + HTTPS_ServerDescription description
  268 + ).
  269 +
  270 + These functions create the subdirectories of the server directory (but not the server
  271 + directory itself, which must have been created by hand), start the server (in another
  272 + virtual machine), and return immediately.
  273 +
  274 +
  275 +
  276 +
  277 +
  278 +
  279 +
  280 + * * * Private download.
  281 +
  282 + It may happen that you want to propose private files for download. This means that such
  283 + a file could be downloaded only by the authorized person. This feature can be used only
  284 + under HTTPS, not under HTTP.
  285 +
  286 + The server creates the subdirectory 'private_download/' within the 'server directory'.
  287 + This directory and its subdirectories contain private files to be downloaded. The
  288 + server never sends a file from this directory tree without checking the authorization.
  289 +
  290 + When you construct a page for an identified client, you can insert a 'link for private
  291 + download', like this:
  292 +
  293 + with filename = "/clients/smith/secrets.pdf",
  294 + link(filename+"?auth="+make_authorization(authorization_secret,filename),
  295 + text(filename))
  296 +
  297 + This will enable the client to download the file 'secrets.pdf'. Notice that the file
  298 + name '/clients/smith/secrets.pdf' is relative to the subdirectory 'private_download/'
  299 + of the server directory.
  300 +
  301 + The value of the web argument 'auth' is a cryptographic hash which is the
  302 + authorization itself. After the client clicks on the private download link, the server
  303 + verifies that the authorization is valid and corresponds to the file which is requested
  304 + ('/clients/smith/secrets.pdf' in our example, relative to 'private_download/'). The
  305 + function 'make_authorization' just constructs the authorization; it does not manipulate
  306 + files. It is your responsibility to put the file to be downloaded at the right place.
  307 +
  308 +
  309 +
  310 +
  311 + --- That's all for the public part ! --------------------------------------------------
  312 +
  313 +
  314 +
  315 +
  316 +
  317 + --- Table of Contents ---
  318 +
  319 +
  320 + *** (1) Types which are private to this file.
  321 +
  322 + *** (2) Tools.
  323 + *** (2.1) Formatting an error message.
  324 + *** (2.2) Converting IP addresses.
  325 + *** (2.3) Reading and unputting characters.
  326 + *** (2.4) Reading and discarding characters.
  327 + *** (2.5) Reading a character string.
  328 + *** (2.6) Padding integers with zeros.
  329 + *** (2.7) Converting web arguments to ASCII.
  330 + *** (2.8) Server description.
  331 +
  332 + *** (3) Managing the journal.
  333 + *** (3.1) Naming journal files.
  334 + *** (3.2) Formatting HTTP headers.
  335 + *** (3.3) Formatting web arguments.
  336 + *** (3.4) Formatting the whole request.
  337 + *** (3.5) Putting it in the journal file (and on the console).
  338 +
  339 + *** (4) Reading the HTTP request.
  340 + *** (4.1) Skipping leading blanks.
  341 + *** (4.2) Reading a new line.
  342 + *** (4.3) Reading a 'word'.
  343 + *** (4.4) Separating the URI from the query string.
  344 + *** (4.5) Reading the web arguments.
  345 + *** (4.7) Reading the request line.
  346 + *** (4.8) Reading the HTTP headers.
  347 + *** (4.9) Getting the size of the request's body.
  348 + *** (4.10) Reading the body of the request.
  349 +
  350 + *** (5) Making the HTTP answer.
  351 + *** (5.1) Avoiding illegal URIs.
  352 + *** (5.2) Managing authorizations for downloading private files.
  353 + *** (5.3) Recognizing MIME types.
  354 + *** (5.4) Formatting HTTP headers.
  355 + *** (5.5) Sending a file.
  356 + *** (5.6) Answering a www-url encoded request.
  357 + *** (5.7) Answering a multipart/form-data encoded request.
  358 + *** (5.7.1) Finding the boundary.
  359 + *** (5.7.2) Reading attributes from a multipart entity.
  360 + *** (5.7.3) Creating a temporary filename for an uploaded file.
  361 + *** (5.7.4) Saving an uploaded file under a temporary filename.
  362 + *** (5.7.5) Removing the path from a file name.
  363 + *** (5.7.6) Reading a multipart entity.
  364 + *** (5.8) Handling redirections.
  365 + *** (5.9) Answering both sorts of requests.
  366 +
  367 + *** (6) The HTTP/HTTPS server.
  368 + *** (6.1) The HTTP request handler.
  369 + *** (6.2) Starting the HTTP/HTTPS server.
  370 +
  371 +
  372 +
  373 +read tools/basis.anubis
  374 +read tools/findstring.anubis
  375 +read tools/connections.anubis
  376 +read web/html.anubis
  377 +read web/mime.anubis
  378 +
  379 +
  380 +
  381 +
  382 +
  383 + *** (1) Types which are private to this file.
  384 +
  385 + We use the following self-explanatory types.
  386 +
  387 +type Error: // failures which may occur while reading an HTTP request
  388 + cannot_read_from_connection, // a read on the client connection failed
  389 + not_get_or_post_request(String), // first word of the request line (carried here) is neither GET nor POST
  390 + end_of_line_expected, // CR LF was expected, something else was read
  391 + incorrect_content_length_value, // the 'Content-Length' value is not an integer
  392 + colon_expected. // ':' was expected after a header name
  393 +
  394 +type HTTP_RequestType: // the two request methods recognized by 'identify_get_or_post'
  395 + get, // the request line began with "GET" (case insensitive)
  396 + post. // the request line began with "POST" (case insensitive)
  397 +
  398 +type HTTP_RequestLine: // the parsed first line of a request, as built by 'read_request_line'
  399 + request_line (HTTP_RequestType type, // GET or POST
  400 + String uri, // the part of the second word before the first '?'
  401 + List(Web_arg) query_string). // the arguments found after the '?', already parsed
  402 +
  403 +type EncodingType: // the two body encodings described in section (4.5)
  404 + www_url, // 'www-url' encoded request
  405 + multipart_form_data. // 'multipart/form-data' encoded request (needed for uploads)
  406 +
  407 +
  408 +
  409 +
  410 +
  411 +
  412 +
  413 + *** (2) Tools.
  414 +
  415 +
  416 + *** (2.1) Formatting an error message.
  417 +
  418 + The next function formats an error message.
  419 +
define String
   format   // turns an 'Error' into a one-line, human readable message
     (
       Error msg   // the error to describe
     ) =
   // None of the messages ends with a newline. 'end_of_line_expected' used to
   // carry a trailing "\n", which made it inconsistent with the four others.
   if msg is
     {
       cannot_read_from_connection then
          "Cannot read from connection.",
       not_get_or_post_request(s) then
          "The request did not begin by 'GET' or 'POST': "+s+".",
       end_of_line_expected then
          "End of line expected.",
       incorrect_content_length_value then
          "Incorrect value for HTTP header 'Content-Length'.",
       colon_expected then
          "':' was expected."
     }.
  438 +
  439 +
  440 +
  441 +
  442 +
  443 +
  444 + *** (2.2) Converting IP addresses.
  445 +
  446 + We need two conversion functions for IP addresses:
  447 +
  448 + (Int8,Int8,Int8,Int8) --> Int32 ip_address
  449 + Int32 --> String ip_addr_to_string
  450 +
  451 + These conversions are defined in 'anubis/library/tools/basis.anubis'.
  452 +
  453 +
  454 +
  455 +
  456 +
  457 +
  458 +
  459 +
  460 + *** (2.3) Reading and unputting characters.
  461 +
  462 + We need a mechanism for unputting several characters (actually at least 3). This is
  463 + because when reading the client connection, we must sometimes read ahead several
  464 + characters, and virtually put them back into the connection, so that they can be
  465 + reread. Of course, we do not send them back to the client. We store them in a list
  466 + (held by the variable 'unput_chars'), and we manage this list, so that characters may
  467 + be virtually put back in the connection (this is called 'unputting').
  468 +
  469 +variable List(Int8) unput_chars = []. // unput characters; 'next_char' delivers the head first
  470 +
  471 + The most recently read one is the head of list. Fortunately, this variable is private
  472 + to this virtual machine (hence to this client).
  473 +
  474 +
define One
   unput   // puts a character back, so that 'next_char' delivers it again
     (
       Int8 character   // the character to put back
     ) =
   with pending = *unput_chars,   // what was already waiting to be reread
   unput_chars <- (List(Int8))[character . pending].
  481 +
  482 +
define Maybe(Int8)
   read_one_byte   // reads one byte directly from the connection (ignores 'unput_chars')
     (
       Connection connection   // connection with the client
     ) =
   // NOTE(review): the third argument of 'read' (60) is presumably a timeout; confirm.
   with attempt = read(connection,1,60),
   if attempt is
     {
       failure        then failure,
       success(bytes) then nth(0,bytes)   // first byte of what was read, as a 'Maybe'
     }.
  493 +
  494 +
define Maybe(Int8)
   next_char   // next character of the request: unput characters first, then the connection
     (
       Connection connection   // connection with the client
     ) =
   with pending = *unput_chars,
   if pending is
     {
       [ ] then read_one_byte(connection),   // nothing was unput: really read

       [first . others] then                 // deliver the most recently unput character
          unput_chars <- others;
          success(first)
     }.
  509 +
  510 +
  511 +
  512 +
  513 +
  514 +
  515 +
  516 + *** (2.4) Reading and discarding characters.
  517 +
  518 + The next function reads the specified number of bytes (this is the same as
  519 + 'characters') from the connection and discards them. This is used for discarding CR LF
  520 + just before the body of a request.
  521 +
define Result(Error,One)
   read_and_ignore   // reads characters (through 'next_char') and throws them away
     (
       Connection connection,           // to client
       Int32      number_of_characters  // how many characters to read and ignore
     ) =
   if number_of_characters > 0
   then if next_char(connection) is
          {
            failure    then error(cannot_read_from_connection),
            success(_) then read_and_ignore(connection,number_of_characters-1)
          }
   else ok(unique).                     // nothing (more) to discard
  534 +
  535 +
  536 +
  537 +
  538 +
  539 +
  540 +
  541 + *** (2.5) Reading a character string.
  542 +
  543 + Sometimes values of HTTP attributes or web args are presented in the form of double
  544 + quoted strings. The next function handles the reading of such things. The leading
  545 + double quote is already read in. We must read subsequent characters until the next non
  546 + backslashed double quote.
  547 +
define Result(Error,String)
   read_string   // reads a double quoted string whose opening quote has already been read
     (
       Connection connection,   // connection with the client
       List(Int8) so_far        // characters read so far (in reverse order)
     ) =
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(ch) then
          if ch = '\"'
          then ok(implode(reverse(so_far)))                 // closing quote: done
          else if ch = '\\'
          then if next_char(connection) is
                 {
                   failure then error(cannot_read_from_connection),
                   success(escaped) then
                      if escaped = '\"'
                      then read_string(connection,['\"' . so_far])        // \" gives a plain "
                      else read_string(connection,[escaped, ch . so_far]) // keep both characters
                 }
          else read_string(connection,[ch . so_far])        // ordinary character
     }.
  571 +
  572 +
  573 +
  574 +
  575 +
  576 +
  577 +
  578 + *** (2.6) Padding integers with zeros.
  579 +
  580 + 'zero_pad_2' transforms an integer (which is assumed to be between 0 and 99) into a
  581 + string with exactly two digits. This is used for formatting days, hours, minutes and
  582 + seconds.
  583 +
define String
   zero_pad_2   // decimal representation of 'n', with a leading "0" if it has less than 2 characters
     (
       Int32 n   // assumed to be between 0 and 99
     ) =
   with digits = integer_to_string(n),
   if length(digits) < 2 then "0"+digits else digits.
  593 +
  594 +
  595 +
  596 +
  597 +
  598 +
  599 +
  600 + *** (2.7) Converting web arguments to ASCII.
  601 +
  602 + The function 'web_to_ascii' gets a character string and replaces web encoding by normal
  603 + ASCII encoding. This amounts to replacing:
  604 +
  605 + + by blank
  606 + %xx by the character whose ASCII code is xx in hexadecimal
  607 +
  608 + Note: We assume that '9' < 'A' (which is the case for ASCII code).
  609 +
  610 +
  611 +
define Int32
   web_hex_digit_value   // value of one hexadecimal digit of a '%xx' escape
     (
       Int32 z   // character code of the digit: '0'..'9', 'A'..'F' or 'a'..'f'
     ) =
   // We rely on '9' < 'A' < 'a' (true in ASCII). Lowercase digits are legal in
   // '%xx' escapes and used to be decoded as garbage. Codes which are not
   // hexadecimal digits are not rejected: they yield a meaningless value.
   if z =< '9' then (z - '0')      else
   if z <  'a' then (z - 'A' + 10) else
                    (z - 'a' + 10).

define Int8
   web_decode   // the character encoded by the web escape '%x1x2'
     (
       Int8 x1,   // first (high order) hexadecimal digit
       Int8 x2    // second (low order) hexadecimal digit
     ) =
   with n = (web_hex_digit_value(int8_to_int32(x1)) << 4)
            + web_hex_digit_value(int8_to_int32(x2)),
   if n =< 127
   then truncate_to_int8(n)
   else // accented letters are given explicitly; any other code is just truncated
        if n = 224 then 'à' else
        if n = 226 then 'â' else
        if n = 231 then 'ç' else
        if n = 232 then 'è' else
        if n = 233 then 'é' else
        if n = 234 then 'ê' else
        if n = 235 then 'ë' else
        if n = 238 then 'î' else
        if n = 239 then 'ï' else
        if n = 244 then 'ô' else
        if n = 246 then 'ö' else
        if n = 249 then 'ù' else
        if n = 251 then 'û' else
        if n = 252 then 'ü' else
        truncate_to_int8(n).
  640 +
define String
   web_to_ascii   // replaces '+' by a blank and '%xx' by the character it encodes
     (
       String     web_string,   // the web encoded text
       Int32      n,            // current position in web_string
       List(Int8) so_far        // decoded characters (in reverse order)
     ) =
   if nth(n,web_string) is
     {
       failure then implode(reverse(so_far)),      // end of the text
       success(ch) then
          if ch = '+'
          then web_to_ascii(web_string,n+1,[' ' . so_far])
          else if ch = '%'
          then if nth(n+1,web_string) is
                 {
                   // an escape cut short by the end of the text is dropped
                   failure then implode(reverse(so_far)),
                   success(high) then if nth(n+2,web_string) is
                     {
                       failure then implode(reverse(so_far)),
                       success(low) then
                          web_to_ascii(web_string,n+3,[web_decode(high,low) . so_far])
                     }
                 }
          else web_to_ascii(web_string,n+1,[ch . so_far])
     }.
  666 +
  667 +
  668 +
  669 +
  670 + *** (2.8) Server description.
  671 +
  672 + We need a server description common for HTTP and HTTPS servers.
  673 +
  674 +type ServerDescription: // internal description, common to HTTP and HTTPS servers
  675 + server_description(Int32 ip_address,
  676 + Int32 ip_port,
  677 + Maybe(String) server_common_name, // 'failure' for a plain HTTP server (see 'internal_description')
  678 + Maybe(String) authorization_secret, // 'failure' for a plain HTTP server (see 'internal_description')
  679 + List(Redirection) redirections,
  680 + String server_directory, // the journal files go into its 'journal/' subdirectory
  681 + List(String) journal_extensions,
  682 + List(String) journal_headers, // names of the HTTP headers which are shown in the journal
  683 + (Int32,
  684 + String,
  685 + List(HTTP_header),
  686 + List(Web_arg),
  687 + Server) -> (String,
  688 + String,
  689 + Web_page) awp_handler). // handler supplied by the user of this program
  690 +
  691 +
  692 +
  693 +
  694 + We have to construct this 'internal' server description from the one provided by the
  695 + user of this program.
  696 +
define ServerDescription
   internal_description   // internal description of a plain HTTP server
     (
       HTTP_ServerDescription d   // description provided by the user
     ) =
   // A plain HTTP server has neither a common name nor an authorization secret.
   // The last component of 'd' is not used here.
   if d is http_server_description(addr,prt,redirs,root,exts,hdrs,awp,_) then
      server_description(addr,prt,failure,failure,redirs,root,exts,hdrs,awp).
  704 +
define ServerDescription
   internal_description   // internal description of an HTTPS server
     (
       HTTPS_ServerDescription d   // description provided by the user
     ) =
   // The common name and the authorization secret are always present for HTTPS.
   // The last component of 'd' is not used here.
   if d is https_server_description(addr,prt,name,secret,redirs,root,exts,hdrs,awp,_) then
      server_description(addr,prt,success(name),success(secret),redirs,root,exts,hdrs,awp).
  712 +
  713 +
  714 +
  715 +
  716 +
  717 +
  718 +
  719 +
  720 + *** (3) Managing the journal.
  721 +
  722 + Concurrently working machines should not try to access the same file at the same
  723 + time. This problem may be solved by using the 'protect' mechanism.
  724 +
  725 +
  726 +
  727 + *** (3.1) Naming journal files.
  728 +
  729 + Since journal messages are rather prolific, we should have at least one file per
  730 + hour. Hence, the name of a journal file must be constructed from the current year,
  731 + month, day and hour. For example, it may be:
  732 +
  733 + 2003_02_12_19
  734 +
  735 + (this is for the journal of 7 PM to 8 PM, 2003/mar/12; months are numbered from 0, hence '02' for March).
  736 +
define String
   make_current_journal_file_name   // "year_month_day_hour" for the current time
     =
   // The month is written as delivered by 'convert_time', only padded to 2 characters.
   if convert_time(now) is date_and_time(yy,mm,dd,hh,_,_,_,_) then
      with day_part = integer_to_string(yy)+"_"+zero_pad_2(mm)+"_"+zero_pad_2(dd),
      day_part+"_"+zero_pad_2(hh).
  745 +
  746 +
  747 +
  748 +
  749 +
  750 +
  751 +
  752 + *** (3.2) Formatting HTTP headers.
  753 +
  754 + HTTP headers may be shown on the console or written in the journal. The function below
  755 + formats a list of HTTP headers.
  756 +
define String
   show_format   // one " | name: value" line for each header to be shown in the journal
     (
       ServerDescription desc,      // its 'journal_headers' tells which headers are shown
       List(HTTP_header) headers    // the headers of the request
     ) =
   if headers is
     {
       [ ] then "",
       [first . others] then
          with rest = show_format(desc,others),      // the lines for the remaining headers
          if first is http_header(name,value) then
             if member(journal_headers(desc),name)
             then " | "+name+": "+value+"\n"+rest
             else rest                               // this header is not shown
     }.
  771 +
  772 +
  773 +
  774 +
  775 +
  776 +
  777 +
  778 +
  779 + *** (3.3) Formatting web arguments.
  780 +
  781 + The same thing for web arguments.
  782 +
define String
   show_format   // one " | name=value" line for each web argument
     (
       List(Web_arg) lwa   // the web arguments of the request
     ) =
   if lwa is
     {
       [ ] then "",
       [first . others] then
          with rest = show_format(others),     // the lines for the remaining arguments
          if first is
            {
              web_arg(name,value) then
                 " | "+name+"="+value+"\n"+rest,
              upload(name,file_name,temp_name) then
                 " | "+name+"="+file_name+" (uploaded as '"+temp_name+"')\n"+rest
            }
     }.
  799 +
  800 +
  801 +
  802 +
  803 +
  804 +
  805 +
  806 +
  807 + *** (3.4) Formatting the whole request.
  808 +
  809 + It is cheap to transform month numbers (beginning at 0 !) into abbreviated month
  810 + names. This enhances the readability of the journal.
  811 +
define String
   format_month   // abbreviated name of a month
     (
       Int32 m   // month number, 0 for January
     ) =
   // Any number which is not between 0 and 10 gives "dec".
   if m = 0  then "jan" else if m = 1  then "feb" else if m = 2  then "mar" else
   if m = 3  then "apr" else if m = 4  then "may" else if m = 5  then "jun" else
   if m = 6  then "jul" else if m = 7  then "aug" else if m = 8  then "sep" else
   if m = 9  then "oct" else if m = 10 then "nov" else
   "dec".
  829 +
  830 +
  831 + Below we format a whole HTTP request. This may give this (actually, it depends on how
  832 + you defined the values of 'journal_headers' and 'journal_extensions'):
  833 +
  834 + [3] 2003/mar/10 10:06:57 from 123.456.123.456: /homepage.awp
  835 + | host: www.the-best-one.com
  836 + | user-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020823 Netscape/7.0
  837 +
  838 + The leading number between brackets is the number of the virtual machine which served
  839 + the URI.
  840 +
define String
   format_request   // journal text for a request: date, time, client address, URI, headers, arguments
     (
       ServerDescription desc,                // selects the headers to be shown
       Connection        client_connection,   // gives the address of the client
       HTTP_RequestLine  request_line,        // gives the URI
       List(HTTP_header) headers,
       List(Web_arg)     web_args
     ) =
   with stamp = convert_time(now),
   if remote_IP_address_and_port(client_connection) is (client_addr,client_port) then
      // 'client_port' is not shown in the journal
      with date_text = integer_to_string(year(stamp))+"/"+
                       format_month(month(stamp))+"/"+
                       zero_pad_2(day(stamp)),
           time_text = zero_pad_2(hour(stamp))+":"+
                       zero_pad_2(minute(stamp))+":"+
                       zero_pad_2(second(stamp)),
      date_text+" "+time_text+
      " from "+ip_addr_to_string(client_addr)+
      ": "+uri(request_line)+"\n"+
      show_format(desc,headers)+
      show_format(web_args).
  858 +
  859 +
  860 +
  861 +
  862 +
  863 +
  864 +
  865 +
  866 +
  867 + *** (3.5) Putting it in the journal file (and on the console).
  868 +
  869 + We must not forget to 'protect' this operation, so that the messages of two machines
  870 + will not be mixed together.
  871 +
define One
   log_journal_msg   // appends a message to the current journal file and writes it on stdout
     (
       ServerDescription desc,   // gives the directory which contains 'journal/'
       String            msg     // the message, without the machine number and final newline
     ) =
   with line = to_byte_array("["+virtual_machine_id+"] "+msg+"\n"),
   protect   // so that the lines of two machines are not mixed together
     (
       if file(server_directory(desc)+"/journal/"+make_current_journal_file_name,append) is
         {
           failure then unique,                    // no journal file: console only
           success(journal) then
              forget(reliable_write(file(journal),line))
         };
       forget(reliable_write(file(stdout),line))
     ).
  889 +
  890 +
  891 +
  892 +
  893 +
  894 +
  895 +
  896 +
  897 + *** (4) Reading the HTTP request.
  898 +
  899 +
  900 + *** (4.1) Skipping leading blanks.
  901 +
  902 + One of the peculiarities of HTTP is that the sequence of characters 13 (carriage return)
  903 + and 10 (line feed), followed by either a space (32) or a tab (9), is considered as a blank
  904 + not containing any new line. 'skip_http_blanks' must skip all blank characters until the
  905 + first non blank character, which should not be read in. Obviously, because of the above
  906 + peculiarity, we need at least 3 characters of lookahead to do this. In other words, we
  907 + must be able to unput at least 3 characters (fortunately we are).
  908 +
  909 + The strictly blank characters are 'space' and 'tab'.
  910 +
define Bool
   is_strict_blank   // true for a space or a tab only
     (
       Int8 c   // the character to test
     ) =
   if c = '\t' then true else c = ' '.
  917 +
  918 +
  919 + On the contrary, blanks include 13 and 10.
  920 +
define Bool
   is_blank   // true for a space, a tab, a carriage return (13) or a line feed (10)
     (
       Int8 c   // the character to test
     ) =
   if c = 10   then true else
   if c = 13   then true else
   if c = '\t' then true else
   c = ' '.
  930 +
  931 +
  932 + Skipping HTTP blanks.
  933 +
define One
   skip_http_blanks   // skips spaces, tabs, and any CR LF which is followed by a space or a tab
     (
       Connection connection   // connection with the client
     ) =
   // Whatever is read but is not part of a blank is unput, most recently read first,
   // so that it is reread in its original order.
   if next_char(connection) is
     {
       failure then unique,
       success(first) then
          if is_strict_blank(first)
          then skip_http_blanks(connection)
          else if first = 13
          then if next_char(connection) is
                 {
                   failure then (unput(first); unique),
                   success(second) then
                      if second = 10
                      then if next_char(connection) is
                             {
                               failure then (unput(second); unput(first); unique),
                               success(third) then
                                  if is_strict_blank(third)
                                  then skip_http_blanks(connection)    // CR LF blank: still a blank
                                  else (unput(third); unput(second); unput(first); unique)
                             }
                      else (unput(second); unput(first); unique)       // CR not followed by LF
                 }
          else (unput(first); unique)                                  // first non blank character
     }.
  963 +
  964 +
  965 +
  966 +
  967 +
  968 +
  969 +
  970 +
  971 + *** (4.2) Reading a new line.
  972 +
  973 + Normally in HTTP a new line is the sequence 13 10 (carriage return line feed), not
  974 + followed by a space or tabulator. If it is followed by a space or tabulator, the three
  975 + characters are considered blanks, and no new line has been read. Before trying to read
  976 + a new line, we first skip leading spaces and tabs. Then we try to read 13 and 10, and
  977 + we read another character. If this character is space or tab, we consider we have read
  978 + only blanks and we continue reading in order to find our new line. Otherwise, we unput
  979 + this character (which may be for example the first character of the name of the next
  980 + header), and answer that we have seen a new line.
  981 +
  982 + Warning: we must not use this function for reading the last pair (13,10) before the
  983 + beginning of the body, because if the body is empty, there is no character to read
  984 + after this pair, so that the server could wait for a character which will never
  985 + come. This is the reason for 'read_and_ignore' above, which is used precisely for
  986 + reading that last (13,10) pair.
  987 +
define Result(Error,One)
   read_new_line   // skips the blanks, then requires CR LF
     (
       Connection connection   // connection with the client
     ) =
   skip_http_blanks(connection);
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(first) then
          if first = 13
          then if next_char(connection) is
                 {
                   failure then error(cannot_read_from_connection),
                   success(second) then
                      if second = 10
                      then ok(unique)
                      else (unput(second);        // not a new line: both characters
                            unput(first);         // are put back
                            error(end_of_line_expected))
                 }
          else (unput(first);
                error(end_of_line_expected))
     }.
  1012 +
  1013 +
  1014 +
  1015 +
  1016 +
  1017 +
  1018 +
  1019 +
  1020 + *** (4.3) Reading a 'word'.
  1021 +
  1022 + A 'word' is a sequence of characters which either begins with a double quote or does
  1023 + not. (However, any leading blanks are read in and ignored. This is
  1024 + accomplished by 'skip_http_blanks'.) If it begins with a double quote, it is read like a
  1025 + string, i.e. it ends at the next (non backslashed) double quote. Otherwise, it is
  1026 + right delimited by any character which may be considered as 'blank'. If the word is
  1027 + double quoted, the closing double quote is read in. On the contrary, if the word is not
  1028 + double quoted, the right delimiting blank character is not read in (it is 'unput' back
  1029 + into the connection), and may be read in again. This is needed because carriage return
  1030 + or line feed which are 'blank', also have a meaning in HTTP.
  1031 +
define Result(Error,String)
   read_word_aux   // reads the rest of a non quoted word, up to (not including) the next blank
     (
       Connection connection,   // connection with the client
       List(Int8) so_far        // characters of the word read so far (in reverse order)
     ) =
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(ch) then
          if is_blank(ch)
          then (unput(ch);                          // the delimiter may be read again
                ok(implode(reverse(so_far))))
          else read_word_aux(connection,[ch . so_far])
     }.
  1047 +
define Result(Error,String)
   read_word   // skips the leading blanks, then reads a quoted or non quoted word
     (
       Connection connection   // connection with the client
     ) =
   skip_http_blanks(connection);
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(c) then
          if c = '\"'
          then read_string(connection,[])
          else if is_blank(c)
          // 'skip_http_blanks' stops in front of a carriage return or line feed which
          // is not part of a blank. Such a character delimits the (here empty) word.
          // It used to be swallowed as the first character of the word.
          then (unput(c); ok(""))
          else read_word_aux(connection,[c])
     }.
  1062 +
  1063 +
  1064 +
  1065 +
  1066 +
  1067 +
  1068 +
  1069 +
  1070 + *** (4.4) Separating the URI from the query string.
  1071 +
  1072 + A 'query string' may be postfixed to the URI, just after a question mark. For example,
  1073 + the client may send the following request:
  1074 +
  1075 + GET /catalog.awp?item=3&color=blue
  1076 +
  1077 + We separate this into an URI: "/catalog.awp" and the string: "item=3&color=blue" which
  1078 + will be later transformed into the list:
  1079 +
  1080 + [web_arg("item","3"),web_arg("color","blue")]
  1081 +
  1082 +
define (String,String)
   separate_uri_from_query_string   // splits "uri?query" at the first '?'
     (
       String uri_and_query_string,   // second word of the request line
       Int32  n                       // current position (0 when first called)
     ) =
   if nth(n,uri_and_query_string) is
     {
       failure then (uri_and_query_string,""),       // no '?': the query string is empty
       success(ch) then
          if ch = '?'
          then with after = n+1,                      // position just after the '?'
               (substr(uri_and_query_string,0,n),
                substr(uri_and_query_string,after,length(uri_and_query_string)-after))
          else separate_uri_from_query_string(uri_and_query_string,n+1)
     }.
  1098 +
  1099 +
  1100 +
  1101 +
  1102 +
  1103 +
  1104 +
  1105 +
  1106 +
  1107 + *** (4.5) Reading the web arguments.
  1108 +
  1109 + HTTP requests are sent in one of two formats:
  1110 +
  1111 + (1) www-url encoded
  1112 + (2) multipart/form-data encoded
  1113 +
  1114 + The first one is the normal (historical) way of encoding. The second one is required
  1115 + for uploading files. A server which is supposed to accept upload of files must handle
  1116 + both formats. The first thing to do is to decide the format of the request. This is
  1117 + easily done by examining the HTTP headers. If we find the header:
  1118 +
  1119 + Content-Type: multipart/form-data
  1120 +
  1121 + the request is multipart/form-data encoded. Otherwise, it is 'www-url' encoded. We
  1122 + first consider 'www-url' encoded requests.
  1123 +
  1124 + For a 'www-url' encoded request, the web arguments are either in the query string or in
  1125 + the body of the request, or both. The format is the same for both:
  1126 +
  1127 + name=value&name=value&...
  1128 +
  1129 + However, we may also have
  1130 +
  1131 + name
  1132 + name=
  1133 + name=&...
  1134 + name&...
  1135 +
  1136 + i.e. some parts may be missing. Hence, we must be careful.
  1137 +
  1138 + Furthermore, web arguments must be translated from web to ASCII when www-url encoded.
  1139 +
define Bool
   is_ampersand_or_equal   // true for the two separators '&' and '='
     (
       Int8 c   // the character to test
     ) =
   if c = '=' then true else c = '&'.
  1146 +
  1147 +
  1148 +
  1149 + The function 'read_name_or_value' reads the string 's' starting at position 'start' until
  1150 + either the end of the string or the first '&' or '='.
  1151 +
define String
   read_name_or_value   // the part of 's' from 'start' up to the next '&', '=' or the end
     (
       String s,       // the text containing the web arguments
       Int32  start,   // where the name or value begins
       Int32  i        // current position (equal to 'start' when first called)
     ) =
   if nth(i,s) is
     {
       failure then substr(s,start,i-start),          // end of the text
       success(ch) then
          if is_ampersand_or_equal(ch)
          then substr(s,start,i-start)                // the separator is not included
          else read_name_or_value(s,start,i+1)
     }.
  1167 +
  1168 +
define List(Web_arg)
   read_www_url_encoded_web_args   // parses "name=value&name=value&..." starting at 'start'
     (
       String s,       // query string or body of a www-url encoded request
       Int32  start    // position of the first character to be read
     ) =
   // Both names and values are translated from web to ASCII (names used to be
   // left encoded). A pair whose name is empty ("&&", a leading '&', "=value")
   // is dropped. It used to end the parsing, losing all subsequent arguments.
   with raw_name = read_name_or_value(s,start,start),
        i        = start+length(raw_name),            // position of the separator, if any
   if nth(i,s) is
     {
       failure then                                    // the name ends the text
          if raw_name = ""
          then []
          else [web_arg(web_to_ascii(raw_name,0,[]),"")],
       success(c) then
          if c = '&'
          then (if raw_name = ""
                then read_www_url_encoded_web_args(s,i+1)
                else [web_arg(web_to_ascii(raw_name,0,[]),"")
                      . read_www_url_encoded_web_args(s,i+1)])
          else if c = '='
          then (with raw_value = read_name_or_value(s,i+1,i+1),
                     resume    = i+length(raw_value)+2,   // just after the separator ending the value
                if raw_name = ""
                then read_www_url_encoded_web_args(s,resume)
                else [web_arg(web_to_ascii(raw_name,0,[]),web_to_ascii(raw_value,0,[]))
                      . read_www_url_encoded_web_args(s,resume)])
          else alert    // cannot happen: 'read_name_or_value' stops only at '&', '=' or the end
     }.
  1192 +
  1193 +
  1194 +
  1195 +
  1196 +
  1197 + *** (4.7) Reading the request line.
  1198 +
  1199 + 'read_request_line' reads three words and a new line from the connection. It tries to
  1200 + recognize "GET" or "POST" in the first word, separates the URI from the query string in
  1201 + the second word, transforms the query string into a list of 'Web_arg', and finally
  1202 + returns a datum of type 'HTTP_RequestLine' if no error arose.
  1203 +
  1204 +
define Result(Error,HTTP_RequestType)
   identify_get_or_post   // recognizes "GET" or "POST", whatever the case of the letters
     (
       String s   // first word of the request line
     ) =
   with lowered = to_lower(s),
   if lowered = "get"  then ok(get)  else
   if lowered = "post" then ok(post) else
   error(not_get_or_post_request(lowered)).   // the error carries the lower case word
  1214 +
define Result(Error,HTTP_RequestLine)
   read_request_line   // reads "method uri[?query] version CR LF" from the connection
     (
       Connection connection   // connection with the client
     ) =
   // Three words and a new line are read first. The method is checked only
   // afterwards. The third word (the HTTP version) is read but not used.
   if read_word(connection) is
     {
       error(e) then error(e),
       ok(method_word) then if read_word(connection) is
         {
           error(e) then error(e),
           ok(target) then if read_word(connection) is
             {
               error(e) then error(e),
               ok(version_word) then if read_new_line(connection) is
                 {
                   error(e) then error(e),
                   ok(_) then
                      if separate_uri_from_query_string(target,0) is (uri,query_string) then
                      if identify_get_or_post(method_word) is
                        {
                          error(e) then error(e),
                          ok(request_type) then
                             ok(request_line(request_type,
                                             uri,
                                             read_www_url_encoded_web_args(query_string,0)))
                        }
                 }
             }
         }
     }.
  1243 +
  1244 +
  1245 +
  1246 +
  1247 +
  1248 +
  1249 +
  1250 + *** (4.8) Reading the HTTP headers.
  1251 +
  1252 + Each header is made of a name (containing only letters, the underscore, digits and the
  1253 + minus sign), a colon, a value, and a new line. The first empty line ends the headers.
  1254 +
  1255 +
  1256 + The next function tests characters acceptable in a header name.
  1257 +
define Bool
   is_header_name_char   // true for letters, digits, '-' and '_'
     (
       Int8 c   // the character to test
     ) =
   if c = '_' then true else
   if c = '-' then true else
   with code = int8_to_int32(c),
   if ('0' =< code & code =< '9') then true else
   if ('A' =< code & code =< 'Z') then true else
   ('a' =< code & code =< 'z').
  1269 +
define Result(Error,String)
   read_header_name   // reads a header name, converted to lower case
     (
       Connection connection,   // connection with the client
       List(Int8) so_far        // characters read so far (in reverse order)
     ) =
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(ch) then
          if is_header_name_char(ch)
          then read_header_name(connection,[to_lower(ch) . so_far])
          else (unput(ch);                        // first character after the name
                ok(implode(reverse(so_far))))     // may be the empty string
     }.
  1284 +
define Result(Error,One)
   skip_colon   // skips the blanks, then requires a ':'
     (
       Connection connection   // connection with the client
     ) =
   skip_http_blanks(connection);
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(ch) then
          if ch = ':'
          then ok(unique)
          else error(colon_expected)     // the offending character is not put back
     }.
  1299 +
  1300 +
define Result(Error,String)
   read_header_value   // reads the value of a header, up to the CR LF which ends it
     (
       Connection connection,   // connection with the client
       List(Int8) so_far        // characters read so far (in reverse order)
     ) =
   // A CR LF followed by a space or a tab does not end the value: the CR LF is
   // dropped, the space or tab is kept, and the reading continues.
   if next_char(connection) is
     {
       failure then error(cannot_read_from_connection),
       success(first) then
          if first = 13
          then if next_char(connection) is
                 {
                   failure then error(cannot_read_from_connection),
                   success(second) then
                      if second = 10
                      then if next_char(connection) is
                             {
                               failure then error(cannot_read_from_connection),
                               success(third) then
                                  if is_strict_blank(third)
                                  then read_header_value(connection,[third . so_far])
                                  else (unput(third);      // belongs to the next line
                                        ok(implode(reverse(so_far))))
                             }
                      else read_header_value(connection,[second, first . so_far])
                 }
          else read_header_value(connection,[first . so_far])
     }.
  1329 +
  1330 +
  1331 + Reading a single header.
  1332 +
define Result(Error,Maybe(HTTP_header))
   read_header   // reads one header; 'ok(failure)' means: end of headers
     (
       Connection connection   // connection with the client
     ) =
   if read_header_name(connection,[]) is
     {
       error(e) then error(e),
       ok(name) then
          if name = "" then
             // No name: taken as the blank line ending the headers. Two
             // characters (13 and 10) are discarded without being checked.
             if read_and_ignore(connection,2) is
               {
                 error(e) then error(e),
                 ok(_)    then ok(failure)
               }
          else if skip_colon(connection) is
               {
                 error(e) then error(e),
                 ok(_) then
                    skip_http_blanks(connection);       // blanks between ':' and the value
                    if read_header_value(connection,[]) is
                      {
                        error(e)  then error(e),
                        ok(value) then ok(success(http_header(name,value)))
                      }
               }
     }.
  1361 +
  1362 +
  1363 +
  1364 + Reading all the headers.
  1365 +
define Result(Error,List(HTTP_header))
   read_http_headers   // reads all the headers, in the order they were sent
     (
       Connection connection   // connection with the client
     ) =
   if read_header(connection) is
     {
       error(e) then error(e),
       ok(maybe_header) then if maybe_header is
         {
           failure then ok([ ]),                      // blank line: no more headers
           success(first) then
              if read_http_headers(connection) is
                {
                  error(e)   then error(e),
                  ok(others) then ok([first . others])
                }
         }
     }.
  1385 +
  1386 +
  1387 +
  1388 +
  1389 +
  1390 +
  1391 +
  1392 + *** (4.9) Getting the size of the request's body.
  1393 +
  1394 + The size of the body of the request is given under the 'Content-Length' header. If this
  1395 + header is not present, the size is assumed to be zero.
  1396 +
define Result(Error,Int32)
   get_body_size   // the size of the body, as announced by the first 'content-length' header
     (
       List(HTTP_header) headers   // the headers of the request (names in lower case)
     ) =
   // Without a 'content-length' header the size is 0. A value which is not an
   // integer is an error, and so is a negative one. A negative value sent by
   // the client used to be returned as the size of the body.
   if headers is
     {
       [ ] then ok(0),
       [h . t] then if h is http_header(name,value) then
          if name = "content-length"
          then if string_to_integer(value) is
                 {
                   failure then error(incorrect_content_length_value),
                   success(n) then
                      if n < 0
                      then error(incorrect_content_length_value)
                      else ok(n)
                 }
          else get_body_size(t)
     }.
  1414 +
  1415 +
  1416 +
  1417 +
  1418 +
  1419 +
  1420 +
  1421 +
  1422 +
  1423 +
  1424 + *** (4.10) Reading the body of the request.
  1425 +
  1426 + The body of the request may be very big (it contains uploaded files, if any). We read
  1427 + it using the primitive 'read', which returns the number of bytes read, which may be
  1428 + less than the number of bytes we wanted to read. This is not an error, but simply due
  1429 + to the fact the buffer associated with the connection in the Linux (or MS-Windows)
  1430 + kernel has a limited size. Hence, we must read bytes again until we have read the
  1431 + required number of bytes. However, if the number of bytes read is zero, the connection
  1432 + may be broken. In that case, we must not try to read indefinitely. Instead, we make
  1433 + only a bounded number of retries (the 'retries' argument), sleeping briefly between them.
  1434 +
  1435 +define Result(Error,ByteArray)
  1436 + read_http_body
  1437 + (
  1438 + Connection connection,
  1439 + Int32 body_size,
  1440 + ByteArray so_far, // when calling this function, 'so_far' is the empty byte array
  1441 + Int32 retries // this function is called with retries = 10
  1442 + ) =
  1443 + if body_size = 0 then ok(constant_byte_array(0,0)) else
  1444 + if retries =< 0 then error(cannot_read_from_connection) else
  1445 + if read(connection,body_size,60) is
  1446 + {
  1447 + failure then error(cannot_read_from_connection),
  1448 + success(new_bytes) then with
  1449 + ba = so_far + new_bytes, // contains all the bytes read so far
  1450 + nr = length(ba), // total read since the beginning
  1451 + nn = length(new_bytes), // number of bytes just read
  1452 + if nr < body_size // must read more bytes
  1453 + then if nn > 0 // if connection seems to work
  1454 + then read_http_body(connection,body_size,ba,1000) // continue reading
  1455 + else sleep(100); // otherwise, sleep 1/10 of second
  1456 + read_http_body(connection,body_size,ba, // and retry reading
  1457 + retries-1) // but no more than 10 times
  1458 + else ok(ba) // required number of bytes has been read
  1459 + }.
  1460 +
  1461 +
  1462 + Note: During sleeping, 'anbexec' runs other machines. Actually, calling 'sleep', even
  1463 + for one millisecond, is a way of yielding explicitly, so that other virtual
  1464 + machines may work.
  1465 +
  1466 +
  1467 +
  1468 +
  1469 +
  1470 +
  1471 +
  1472 +
  1473 +
  1474 +
  1475 +
  1476 +
  1477 + *** (5) Making the HTTP answer.
  1478 +
  1479 + At that point we have read the request line, the headers and the body of the
  1480 + request, and we must decide what to do.
  1481 +
  1482 + Actually, we can do one of the following:
  1483 +
  1484 + - send a file,
  1485 + - execute 'tickets_and_web_page' in case of an ".awp" URI.
  1486 +
  1487 + The uploaded files (which are in the body of the request) are saved into temporary
  1488 + files, as described below.
  1489 +
  1490 +
  1491 +
  1492 +
  1493 +
  1494 + *** (5.1) Avoiding illegal URIs.
  1495 +
  1496 + For security reasons, we must avoid illegal URIs, for example those which may climb up
  1497 + in the file hierarchy. First we accept only few characters in URIs.
  1498 +
  1499 +define Bool
  1500 + is_legal_uri_char
  1501 + (
  1502 + Int8 c
  1503 + ) =
  1504 + with n = int8_to_int32(c),
  1505 + if ('a' =< n & n =< 'z') then true else // accept 'a' to 'z'
  1506 + if ('A' =< n & n =< 'Z') then true else // accept 'A' to 'Z'
  1507 + if ('0' =< n & n =< '9') then true else // accept '0' to '9'
  1508 + if c = '.' then true else // accept '.' '-' '/' and '_'
  1509 + if c = '-' then true else
  1510 + if c = '/' then true else
  1511 + c = '_'.
  1512 +
  1513 + We do not accept ~ which is some way of climbing. Of course, we cannot disallow single
  1514 + dots, which are most often present in legal URIs, but we must avoid double dots ..
  1515 + which mean 'climb up'.
  1516 +
  1517 +define Bool
  1518 + is_illegal_uri
  1519 + (
  1520 + String uri,
  1521 + Int32 n
  1522 + ) =
  1523 + if nth(n,uri) is
  1524 + {
  1525 + failure then false,
  1526 + success(c) then
  1527 + if c = '.' // first dot
  1528 + then if nth(n+1,uri) is
  1529 + {
  1530 + failure then false,
  1531 + success(d) then
  1532 + if d = '.' // second dot
  1533 + then true
  1534 + else is_illegal_uri(uri,n+1)
  1535 + }
  1536 + else is_illegal_uri(uri,n+1)
  1537 + }.
  1538 +
  1539 +
  1540 +
  1541 +
  1542 +
  1543 +
  1544 + *** (5.2) Managing authorizations for downloading private files.
  1545 +
  1546 + It may be the case that you provide to your client a link for downloading a private
  1547 + file, that is to say a file which nobody else should be able to download. Such a file
  1548 + should not be located in the public directory tree, but in the private download
  1549 + directory. This feature is available only under HTTPS for security reasons.
  1550 +
  1551 + It works as follows. The function 'make_authorization' constructs a cryptographic
  1552 + 'authorization'. This authorization is put in the query string of the URI in the link
  1553 + for download.
  1554 +
  1555 + When the client subsequently clicks on the link, the server receives a 'GET' query with
  1556 + the path of the file as the URI, and the authorization as the value of the web argument
  1557 + 'auth'. The server verifies that the authorization is valid for this file. If that is the
  1558 + case, it also performs the usual checks (illegal URI, and MIME type), and sends
  1559 + the file if everything is OK.
  1560 +
  1561 +
  1562 +public define String
  1563 + make_authorization
  1564 + (
  1565 + String authorization_secret,
  1566 + String filename
  1567 + ) =
  1568 + to_ascii(sha1((authorization_secret,filename))).
  1569 +
  1570 + The function 'send_file' defined below handles the recognition of authorizations.
  1571 +
  1572 +
  1573 +
  1574 +
  1575 +
  1576 +
  1577 +
  1578 + *** (5.3) Recognizing MIME types.
  1579 +
  1580 + The extension of the (redirected) URI must be either ".awp" or recognized as associated
  1581 + to a MIME type. Otherwise, the server will not send the file. This is for security, but
  1582 + also because we must generate a 'Content-Type' header in the answer, with the right
  1583 + MIME type.
  1584 +
  1585 +define String
  1586 + get_uri_extension_aux
  1587 + (
  1588 + String uri,
  1589 + Int32 n // used for searching backwards
  1590 + ) =
  1591 + if nth(n,uri) is
  1592 + {
  1593 + failure then "",
  1594 + success(c) then
  1595 + if (c = '.' | c = '/')
  1596 + then substr(uri,n,length(uri)-n)
  1597 + else get_uri_extension_aux(uri,n-1)
  1598 + }.
  1599 +
  1600 +define String
  1601 + get_uri_extension
  1602 + (
  1603 + String uri
  1604 + ) =
  1605 + get_uri_extension_aux(uri,
  1606 + length(uri)-1). // search starts at the right end
  1607 +
  1608 +
  1609 +
  1610 + For recognizing a MIME type we use 'known_mime_types' defined in 'web/mime.anubis'.
  1611 +
  1612 +define Maybe(String)
  1613 + recognize_mime_type_from_ext
  1614 + (
  1615 + String ext,
  1616 + List(MIME) l
  1617 + ) =
  1618 + if l is
  1619 + {
  1620 + [ ] then failure,
  1621 + [h . t] then if h is mime(mime_type,extension) then
  1622 + if ext = extension
  1623 + then success(mime_type)
  1624 + else recognize_mime_type_from_ext(ext,t)
  1625 + }.
  1626 +
  1627 +define Maybe(String)
  1628 + recognize_mime_type_from_uri
  1629 + (
  1630 + String uri
  1631 + ) =
  1632 + recognize_mime_type_from_ext(get_uri_extension(uri),known_mime_types).
  1633 +
  1634 +
  1635 +
  1636 +
  1637 +
  1638 +
  1639 +
  1640 +
  1641 + *** (5.4) Formatting HTTP headers.
  1642 +
  1643 + This is the formatting for sending to the client.
  1644 +
  1645 +define Printable_tree
  1646 + format
  1647 + (
  1648 + List(HTTP_header) headers
  1649 + ) =
  1650 + if headers is
  1651 + {
  1652 + [ ] then [crlf],
  1653 + [h . t] then if h is http_header(name,value) then
  1654 + [name,": ",value,crlf . format(t)]
  1655 + }.
  1656 +
  1657 +
  1658 +
  1659 +
  1660 +
  1661 +
  1662 +
  1663 +
  1664 +
  1665 + *** (5.5) Sending a file.
  1666 +
  1667 + We send 2 headers 'Content-Type' and 'Content-Length'.
  1668 +
  1669 +define List(HTTP_header)
  1670 + headers_for_send_file
  1671 + (
  1672 + String mime_type,
  1673 + Int32 size,
  1674 + ) =
  1675 + [
  1676 + http_header("Content-Type",mime_type),
  1677 + http_header("Content-Length",integer_to_string(size)),
  1678 + ].
  1679 +
  1680 +
  1681 +
  1682 + Sending the body of the answer (i.e. the file itself).
  1683 +
  1684 +define One
  1685 + send_file_body
  1686 + (
  1687 + ServerDescription desc,
  1688 + Connection connection, // connection with the client
  1689 + Connection file, // file to be sent already opened
  1690 + Int32 size,
  1691 + Int32 sent,
  1692 + String filename
  1693 + ) =
  1694 + if sent >= size then unique else
  1695 + if read(file,min(10000,size-sent),60) is
  1696 + {
  1697 + failure then log_journal_msg(desc,"Cannot read from file '"+filename+"'.\n"),
  1698 + success(ba) then
  1699 + with nr = length(ba), // get the number of bytes read
  1700 + if reliable_write(connection,ba) is
  1701 + {
  1702 + failure then log_journal_msg(desc,"Cannot write into connection.\n"),
  1703 + success(nw) then
  1704 + send_file_body(desc,connection,file,size,sent+nw,filename)
  1705 + }
  1706 + }.
  1707 +
  1708 +
  1709 +
  1710 +
  1711 + Sending the answer line, the headers and the body.
  1712 +
  1713 +define One
  1714 + send_file
  1715 + (
  1716 + ServerDescription desc,
  1717 + Connection connection,
  1718 + List(HTTP_header) headers,
  1719 + Int32 size,
  1720 + Connection file,
  1721 + String filename
  1722 + ) =
  1723 + forget(reliable_write(connection,to_byte_array("HTTP/1.1 200 OK"+crlf)));
  1724 + forget(reliable_write(connection,format(headers)));
  1725 + send_file_body(desc,connection,file,size,0,filename).
  1726 +
  1727 +
  1728 +
  1729 + Checking if a connection is under SSL.
  1730 +
  1731 +define Bool // true only for a connection built with the 'ssl' alternative
  1732 + is_SSL
  1733 + (
  1734 + Connection c
  1735 + ) =
  1736 + if c is
  1737 + {
  1738 + file(_) then false, // NOTE(review): 'file' is listed three times; presumably 'Connection' has three 'file'
  1739 + file(_) then false, // alternatives (one per kind of file address) which must all be covered here -
  1740 + file(_) then false, // confirm against the definition of 'Connection'
  1741 + tcp(_) then false, // plain TCP connection
  1742 + ssl(_) then true
  1743 + }.
  1744 +
  1745 +
  1746 +
  1747 + Before opening and sending a file, we check the MIME type. It must be recognized.
  1748 +
  1749 +define One
  1750 + send_file
  1751 + (
  1752 + ServerDescription desc,
  1753 + Connection connection,
  1754 + String uri,
  1755 + Maybe(String) mbauthorization
  1756 + ) =
  1757 + if recognize_mime_type_from_uri(uri) is
  1758 + {
  1759 + failure then log_journal_msg(desc,"No MIME type found for '"+uri+"'.\n"),
  1760 + success(mime_type) then
  1761 + with directory = if mbauthorization is
  1762 + {
  1763 + failure then server_directory(desc)+"/public",
  1764 + success(authorization) then
  1765 + log_journal_msg(desc,
  1766 + "URI required for download: "+uri+" authorization: "+authorization+"\n");
  1767 + if make_authorization(if authorization_secret(desc) is
  1768 + {
  1769 + failure then "",
  1770 + success(s) then s
  1771 + },uri) = authorization
  1772 + then if is_SSL(connection)
  1773 + then server_directory(desc)+"/private_download"
  1774 + else log_journal_msg(desc,"Private download allowed only under SSL.\n");
  1775 + server_directory(desc)+"/public"
  1776 + else log_journal_msg(desc,"Invalid authorization for download.\n");
  1777 + server_directory(desc)+"/public"
  1778 + },
  1779 + if (Maybe(RAddr(Int8)))connect to file directory+uri is
  1780 + {
  1781 + failure then log_journal_msg(desc,"Cannot find file '"+directory+uri+"'.\n"),
  1782 + success(f) then with size = file_size(f),
  1783 + send_file(desc,
  1784 + connection,
  1785 + headers_for_send_file(mime_type,size),
  1786 + size,
  1787 + file(f),
  1788 + uri)
  1789 + }
  1790 + }.
  1791 +
  1792 +
  1793 +
  1794 +
  1795 +
  1796 +
  1797 +
  1798 +
  1799 +
  1800 + *** (5.6) Answering a www-url encoded request.
  1801 +
  1802 + Standard headers are for answering ".awp" requests.
  1803 +
  1804 +define List(HTTP_header)
  1805 + standard_headers
  1806 + (
  1807 + Int32 answer_body_size
  1808 + ) =
  1809 + [
  1810 + http_header("Content-Type","text/html"),
  1811 + http_header("Content-length",integer_to_string(answer_body_size))
  1812 + ].
  1813 +
  1814 +
  1815 +define One // answers a request whose body is www-url encoded
  1816 + www_url_answer
  1817 + (
  1818 + ServerDescription desc,
  1819 + Connection connection, // connection with the client
  1820 + Int32 ip_addr, // IP address of the client
  1821 + HTTP_RequestLine request_line, // request line sent by the client
  1822 + List(HTTP_header) headers, // HTTP headers sent by the client
  1823 + ByteArray body, // body of client's request
  1824 + Server server // passed unchanged to the ".awp" handler
  1825 + ) =
  1826 + with all_web_args = query_string(request_line) + // arguments of the query string, followed by
  1827 + read_www_url_encoded_web_args(to_string(body),0), // those decoded from the body
  1828 + uri = uri(request_line),
  1829 + ext = get_uri_extension(uri),
  1830 + (if member(journal_extensions(desc),ext) // the request is journaled only for the listed extensions
  1831 + then log_journal_msg(desc,format_request(desc,connection,request_line,headers,all_web_args))
  1832 + else unique);
  1833 + if is_illegal_uri(uri,0) // an illegal URI is journaled and nothing is written to the client
  1834 + then log_journal_msg(desc,"Received illegal URI: "+uri+"\n")
  1835 + else (if ext = ".awp"
  1836 + then (if awp_handler(desc)
  1837 + (ip_addr,uri,headers,all_web_args,server) is (c_ticket,s_ticket,wp) then // the handler yields a triple
  1838 + forget(reliable_write(connection, // status line, standard headers, then the formatted answer
  1839 + with answer_body = format(empty,c_ticket,s_ticket,wp),
  1840 + [ "HTTP/1.1 200 OK", crlf,
  1841 + format(standard_headers(length(answer_body))) .
  1842 + answer_body])))
  1843 + else (send_file(desc, // any other extension: send the file itself
  1844 + connection,
  1845 + uri,
  1846 + if web_arg_value(all_web_args,"auth") is // value of the 'auth' web argument, if present
  1847 + {
  1848 + not_found then failure,
  1849 + found(v) then success(v)
  1850 + }))).
  1851 +
  1852 +
  1853 +
  1854 +
  1855 +
  1856 +
  1857 +
  1858 +
  1859 +
  1860 +
  1861 +
  1862 + *** (5.7) Answering a multipart/form-data encoded request.
  1863 +
  1864 + In order to support upload of files, we must be able to read web arguments which are
  1865 + encoded in a multipart/form-data body. The first thing to do is to find the
  1866 + boundary. The boundary is a special string which delimits the various parts of the
  1867 + 'multipart' body. It is found within the value of the 'Content-Type' HTTP header, as
  1868 + the value of the 'boundary' attribute.
  1869 +
  1870 +
  1871 +
  1872 +
  1873 +
  1874 + *** (5.7.1) Finding the boundary.
  1875 +
  1876 + Hence, we just have to find the string 'boundary=' within the value of the
  1877 + 'Content-Type' header, and read the value of the boundary from there.
  1878 +
  1879 +define Bool
  1880 + delimits_boundary
  1881 + (
  1882 + Int8 c
  1883 + ) =
  1884 + if c = ' ' then true else
  1885 + if c = 13 then true else
  1886 + if c = 10 then true else
  1887 + if c = 0 then true else
  1888 + if c = ',' then true else
  1889 + c = ';'.
  1890 +
  1891 +
  1892 +define Maybe(String)
  1893 + get_boundary_value_3
  1894 + (
  1895 + String s,
  1896 + Int32 i,
  1897 + List(Int8) so_far
  1898 + ) =
  1899 + if nth(i,s) is
  1900 + {
  1901 + failure then success(implode(reverse(so_far))),
  1902 + success(c) then
  1903 + if delimits_boundary(c)
  1904 + then success(implode(reverse(so_far)))
  1905 + else get_boundary_value_3(s,i+1,[c . so_far])
  1906 + }.
  1907 +
  1908 +
  1909 +
  1910 +define Maybe(String)
  1911 + get_boundary_value_2
  1912 + (
  1913 + String s,
  1914 + Int32 i,
  1915 + ) =
  1916 + if nth(i,s) is
  1917 + {
  1918 + failure then failure,
  1919 + success(c) then
  1920 + if is_blank(c)
  1921 + then get_boundary_value_2(s,i+1)
  1922 + else get_boundary_value_3(s,i+1,[c])
  1923 + }.
  1924 +
  1925 +define Maybe(String)
  1926 + get_boundary_value_1
  1927 + (
  1928 + String s, // string into which we must find '= ...'
  1929 + Int32 i // position of start of search
  1930 + ) =
  1931 + if nth(i,s) is
  1932 + {
  1933 + failure then failure,
  1934 + success(c) then
  1935 + if is_blank(c)
  1936 + then get_boundary_value_1(s,i+1)
  1937 + else if c = '='
  1938 + then get_boundary_value_2(s,i+1)
  1939 + else failure
  1940 + }.
  1941 +
  1942 +
  1943 +define Maybe(String)
  1944 + get_boundary
  1945 + (
  1946 + String content_type_header_value
  1947 + ) =
  1948 + if find("boundary",content_type_header_value,0) is
  1949 + {
  1950 + failure then failure,
  1951 + success(n) then // 'boundary' has been found at position n
  1952 + get_boundary_value_1(content_type_header_value,n+8)
  1953 + }.
  1954 +
  1955 +define Maybe(String)
  1956 + get_boundary
  1957 + (
  1958 + List(HTTP_header) headers
  1959 + ) =
  1960 + if headers is
  1961 + {
  1962 + [ ] then failure,
  1963 + [h . t] then if h is http_header(name,value) then
  1964 + if name = "content-type"
  1965 + then get_boundary(value)
  1966 + else get_boundary(t)
  1967 + }.
  1968 +
  1969 +
  1970 +
  1971 +
  1972 +
  1973 +
  1974 +
  1975 +
  1976 + *** (5.7.2) Reading attributes from a multipart entity.
  1977 +
  1978 + Entities in a multipart/form-data body are separated by instances of the string:
  1979 +
  1980 + --bbbbb
  1981 +
  1982 + where bbbbb is the boundary computed above. Actually, the body has the form:
  1983 +
  1984 + --bbbbb
  1985 + <entity 1>
  1986 + --bbbbb
  1987 + <entity 2>
  1988 + --bbbbb
  1989 + ...
  1990 + --bbbbb
  1991 + <last entity>
  1992 + --bbbbb
  1993 +
  1994 +
  1995 + We have to extract an entity which is in the body between offsets 'start' and 'end'
  1996 + (computed when boundaries have been localized). The entity itself is made of two parts:
  1997 + headers and body. The body is separated from the headers by a blank line. This blank
  1998 + line (a double crlf) marks the beginning of the body of the entity. Within the headers
  1999 + of the entity, we look for a 'Content-Disposition' header, which should look like this:
  2000 +
  2001 + Content-Disposition: form-data; name="..."; filename="..." crlf
  2002 +
  2003 + We are just interested in the name and the file name. Hence we first search
  2004 + 'Content-Disposition', then we search 'name' and read the value, and we do the same for
  2005 + 'filename'.
  2006 +
  2007 + If the 'filename' attribute is not present, the web arg is an ordinary one, otherwise,
  2008 + it is an uploaded file.
  2009 +
  2010 +
  2011 + Below is a variant of 'find' (see 'tools/findstring.anubis'), with an extra 'end'
  2012 + argument.
  2013 +
  2014 +define Maybe(Int32)
  2015 + find
  2016 + (
  2017 + String what,
  2018 + ByteArray where,
  2019 + Int32 start,
  2020 + Int32 end
  2021 + ) =
  2022 + if find(to_byte_array(what),where,start) is
  2023 + {
  2024 + failure then failure,
  2025 + success(n) then
  2026 + if n+length(what) >= end
  2027 + then failure
  2028 + else success(n)
  2029 + }.
  2030 +
  2031 +
  2032 +define String
  2033 + read_attribute_value
  2034 + (
  2035 + ByteArray where,
  2036 + Int32 start,
  2037 + Int32 end,
  2038 + List(Int8) so_far
  2039 + ) =
  2040 + if start >= end then implode(reverse(so_far)) else
  2041 + if nth(start,where) is
  2042 + {
  2043 + failure then implode(reverse(so_far)),
  2044 + success(c) then
  2045 + if c = '\"'
  2046 + then implode(reverse(so_far))
  2047 + else read_attribute_value(where,start+1,end,[c . so_far])
  2048 + }.
  2049 +
  2050 +define Maybe(String)
  2051 + find_attribute
  2052 + (
  2053 + String name,
  2054 + ByteArray where,
  2055 + Int32 start,
  2056 + Int32 end
  2057 + ) =
  2058 + with name = name+"=\"",
  2059 + if find(to_byte_array(name),where,start) is
  2060 + {
  2061 + failure then failure,
  2062 + success(n) then
  2063 + if n+length(name) >= end
  2064 + then failure
  2065 + else success(read_attribute_value(where,n+length(name),end,[]))
  2066 + }.
  2067 +
  2068 +
  2069 +
  2070 +define Maybe((String,Maybe(String)))
  2071 + find_name_and_filename
  2072 + (
  2073 + ByteArray body,
  2074 + Int32 start,
  2075 + Int32 end
  2076 + ) =
  2077 + if find(to_byte_array("Content-Disposition"),body,start) is
  2078 + {
  2079 + failure then failure,
  2080 + success(n) then
  2081 + if find_attribute("name",body,n+19,end) is
  2082 + {
  2083 + failure then failure,
  2084 + success(name_value) then if find_attribute("filename",body,n+19,end) is
  2085 + {
  2086 + failure then success((name_value,failure)),
  2087 + success(filename_value) then success((name_value,success(filename_value)))
  2088 + }
  2089 + }
  2090 + }.
  2091 +
  2092 +
  2093 +
  2094 +
  2095 +
  2096 +
  2097 +
  2098 +
  2099 +
  2100 +
  2101 + *** (5.7.3) Creating a temporary filename for an uploaded file.
  2102 +
  2103 +variable Int32 uploaded_file_count = 0. // counter incremented by 'save_uploaded_file' each time it names a temporary file
  2104 +
  2105 + This variable is local to the virtual machine. Hence, its value is 0 each time a new
  2106 + request arrives. Temporary uploaded files are stored in the directory represented by
  2107 + 'upload_temporary_directory'. The filenames have the form:
  2108 +
  2109 + _m_n
  2110 +
  2111 + where 'm' is the number of the virtual machine, and 'n' a number obtained by
  2112 + incrementing 'uploaded_file_count'. Notice that the program must do something with this
  2113 + file (move it to some directory/name), otherwise, it will probably be overwritten the
  2114 + next time the same machine works.
  2115 +
  2116 +
  2117 +
  2118 +
  2119 +
  2120 +
  2121 + *** (5.7.4) Saving an uploaded file under a temporary filename.
  2122 +
  2123 +define Maybe(String) // returns the temporary file name
  2124 + save_uploaded_file
  2125 + (
  2126 + ServerDescription desc,
  2127 + ByteArray body,
  2128 + Int32 start,
  2129 + Int32 end
  2130 + ) =
  2131 + uploaded_file_count <- 1 + *uploaded_file_count;
  2132 + with tfn = "_"+integer_to_string(virtual_machine_id)+"_"+integer_to_string(*uploaded_file_count),
  2133 + if (Maybe(WAddr(Int8)))connect to file server_directory(desc)+"/upload_temporary/"+tfn is
  2134 + {
  2135 + failure then failure,
  2136 + success(f) then
  2137 + if reliable_write(file(f),extract(body,start,end)) is
  2138 + {
  2139 + failure then failure,
  2140 + success(nw) then
  2141 + if nw = end - start
  2142 + then success(tfn)
  2143 + else failure
  2144 + }
  2145 + }.
  2146 +
  2147 +
  2148 +
  2149 +
  2150 +
  2151 +
  2152 +
  2153 +
  2154 + *** (5.7.5) Removing the path from a file name.
  2155 +
  2156 + When a file is uploaded, the browser sends the complete path of the file on the client
  2157 + machine as the file name. Actually, this is not quite normal. Nevertheless, we need to
  2158 + remove the path, and keep only the file name. This is achieved by 'remove_path' below.
  2159 +
  2160 +define Int32
  2161 + file_name_begin
  2162 + (
  2163 + String full_name,
  2164 + Int32 i
  2165 + ) =
  2166 + if nth(i,full_name) is
  2167 + {
  2168 + failure then 0,
  2169 + success(c) then
  2170 + if c = '/' then i+1 else
  2171 + if c = '\\' then i+1 else
  2172 + file_name_begin(full_name,i-1)
  2173 + }.
  2174 +
  2175 +define String
  2176 + remove_path
  2177 + (
  2178 + String full_name
  2179 + ) =
  2180 + with l = length(full_name),
  2181 + b = file_name_begin(full_name,l-1),
  2182 + substr(full_name,b,l-b).
  2183 +
  2184 +
  2185 +
  2186 +
  2187 +
  2188 + *** (5.7.6) Reading a multipart entity.
  2189 +
  2190 +define Maybe(Web_arg) // the web argument (ordinary or uploaded file) held by one entity of the body
  2191 + get_multipart_entity
  2192 + (
  2193 +
  2194 + ServerDescription desc,
  2195 + ByteArray body,
  2196 + Int32 start, // the entity lies between 'start'
  2197 + Int32 end // and 'end' in 'body'
  2198 + ) =
  2199 + if find(to_byte_array(crlf+crlf),body,start) is // the blank line which separates headers from body
  2200 + {
  2201 + failure then failure,
  2202 + success(k) then
  2203 + if k >= end // must be within this entity, not the next one
  2204 + then failure
  2205 + else if find_name_and_filename(body,start,k) is // attributes are searched in the headers only
  2206 + {
  2207 + failure then failure,
  2208 + success(n_mbfn) then if n_mbfn is (name,mbfn) then
  2209 + if mbfn is
  2210 + {
  2211 + failure then // no 'filename' attribute: ordinary web argument
  2212 + success(web_arg(name,to_string(extract(body,k+4,end-2)))), // k+4: presumably skips the double crlf found at k
  2213 + // we must subtract 2 from end because of crlf just before the boundary
  2214 +
  2215 + success(fn) then // 'filename' attribute present: uploaded file
  2216 + if save_uploaded_file(desc,body,k+4,end-2) is
  2217 + {
  2218 + failure then failure,
  2219 + success(tfn) then
  2220 + success(upload(name,remove_path(fn), // file name without its path, then the path of the saved copy
  2221 + server_directory(desc)+"/upload_temporary/"+tfn))
  2222 +
  2223 + }
  2224 + }
  2225 + }
  2226 + }.
  2227 +
  2228 +
  2229 +
  2230 +define List(Web_arg)
  2231 + read_multipart_form_data_encoded_web_args
  2232 + (
  2233 + ServerDescription desc,
  2234 + ByteArray body,
  2235 + ByteArray __boundary,
  2236 + Int32 i,
  2237 + ) =
  2238 + if find(__boundary,body,i) is
  2239 + {
  2240 + failure then [ ],
  2241 + success(n) then
  2242 + if find(__boundary,body,n+length(__boundary)) is
  2243 + {
  2244 + failure then [ ],
  2245 + success(m) then
  2246 + if get_multipart_entity(desc,body,n+length(__boundary),m) is
  2247 + {
  2248 + failure then [ ],
  2249 + success(wa) then
  2250 + [wa . read_multipart_form_data_encoded_web_args(desc,body,__boundary,m)]
  2251 + }
  2252 + }
  2253 + }.
  2254 +
  2255 +
  2256 +
  2257 +define One // answers a request whose body is multipart/form-data encoded
  2258 + multipart_form_data_answer
  2259 + (
  2260 + ServerDescription desc,
  2261 + Connection connection,
  2262 + Int32 ip_addr,
  2263 + HTTP_RequestLine request_line,
  2264 + List(HTTP_header) headers,
  2265 + ByteArray body,
  2266 + Server server
  2267 + ) =
  2268 + if get_boundary(headers) is
  2269 + {
  2270 + failure then unique, // no boundary found in the headers: nothing is done
  2271 + success(boundary) then
  2272 + with all_web_args = query_string(request_line) + // arguments of the query string, followed by those of the body
  2273 + read_multipart_form_data_encoded_web_args(desc,
  2274 + body,
  2275 + to_byte_array("--"+boundary), // entities are delimited by "--" followed by the boundary
  2276 + 0),
  2277 + uri = uri(request_line),
  2278 + ext = get_uri_extension(uri),
  2279 + log_journal_msg(desc,format_request(desc,connection,request_line,headers,all_web_args)); // always journaled
  2280 + if is_illegal_uri(uri,0) // an illegal URI is journaled and nothing is written to the client
  2281 + then log_journal_msg(desc,"Received illegal URI: "+uri+"\n")
  2282 + else
  2283 + if ext = ".awp" then
  2284 + (if awp_handler(desc)
  2285 + (ip_addr,uri,headers,all_web_args,server) is (c_ticket,s_ticket,wp) then // the handler yields a triple
  2286 + forget(reliable_write(connection, // status line, standard headers, then the formatted answer
  2287 + with answer_body = format(empty,c_ticket,s_ticket,wp),
  2288 + [ "HTTP/1.1 200 OK",crlf,
  2289 + format(standard_headers(length(answer_body))) .
  2290 + answer_body])))
  2291 + else unique // any other extension: nothing is sent
  2292 + }.
  2293 +
  2294 +
  2295 +
  2296 +
  2297 +
  2298 +
  2299 +
  2300 +
  2301 + *** (5.8) Handling redirections.
  2302 +
  2303 + 'redirections' (of type 'List(Redirection)') contains redirection directives. Each one
  2304 + has the form:
  2305 +
  2306 + redirect(required_uri,required_host,corresponding_uri).
  2307 +
  2308 + The host required by the client may be found in the 'Host' HTTP header. The URI
  2309 + required by the client is given below as 'uri'. We just have to find the required host
  2310 + in the headers, and to find the corresponding redirection directive.
  2311 +
  2312 +
  2313 + In the next function, the required host and URI are known. We just have to search in
  2314 + the 'redirections' list.
  2315 +
  2316 +define String
  2317 + handle_redirection
  2318 + (
  2319 + String required_uri,
  2320 + String required_host,
  2321 + List(Redirection) redirections
  2322 + ) =
  2323 + if redirections is
  2324 + {
  2325 + [ ] then required_uri,
  2326 + [h . t] then if h is redirect(uri,host,target) then
  2327 + if host = required_host
  2328 + then if uri = required_uri
  2329 + then target
  2330 + else handle_redirection(required_uri,required_host,t)
  2331 + else handle_redirection(required_uri,required_host,t)
  2332 + }.
  2333 +
  2334 +
  2335 +
  2336 + Finding the 'Host' header. No redirection is performed if this header is not found.
  2337 +
  2338 +define String
  2339 + handle_redirection // returns the redirected URI
  2340 + (
  2341 + ServerDescription desc,
  2342 + String uri, // original URI
  2343 + List(HTTP_header) headers
  2344 + ) =
  2345 + if headers is
  2346 + {
  2347 + [ ] then uri,
  2348 + [h . t] then if h is http_header(name,value) then
  2349 + if name = "host"
  2350 + then handle_redirection(uri,value,redirections(desc))
  2351 + else handle_redirection(desc,uri,t)
  2352 + }.
  2353 +
  2354 +
  2355 +
  2356 +
  2357 +
  2358 +
  2359 +
  2360 +
  2361 + *** (5.9) Answering both sorts of requests.
  2362 +
  2363 + We must decide if the request is www-url encoded or multipart/form-data encoded. This
  2364 + is achieved through the header 'Content-Type'.
  2365 +
  2366 +define EncodingType
  2367 + get_encoding_type
  2368 + (
  2369 + List(HTTP_header) headers
  2370 + ) =
  2371 + if headers is
  2372 + {
  2373 + [ ] then www_url, // this is the default
  2374 + [h . t] then if h is http_header(name,value) then
  2375 + if name = "content-type"
  2376 + then if find("multipart/form-data",value,0) is
  2377 + {
  2378 + failure then www_url,
  2379 + success(_) then multipart_form_data
  2380 + }
  2381 + else get_encoding_type(t)
  2382 + }.
  2383 +
  2384 +
  2385 +
  2386 +define One
  2387 + send_answer
  2388 + (
  2389 + ServerDescription desc,
  2390 + Connection connection,
  2391 + HTTP_RequestLine rqline,
  2392 + List(HTTP_header) headers,
  2393 + ByteArray body,
  2394 + Server server
  2395 + ) =
  2396 + if rqline is request_line(type,uri,qstring) then
  2397 + with rqline = request_line(type,handle_redirection(desc,uri,headers),qstring),
  2398 + if remote_IP_address_and_port(connection) is (ip_addr,_) then
  2399 + if get_encoding_type(headers) is
  2400 + {
  2401 + www_url then
  2402 + www_url_answer(desc,connection,ip_addr,rqline,headers,body,server),
  2403 + multipart_form_data then
  2404 + multipart_form_data_answer(desc,connection,ip_addr,rqline,headers,body,server)
  2405 + }.
  2406 +
  2407 +
  2408 +
  2409 +
  2410 +
  2411 +
  2412 +
  2413 +
  2414 +
  2415 +
  2416 + *** (6) The HTTP/HTTPS server.
  2417 +
  2418 + The command 'start_server' (declared in 'predefined.anubis') starts a virtual machine
  2419 + which opens a server TCP/IP connection, and which continuously listens to this
  2420 + connection. When a request arrives, this machine delegates the work of deciphering and
  2421 + answering the request to another virtual machine, and continues to listen. The job of
  2422 + the delegated machine is defined by the HTTP request handler below.
  2423 +
  2424 +
  2425 +
  2426 + *** (6.1) The HTTP request handler.
  2427 +
  2428 + Here is the HTTP handler. It is called at each new request in a separate virtual
  2429 + machine. It reads the headers of the HTTP request, determines body size, reads the body
  2430 + of the HTTP request, and answers the request.
  2431 +
  2432 +define One // reads a whole request from the connection, then answers it
  2433 + handler
  2434 + (
  2435 + ServerDescription desc,
  2436 + Connection connection, // connection with the client
  2437 + Server server
  2438 + ) =
  2439 + if remote_IP_address_and_port(connection) is (ip_addr,port) then // NOTE(review): 'ip_addr' and 'port' are not used below
  2440 + if read_request_line(connection) is
  2441 + {
  2442 + error(msg) then log_journal_msg(desc,format(msg)), // every error is journaled, and no answer is sent
  2443 + ok(request_line) then
  2444 + if read_http_headers(connection) is
  2445 + {
  2446 + error(msg) then log_journal_msg(desc,format(msg)),
  2447 + ok(headers) then if get_body_size(headers) is
  2448 + {
  2449 + error(msg) then log_journal_msg(desc,format(msg)),
  2450 + ok(body_size) then
  2451 + if read_http_body(connection,body_size,constant_byte_array(0,0),1000) is // starts with no bytes read and 1000 retries
  2452 + {
  2453 + error(msg) then log_journal_msg(desc,format(msg)),
  2454 + ok(body) then
  2455 + send_answer(desc,
  2456 + connection,
  2457 + request_line,
  2458 + headers,
  2459 + body,
  2460 + server)
  2461 + }
  2462 + }
  2463 + }
  2464 + }.
  2465 +
  2466 +
  2467 +define Server -> ((RWAddr(Int8)) -> One)
  2468 + http_handler
  2469 + (
  2470 + ServerDescription desc,
  2471 + ) =
  2472 + (Server server) |->
  2473 + (RWAddr(Int8) connection) |-> handler(desc,tcp(connection),server).
  2474 +
  2475 +define Server -> ((SSL_Connection) -> One)
  2476 + https_handler
  2477 + (
  2478 + ServerDescription desc,
  2479 + ) =
  2480 + (Server server) |->
  2481 + (SSL_Connection connection) |-> handler(desc,ssl(connection),server).
  2482 +
  2483 +
  2484 +
  2485 +
  2486 +
  2487 + *** (6.2) Starting the HTTP/HTTPS server.
  2488 +
  2489 +
  2490 + The next function creates the directories (if they don't already exist).
  2491 +
  2492 +define One // calls make_directory for three sub-directories of server_dir
  2493 + create_directories
  2494 + (
  2495 + String server_dir // path prefix; sub-directory names are appended to it with "/"
  2496 + ) =
  2497 + //forget(make_directory(server_dir+"/public",default_directory_mode));
  2498 + forget(make_directory(server_dir+"/upload_temporary",default_directory_mode)); // forget discards the result of make_directory - presumably so that an existing directory is not treated as an error; confirm
  2499 + forget(make_directory(server_dir+"/private_download",default_directory_mode));
  2500 + forget(make_directory(server_dir+"/journal",default_directory_mode)).
  2501 +
  2502 +
  2503 + The 'notify' functions are used only when the server cannot accept a new connection.
  2504 +
  2505 +define (One) -> One // given desc, returns a callback that passes a fixed message to log_journal_msg
  2506 + notify
  2507 + (
  2508 + ServerDescription desc, // captured by the returned callback
  2509 + ) =
  2510 + (One u) |-> // the argument u is not used
  2511 + log_journal_msg(desc,"HTTP server cannot accept new connection.\n").
  2512 +
  2513 +
  2514 +define (One) -> One // HTTPS counterpart of notify: same callback shape, different message text
  2515 + ssl_notify
  2516 + (
  2517 + ServerDescription desc, // captured by the returned callback
  2518 + ) =
  2519 + (One u) |-> // the argument u is not used
  2520 + log_journal_msg(desc,"HTTPS server cannot accept new connection.\n").
  2521 +
  2522 +
  2523 +
  2524 +
  2525 + Below is the command which starts an HTTP server.
  2526 +
  2527 +public define One // starts an HTTP server from desc and prints the outcome
  2528 + start_http_server
  2529 + (
  2530 + HTTP_ServerDescription desc // source of the server directory, the internal description and the service
  2531 + ) =
  2532 + create_directories(server_directory(desc)); // create the server's sub-directories before starting
  2533 + with idesc = internal_description(desc), // idesc is what http_handler and notify receive
  2534 + with ip_addr = ip_address(idesc),
  2535 + port = ip_port(idesc),
  2536 + if start_server(ip_addr,
  2537 + port,
  2538 + http_handler(idesc), // per-connection handler
  2539 + notify(idesc)) is // callback described above as used when a new connection cannot be accepted
  2540 + {
  2541 + cannot_create_the_socket then print("Cannot create the listening socket.\n"), // start-up failures are only printed
  2542 + cannot_bind_to_port then print("Cannot bind to port.\n"),
  2543 + cannot_listen_on_port then print("Cannot listen on port.\n"),
  2544 + ok(server) then // NOTE(review): 'server' is bound but not used in this branch
  2545 + print("HTTP Server started on "+ip_addr_to_string(ip_addr)+
  2546 + ":"+integer_to_string(port)+".\n");
  2547 + delegate (checking every 1000 milliseconds, wait for // NOTE(review): the waited condition runs service(desc)(unique) and then yields false, so presumably it never completes and service is invoked at every check - confirm
  2548 + (service(desc)(unique); false) then unique),
  2549 + unique // value of the ok branch
  2550 + }.
  2551 +
  2552 +
  2553 + Below is the command which starts an HTTPS server.
  2554 +
  2555 +public define One // starts an HTTPS server from desc and prints the outcome
  2556 + start_https_server
  2557 + (
  2558 + HTTPS_ServerDescription desc // source of the server directory, the internal description, the common name and the service
  2559 + ) =
  2560 + create_directories(server_directory(desc)); // create the server's sub-directories before starting
  2561 + with idesc = internal_description(desc), // idesc is what https_handler and ssl_notify receive
  2562 + with ip_addr = ip_address(idesc),
  2563 + port = ip_port(idesc),
  2564 + if start_ssl_server(ip_addr,
  2565 + port,
  2566 + server_common_name(desc), // extra argument compared with start_server in start_http_server
  2567 + https_handler(idesc), // per-connection handler
  2568 + ssl_notify(idesc)) is // callback described above as used when a new connection cannot be accepted
  2569 + {
  2570 + cannot_create_the_socket then print("Cannot create the listening socket.\n"), // start-up failures are only printed
  2571 + cannot_bind_to_port then print("Cannot bind to port.\n"),
  2572 + cannot_listen_on_port then print("Cannot listen on port.\n"),
  2573 + ok(server) then // NOTE(review): 'server' is bound but not used in this branch
  2574 + print("HTTPS Server started on "+ip_addr_to_string(ip_addr)+
  2575 + ":"+integer_to_string(port)+".\n");
  2576 + delegate (checking every 1000 milliseconds, wait for // NOTE(review): the waited condition runs service(desc)(unique) and then yields false, so presumably it never completes and service is invoked at every check - confirm
  2577 + (service(desc)(unique); false) then unique),
  2578 + unique // value of the ok branch
  2579 + }.
  2580 +
  2581 +
  2582 +
  2583 +
  2584 +
0 2585 \ No newline at end of file
... ...