https_get.anubis 12.9 KB

                                       The Anubis Project
   
                           Getting a document from the secured Web.
   
                                 Copyright (c) Alain Prouté 2001. 


    Author:   Alain Prouté
   

            
   This file defines the function 'https_get', which retrieves a document from the world
   wide web in secured mode (HTTPS). The function is analogous to 'http_get', to be found
   in 'web/http_get.anubis'.
   
   The function  simulates the  behavior of  a browser, at  least just  what is  needed to
   retrieve the document. It  does not display the document, but returns  it (if found) in
   the form of a string.
   
   The function 'https_get' takes the following operands:
   
     - the name of the server to which the request is to be sent, 
     - the name (including the path) of the document on this server,
     - a list of headers to be added to mandatory standard headers, 
     - a list of 'arguments' to be sent as the body of the request (web arguments). 
     - an accept policy function (see below),  for accepting X.509 certificates in case of
       a problem.
   
   The result returned  by 'https_get' has the following type,  which defines the problems
   which may happen:
   
   
read tools/basis.anubis
read system/string.anubis
read html.anubis
read http_get_common.anubis   
read common.anubis   
   

public type HTTPS_GET_Result:
  // The server name could not be resolved; the operand is the result given by 'dns'.
  cannot_resolve_server_name(DNS_Result),
  // 'open_SSL_connection' did not provide a connection; the operand is its error.
  ssl_connect_error(SSLConnectError),
  // The request could not be written to the connection.
  transmission_problem, 
  // NOTE(review): no function visible in this file returns this alternative.
  request_refused_by_server,
  // Success: 'response' and 'headers' come from the part of the answer preceding the
  // first double CRLF (first header, then the other ones), 'document' is what follows.
  ok(String response,
     List(HTTP_header) headers,
     String document). 

   Cookies are among headers. See 'web/cookies.anubis' for cookies handling.
   
   
   Note: The types 'DNS_Result' and 'SSLConnectError' are defined in 'predefined.anubis'.
   
public define HTTPS_GET_Result
  https_get
    (
      // Name of the server; it is split by 'separate_name_port' with 443 as the
      // port given to that function.
      String server_name,
      // Name of the document, as it is put on the request line.
      String document_name,
      // Headers added to the request after the standard ones.
      List(HTTP_header) headers, 
      // Web arguments: when this list is empty the request is a GET, otherwise it
      // is a POST whose body is made of these arguments.
      List(HTTP_argument) arguments,
      // Function transmitted to 'open_SSL_connection' for deciding about a suspect
      // certificate.
      (Maybe(X509)) -> Bool accept_policy
    ). 

   The same one without the 'headers' argument:
   
public define HTTPS_GET_Result
  https_get
    (
      String server_name,
      String document_name,
      List(HTTP_argument) arguments,
      (Maybe(X509)) -> Bool accept_policy
    ) =
  //
  // Delegate to the variant taking a list of headers, with an empty list of
  // additional headers.
  //
  https_get(server_name,
            document_name,
            [],
            arguments,
            accept_policy).

   The main difference with 'http_get' is the presence of the 'accept_policy' argument.
   'accept_policy' is the function which determines your personal policy for accepting a
   server certificate when 'open_SSL_connection' (defined in 'predefined.anubis') did not
   already accept it, that is to say when the certificate is invalid (or missing), or
   when its common name does not match the server name.
   
   'X509' is an 'opaque' type defined in 'predefined.anubis'. It is 'opaque' in the sense
   that no alternative of this type is directly accessible to you (despite the fact that
   the type is public).
   
   An accept policy function takes (maybe) an X.509 certificate as its unique argument, so
   that the  decision may be taken  with the suspect  certificate at hand. It  must return
   'true' for accepting, and 'false' for refusing.
   
   You may use the following default accept policy function:
   
public define Bool
  default_accept_policy
    (
      // The certificate is not examined: the answer is 'false' in all cases.
      Maybe(X509) suspect_certificate
    ) = false. 
   
   That  is,  never  accept  a  certificate  which  cannot  be  successfully  verified  by
   'open_SSL_connection'.   Notice that  this is  not a  paranoid behavior,  but  a normal
   behavior.   Nevertheless, you still  have the  possibility to  weaken this  behavior by
   using  another accept policy  function.  Be  very careful  when writing  this function,
   because  this  may weaken  your  security.   This function  may  for  example show  the
   certificate and ask for user input for  accepting it. It may also check the certificate
   fingerprint against a data base, etc...
   
   Another accept policy function is defined in this file:
   
public define Bool
  command_line_accept_policy
    (
      // The suspect certificate, or 'failure' if there is no usable certificate.
      // This is only the declaration; the definition is given further down.
      Maybe(X509) suspect_certificate
    ).
   
   It is used by the command line module 'https_get.adm'. If the certificate is not
   accepted by 'open_SSL_connection', this function prints the certificate on the screen,
   and asks the user whether to accept it. It also asks the user whether to accept the
   certificate for ever.
   
   It  is likely  that you  will need  an accept  policy function  of your  own.   See the
   definition    of    'command_line_accept_policy'     below    for    information    and
   'predefined.anubis'  for the  tools enabling  the manipulation  of  X.509 certificates.
   Certificates that  you trust are  stored into the  directory declared under  the symbol
   'ca'  (for 'Certificate  Authorities')  in your  configuration  file.  Any  certificate
   present in this directory is trusted without any condition.
   
   This file defines the module 'https_get' to be used directly from the command line.  To
   learn about the syntax,  just type 'https_get' at the system prompt,  or have a look at
   the end of this file.

   
   
   
   --- That's all for public definitions. ------------------------------------------------

   
   
   
   
   
   
   
define Maybe(String)
  receive_text_chunk
    (
      SSL_Connection conn
    ) =
  //
  // Read the next piece of the answer from the SSL connection.
  // NOTE(review): 100 and 1000 are presumably a maximal number of bytes and a
  // time out; confirm against the definition of 'read' in 'predefined.anubis'.
  //
  read(conn,100,1000).
      
   
   
   
define HTTPS_GET_Result
  https_reception_result
    (
      String raw_headers,
      String document
    ) =
  //
  // Build the final 'ok' result when nothing more is to be read: the first of the
  // separated headers provides the response, the other ones are the headers.
  //
  if separate_headers(raw_headers) is
    {
      [ ]                      then ok("",[],document),
      [head_line . other_lines] then
        if head_line is http_header(a,b) then ok(a,other_lines,document)
    }.


define HTTPS_GET_Result
  receive
    (
      SSL_Connection conn,
      String headers,
      String text_so_far,
      Bool double_crlf_seen
    ) =
  //
  // Read the answer chunk after chunk. Until a double CRLF is found, everything is
  // accumulated in 'text_so_far'. When it is found, what precedes it becomes
  // 'headers' and what follows it (4 characters further) starts the document.
  // Reception stops when a chunk cannot be read or when an empty chunk is read.
  //
  if receive_text_chunk(conn) is
    {
      failure then https_reception_result(headers,text_so_far),

      success(chunk) then
        if chunk = ""
        then https_reception_result(headers,text_so_far)
        else with received = text_so_far+chunk,
             if double_crlf_seen
             then receive(conn,headers,received,true)
             else if has_double_crlf(received) is
               {
                 // separator not yet received: keep accumulating
                 failure then receive(conn,headers,received,false),

                 // separator found at position 'pos': split headers from document
                 success(pos) then
                   if sub_string(received,0,pos) is
                     {
                       failure then should_not_happen(transmission_problem),
                       success(header_part) then
                         if sub_string(received,pos+4,length(received)-pos-4) is
                           {
                             failure then should_not_happen(transmission_problem),
                             success(body_part) then
                               receive(conn,header_part,body_part,true)
                           }
                     }
               }
    }.

   
      
   The next function has  a valid SSL connection to the server,  and tries to retrieve the
   document.
   
define HTTPS_GET_Result
  https_get
    (
      Bool print_all,
      SSL_Connection conn,
      String server_name,
      String document_name,
      List(HTTP_header) headers,
      List(HTTP_argument) arguments
    ) =
  //
  // Build the request piece by piece. Without web arguments the request is a GET.
  // Otherwise it is a POST, whose body is given by 'format_http_args' and is
  // announced by the 'Content-type' and 'Content-length' headers.
  //
  with body          = format_http_args(arguments),
  with request_line  = (if arguments = [] then "GET " else "POST ")
                         + document_name + " HTTP/1.0" + crlf,
  with fixed_headers = "Host: " + server_name + crlf +
                       "Accept-Charset: iso-8859-1,*,utf-8" + crlf,
  with body_headers  = (if arguments = [] then ""
                          else "Content-type: application/x-www-form-urlencoded" + crlf +
                               "Content-length: " + to_decimal(length(body)) + crlf),
  with request       = request_line +
                       fixed_headers +
                       body_headers +
                       format_headers(headers) +
                       crlf +
                       body,
  //
  // Show the request if required, then send it and wait for the answer:
  //
  (if print_all
   then (
          print("Sending request:\n");
          print(request);
          print("\n")
        )
   else unique);
  if write(conn,request) is
    {
      failure    then transmission_problem,
      success(_) then receive(conn,"","",false)
    }.
   
      
   The next function retrieves the document using the numerical (resolved) server address. 
   
define HTTPS_GET_Result
  https_get
    (
      Bool print_all,
      Word32 server_addr,
      Word32 server_port,
      String server_name,
      String document_name,
      List(HTTP_header) headers,
      List(HTTP_argument) arguments,
      (Maybe(X509)) -> Bool accept_policy
    ) =
  //
  // Try to open the SSL connection. On error, report it as 'ssl_connect_error';
  // otherwise go on with the variant working on an open connection.
  //
  with attempt = open_SSL_connection(server_name,server_addr,server_port,accept_policy),
  if attempt is
    {
      error(reason)  then ssl_connect_error(reason),
      ok(connection) then
        https_get(print_all,connection,server_name,document_name,headers,arguments)
    }.

   
     
define HTTPS_GET_Result
  https_get
    (
      Bool print_all,
      String server_name,
      String document_name,
      List(HTTP_header) headers,
      List(HTTP_argument) arguments,
      (Maybe(X509)) -> Bool accept_policy
    ) =
  //
  // Split 'server_name' into a host name and a port (443 is the port given to
  // 'separate_name_port'):
  //
  if separate_name_port(server_name,443) is (host,port) then
  //
  // Resolve the host name. If resolution succeeds, continue with the numeric
  // address; otherwise return the DNS result within 'cannot_resolve_server_name'.
  //
  with resolution = dns(host),
    if resolution is ok(address)
    then https_get(print_all,address,port,host,document_name,headers,arguments,accept_policy)
    else cannot_resolve_server_name(resolution).

   
   
   Now, here is our public tool: 
   
public define HTTPS_GET_Result
  https_get
    (
      String server_name,
      String document_name,
      List(HTTP_header) headers,
      List(HTTP_argument) arguments,
      (Maybe(X509)) -> Bool accept_policy
    ) =
  //
  // Same as the variant with a 'print_all' operand, with 'print_all' set to
  // 'false' (the request is not printed).
  //
  https_get(false,
            server_name,
            document_name,
            headers,
            arguments,
            accept_policy).
      
  
   
   
   
   Finally, we construct the command line executable module 'https_get.adm':
   
define One
  syntax_https_get =
    //
    // Print the usage message of the command line module in one go.
    //
    print("\nUsage: https_get <server> <document> [options] =<header> <value> ... <arg> <value> ...\n" +
          "   Options are:\n" +
          "     -print_all       print request, response line, headers and document\n" +
          "                      (default is to print only the document)\n").
   
   
   
    
   Below is  our accept  policy function for  the command  line module. This  function may
   serve as a model for your own accept policy function. 

public define Bool
  command_line_accept_policy
    (
      Maybe(X509) mbcert
    ) =
  if mbcert is
    {
      //
      // No usable certificate: the answer of the user is the decision.
      //
      failure then
        print("No server certificate or invalid server certificate.\n");
        print("Do you want to trust this site anyway ? [Y/N]\n");
        yes,

      //
      // A certificate is at hand: show it and ask. If it is accepted, also ask if
      // it must be trusted for ever. Problems met by 'trust_for_ever' are printed,
      // but the certificate remains accepted for this connection.
      //
      success(certificate) then
        print(to_string(certificate));
        print("\nDo you want to accept the above certificate ? [Y/N]\n");
        if yes
        then (
               print("Do you want to accept this certificate for ever ? [Y/N]\n");
               (if yes
                then (if trust_for_ever(certificate) is
                  {
                    ca_directory_not_found      then print("'ca' directory not found.\n"),
                    cannot_create_file          then print("cannot create file.\n"),
                    cannot_create_symbolic_link then print("cannot create symbolic link.\n"),
                    write_error                 then print("write error.\n"),
                    ok                          then unique
                  })
                else unique);
               true
             )
        else false
    }.
   
      
global define One
  https_get
    (
      List(String) args
    ) =
  //
  // Entry point of the command line module. At least a server and a document are
  // needed, otherwise the usage message is printed. What follows them on the
  // command line provides the '-print_all' option, the headers and the arguments.
  //
  if args is
    {
      [ ]                      then syntax_https_get,
      [server . after_server]  then if after_server is
        {
          [ ]                      then syntax_https_get,
          [document . remaining]   then
            with print_all     = member(remaining,"-print_all"),
            with extra_headers = get_headers(remaining),
            with web_arguments = get_arguments(remaining),
            if https_get(print_all,server,document,extra_headers,web_arguments,command_line_accept_policy) is
              {
                cannot_resolve_server_name(dns_failure) then
                  print("Cannot resolve server name: " + format(dns_failure) + ".\n"),

                ssl_connect_error(ssl_failure) then
                  print("SSL connect error: " + format(ssl_failure) + ".\n"),

                transmission_problem then
                  print("Transmission problem.\n"),

                request_refused_by_server then
                  print("The request has been refused by server: " + server + ".\n"),

                ok(status_line,reply_headers,content) then
                  //
                  // With '-print_all', the response and the headers are printed
                  // before the document; otherwise only the document is printed.
                  //
                  (
                  if print_all
                  then (
                          print("\n----- response ----\n");
                          print(status_line);
                          print("\n----- headers -----\n");
                          print_headers(reply_headers);
                          print("----- document ----\n")
                        )
                  else unique
                  );
                  print(content)     // on the screen (use a redirection to get it in a file)
              }
        }
    }.