http_get.anubis
9.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
*Project* The Anubis Project
*Title* Getting a document from the Web.
*Copyright* Copyright (c) Alain Prouté 2001.
*Author* Alain Prouté
*Overview*
This file defines the function 'http_get' which retrieves a document from the world
wide web (a similar function 'https_get' for secured documents is defined in
'https_get.anubis'). The function simulates the behavior of a browser, at least just
what is needed to retrieve the document. It does not display the document, but returns
it (if found) in the form of a string. It also returns the response line from the
server, and the list of all HTTP headers.
The function 'http_get' takes the following arguments:
- the name of the server to which the request is to be sent,
- the name (including the path) of the document on this server,
- a list of headers to be added to mandatory standard headers,
- a list of 'arguments' in the form of pairs of strings '(name,value)' to be sent as
the body of the request (when this list is empty a GET request is sent, otherwise a
POST request is sent).
The result returned by 'http_get' has the following type, which defines the problems
which may happen:
read tools/basis.anubis
read system/string.anubis
read web/common.anubis
read web/http_get_common.anubis
public type HTTP_GET_Result:
// Outcome of a call to 'http_get': one alternative per stage that can fail, plus 'ok'.
cannot_resolve_server_name(DNS_Result), // 'dns' did not answer 'ok'; carries the DNS result obtained
cannot_connect_to_server(NetworkConnectError), // 'connect' returned an error; carries that error
transmission_problem, // sending the request over the connection failed
request_refused_by_server, // NOTE(review): no code visible in this file returns this alternative
ok(String response, // HTTP response line from the server
List(HTTP_header) headers, // HTTP headers received from the server
String document). // The document itself, as a string
public define HTTP_GET_Result
http_get
// Public declaration only (no body here); the definition with the same four
// operands appears further down in this file.
( //-------- example: -----------------------
String server_name, // "www.machin.com" (handed to 'separate_name_port' with 80 as the port operand)
String document_name, // "/truc/bidule.html"
List(HTTP_header) headers, // [http_header("Cookie","..."),...] added to the headers always sent
List(HTTP_argument) arguments // [http_argument("ga","bu"),...] empty list: GET, otherwise POST body
).
The same one without the 'headers' argument:
public define HTTP_GET_Result
   http_get
     (                                       //-------- example: -----------------------
       String              server_name,      // "www.machin.com"
       String              document_name,    // "/truc/bidule.html"
       List(HTTP_argument) arguments         // [http_argument("ga","bu"),...]
     ) =
   //
   // Convenience form: same as the four-operand 'http_get', with no extra headers.
   //
   with no_extra_headers = (List(HTTP_header))[ ],
     http_get(server_name,document_name,no_extra_headers,arguments).
This file also defines the command 'http_get' to be used directly from the system
prompt. To learn about the syntax, just type 'http_get' at the system prompt, or have
a look at the end of this file.
--- That's all for public definitions. ------------------------------------------------
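We need a few functions for sending and receiving bytes: 'send' transmits the request,
'receive_text_chunk' reads a small chunk of the answer, and 'receive' assembles the chunks.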
define Maybe(One)
   send
     (
       RWStream conn,   // connection the text is written to
       String   text,   // the text to transmit
       Word32   n       // position in 'text' of the next character to transmit
     ) =
   //
   // Transmit 'text' one character at a time, starting at position 'n'.
   // Running past the end of 'text' is the normal way to stop and yields success;
   // a write which fails stops the transmission and yields failure.
   //
   with next_char = nth(to_Int(n),text),
   if next_char is
     {
       failure     then success(unique),           // nothing left to send
       success(ch) then
         if conn <- ch is
           {
             failure    then failure,              // the write did not succeed
             success(_) then send(conn,text,n+1)   // go on with the following character
           }
     }.
define Maybe(String)
   receive_text_chunk
     (
       RWStream    conn,     // connection to read from
       List(Word8) so_far,   // bytes gathered so far, most recent first
       Word32      count     // bytes gathered so far (the caller starts with [] and 0)
     ) =
   //
   // Read at most 100 bytes from 'conn' and return them as a string.
   // A failed read is not reported as a failure: what has been gathered up to that
   // point (possibly the empty string) is returned as a success.
   //
   if count = 100
   then success(implode(reverse(so_far)))
   else
     with incoming = *conn,         // *conn waits for data to be readable from connection
     if incoming is
       {
         failure    then success(implode(reverse(so_far))),   // means 'connection closed by peer'
         success(b) then receive_text_chunk(conn,[b . so_far],count+1)
       }.
define HTTP_GET_Result
   reply_from_parts
     (
       String header_text,   // raw text of the response line and headers
       String document       // text received after the headers
     ) =
   //
   // Private helper of 'receive': build the final 'ok' result. The first item produced
   // by 'separate_headers' supplies the response line, the remaining items are the
   // headers. With no item at all, the response line is "" and the header list is empty.
   //
   if separate_headers(header_text) is
     {
       [ ]              then ok("",[],document),
       [first . others] then if first is http_header(line,_) then ok(line,others,document)
     }.

define HTTP_GET_Result
   receive
     (
       RWStream conn,               // connection to read the answer from
       String   headers,            // header text, "" until the double CRLF has been found
       String   text_so_far,        // text accumulated so far
       Bool     double_crlf_seen    // true once headers and document have been separated
     ) =
   //
   // Read the answer chunk by chunk until an empty chunk arrives, then build the result.
   // Until a double CRLF is found, everything received accumulates in 'text_so_far'.
   // When it is found, the text before it becomes 'headers' and the text located
   // 4 characters further becomes the beginning of the document.
   //
   if receive_text_chunk(conn,[],0) is
     {
       failure        then reply_from_parts(headers,text_so_far),
       success(chunk) then
         if chunk = ""
         then reply_from_parts(headers,text_so_far)
         else with gathered = text_so_far+chunk,
           if double_crlf_seen
           then receive(conn,headers,gathered,true)
           else if has_double_crlf(gathered) is
             {
               failure      then receive(conn,headers,gathered,false),
               success(pos) then
                 if sub_string(gathered,pos+4,length(gathered)-pos-4) is
                   {
                     failure                then alert,
                     success(document_part) then
                       if sub_string(gathered,0,pos) is
                         {
                           failure              then alert,
                           success(header_part) then receive(conn,header_part,document_part,true)
                         }
                   }
             }
     }.
The next function has a valid TCP/IP connection to the server, and tries to retrieve
the document.
define HTTP_GET_Result
   http_get
     (
       Bool                print_all,       // when true, the request is printed before being sent
       RWStream            conn,            // connection on which the request is sent
       String              server_name,     // used for the "Host: " header
       String              document_name,   // placed on the request line
       List(HTTP_header)   headers,         // extra headers, formatted by 'format_headers'
       List(HTTP_argument) arguments,
     ) =
   //
   // Build the request piece by piece. With no arguments it is a GET without body;
   // otherwise it is a POST whose body is the formatted arguments, announced by
   // the 'Content-type' and 'Content-length' headers.
   //
   with body         = format_http_args(arguments),
   with method       = (if arguments = [] then "GET " else "POST "),
   with body_headers = (if arguments = [] then ""
                        else "Content-type: application/x-www-form-urlencoded" + crlf +
                             "Content-length: " + to_decimal(length(body)) + crlf),
   with request      = method + document_name + " HTTP/1.0" + crlf +
                       "Host: " + server_name + crlf +
                       "Accept-Charset: iso-8859-1,*,utf-8" + crlf +
                       body_headers +
                       format_headers(headers) +
                       crlf +
                       body,
   //
   // Optionally show the request:
   //
   (if print_all then
      (
        print("----- request ----\n");
        print(request);
        print("\n")
      ) else unique);
   //
   // Send the request, and receive the answer if the request could be sent:
   //
   if send(conn,request,0) is
     {
       failure    then transmission_problem,
       success(_) then receive(conn,"","",false)
     }.
The next function retrieves the document using the numerical (resolved) server address.
define HTTP_GET_Result
   http_get
     (
       Bool                print_all,
       Word32              server_addr,     // numerical address handed to 'connect'
       Word32              server_port,     // port handed to 'connect'
       String              server_name,
       String              document_name,
       List(HTTP_header)   headers,
       List(HTTP_argument) arguments,
     ) =
   //
   // Open the connection first; the request is only attempted on a successful
   // connection, otherwise the connection error is reported to the caller.
   //
   with attempt = (Result(NetworkConnectError,RWStream))connect(server_addr,server_port),
   if attempt is
     {
       error(reason) then cannot_connect_to_server(reason),
       ok(stream)    then http_get(print_all,stream,server_name,document_name,headers,arguments)
     }.
define HTTP_GET_Result
   http_get
     (
       Bool                print_all,       // forwarded unchanged
       String              server_name,     // server name as given by the caller
       String              document_name,
       List(HTTP_header)   headers,
       List(HTTP_argument) arguments,
     ) =
   //
   // Split 'server_name' into a name and a port (80 is the port operand given to
   // 'separate_name_port'), resolve the name, then continue with the numeric address.
   // If the resolution does not answer 'ok', the DNS result is returned to the caller.
   //
   if separate_name_port(server_name,80) is (name,port) then
     //
     // resolve server name and call 'http_get' with numeric server address:
     //
     with a = dns(name),
     if a is ok(addr)
     //
     // The original 'server_name' (not the stripped 'name') is forwarded: further down
     // it is only used to build the "Host: " header, which must keep an explicitly
     // given port. NOTE(review): assumes 'separate_name_port' only splits off the port
     // part of 'server_name' -- confirm against web/http_get_common.anubis.
     //
     then http_get(print_all,addr,port,server_name,document_name,headers,arguments)
     else cannot_resolve_server_name(a).
Now, here is our public tool:
define HTTP_GET_Result
   http_get
     (
       String              server_name,
       String              document_name,
       List(HTTP_header)   headers,
       List(HTTP_argument) arguments,
     ) =
   //
   // Public entry point: same as the five-operand form, with printing switched off.
   //
   with print_all = (Bool)false,
     http_get(print_all,server_name,document_name,headers,arguments).
Finally, we construct the executable module 'http_get':
define One
   recall_syntax =
   //
   // Print the usage message of the 'http_get' command (a single 'print' of the whole text).
   //
   print("\nUsage: http_get <server> <document> [options] =<header> <value> ... -<arg> <value> ...\n" +
         " Options are:\n" +
         " -print_all print request, response line, headers and document\n" +
         " (default is to print only the document)\n").
global define One
   http_get
     (
       List(String) args    // command line operands: <server> <document> then the rest
     ) =
   //
   // Command entry point. At least a server and a document are required, otherwise
   // the usage message is printed. What follows them is searched for "-print_all" and
   // handed to 'get_headers' and 'get_arguments'.
   //
   if args is
     {
       [ ] then recall_syntax,
       [server . after_server] then if after_server is
         {
           [ ] then recall_syntax,
           [document . options] then
             with verbose       = member(options,"-print_all"),
             with extra_headers = get_headers(options),
             with form_args     = get_arguments(options),
             if http_get(verbose,server,document,extra_headers,form_args) is
               {
                 cannot_resolve_server_name(dns_failure) then
                   print("Cannot resolve server name: " + format(dns_failure) + ".\n"),
                 cannot_connect_to_server(connect_failure) then
                   print("Cannot connect to server: " + format(connect_failure) + ".\n"),
                 transmission_problem then
                   print("Transmission problem.\n"),
                 request_refused_by_server then
                   print("The request has been refused by server: " + server + ".\n"),
                 ok(response_line,reply_headers,page) then
                   (
                     // the response line and the headers are shown only on request
                     if verbose
                     then (
                            print("\n----- response ----\n");
                            print(response_line);
                            print("\n----- headers -----\n");
                            print_headers(reply_headers);
                            print("----- document ----\n")
                          ) else unique
                   );
                   print(page)   // on the screen (use a redirection to get it in a file)
               }
         }
     }.