Brool brool (n.) : a low roar; a deep murmur or humming

Ocaml Sockets

 |  coding

There seems to be one standard library in OCaml for dealing with HTTP, and that’s Ocamlnet. Ocamlnet suffers from a few problems, the chief of which is that it’s difficult to set up unless you use something like GODI to install your packages. (Sadly, this is one thing that OCaml is still not very strong at; it’s not “batteries included” like Python.)

Sometimes, you don’t want the whole Ocamlnet baggage, you just want the smallest, simplest routine possible to get the contents of a web page. Well, it boils down to just a few lines of Ocaml; we just have to create a socket, connect it to the end point, and then get the results. First we define a function to split a URL into a hostname and everything else:

(* split an url into the (hostname, index) *)
open Unix
open Str

(** [spliturl url] splits [url] into [(host, path)].

    An optional leading ["http://"] is skipped; no other scheme is
    recognised. [host] is everything up to the first ['/'] and [path] is
    the remainder, starting with that ['/']. When the URL has no path
    component, [path] defaults to ["/"].

    @raise Not_found if [url] does not start with a non-empty host part. *)
let spliturl url =
    let re = Str.regexp "\\(http://\\)?\\([^/]+\\)\\(/.*\\)?" in
    if Str.string_match re url 0 then begin
        let host = Str.matched_group 2 url in
        (* Group 3 is optional: Str raises Not_found when it did not take
           part in the match, which means the URL had no path. *)
        let index =
            try Str.matched_group 3 url
            with Not_found -> "/"
        in
        (host, index)
    end else
        raise Not_found

… and then some routines to read and write to a socket…

(* read everything pending in the socket *)
(** [readall socket] receives from [socket] in 512-byte chunks and returns
    the concatenation of everything received.

    Reading stops when [recv] returns 0 or when it raises a
    [Unix.Unix_error] other than [EINTR]. In the error case the data
    collected so far is returned instead of propagating the exception;
    callers that put the socket in non-blocking mode rely on this to stop
    once no more data is pending. *)
let readall socket =
    let chunk_size = 512 in
    let buffer = Bytes.create chunk_size in
    (* [accum] holds the chunks in reverse order. Using
       [match ... with exception] keeps the recursive call outside the
       handler, so the loop is tail-recursive. *)
    let rec loop accum =
        match Unix.recv socket buffer 0 chunk_size [] with
        | 0 -> accum
        | count -> loop (Bytes.sub_string buffer 0 count :: accum)
        | exception Unix.Unix_error (Unix.EINTR, _, _) ->
            (* interrupted by a signal before any data arrived: retry *)
            loop accum
        | exception Unix.Unix_error _ -> accum
    in
    String.concat "" (List.rev (loop []))

(* write everything to a socket *)
(** [writeall socket s] sends the whole of [s] on [socket] and returns the
    number of bytes sent.

    A single [send] may transmit only part of the data, so this keeps
    sending until every byte has been handed to the kernel. [EINTR] is
    retried; on a non-blocking socket, [EAGAIN]/[EWOULDBLOCK] makes the
    function wait until the socket is writable again.

    @raise Unix.Unix_error for any other send failure. *)
let writeall socket s =
    let total = String.length s in
    (* block until [socket] is writable, ignoring signal interruptions *)
    let rec wait_writable () =
        match Unix.select [] [socket] [] (-1.0) with
        | _ -> ()
        | exception Unix.Unix_error (Unix.EINTR, _, _) -> wait_writable ()
    in
    let rec loop sent =
        if sent >= total then sent
        else
            match Unix.send_substring socket s sent (total - sent) [] with
            | 0 -> sent (* no progress possible: avoid spinning forever *)
            | n -> loop (sent + n)
            | exception Unix.Unix_error (Unix.EINTR, _, _) -> loop sent
            | exception Unix.Unix_error ((Unix.EAGAIN | Unix.EWOULDBLOCK), _, _) ->
                wait_writable ();
                loop sent
    in
    loop 0

Once you have those bits, the routine that gets the contents of a web page is straightforward.

(* get the contents of an arbitrary URL page *)
(** [gethttp url] connects to the host named in [url] on port 80, sends an
    HTTP/1.0 [GET] request for the URL's path, and returns everything that
    [readall] collects from the socket afterwards. The result is returned
    as received; nothing is parsed or stripped.

    The socket is closed on both the success and the failure path.

    @raise Not_found if [url] cannot be split by [spliturl], or if the
      host cannot be resolved to at least one address.
    @raise Unix.Unix_error if creating or connecting the socket, or
      sending the request, fails. *)
let gethttp url =
    let (hostname, rest) = spliturl url in
    let hostinfo = Unix.gethostbyname hostname in
    (* Guard the index below: an empty address list would otherwise
       surface as an unrelated Invalid_argument. *)
    if Array.length hostinfo.Unix.h_addr_list = 0 then raise Not_found;
    let server_address = hostinfo.Unix.h_addr_list.(0) in
    let socket = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
    let ss = "GET " ^ rest ^ " HTTP/1.0\r\nHost: " ^ hostname ^ "\r\n\r\n" in
    let rv =
        try
            Unix.connect socket (Unix.ADDR_INET (server_address, 80));
            ignore (writeall socket ss);
            readall socket
        with e ->
            (* do not leak the descriptor when the exchange fails *)
            Unix.close socket;
            raise e
    in
    Unix.close socket;
    rv

Note that this doesn’t stress the error checking much; in fact, it pretty much ignores it. Use netclient in Ocamlnet if you want something robust; this is just something quick.

Oh, you want a quick-and-easy server? That’s just slightly more complicated; we need to create the socket, bind it to a port, and then accept any connections that happen and deal with them. Try this:

(* create a server on the given port and invoke the given function whenever anybody makes a request *)
(** [httplistener port fn] binds a TCP socket to the first address of
    ["localhost"] on [port] and serves connections one at a time, forever.

    For each accepted connection, the text obtained from [readall] is
    passed to [fn], and the string [fn] returns is written back with
    [writeall] before the connection is closed.

    This function never returns normally. Exceptions are not handled
    here: an exception from [fn] or from a socket call propagates to the
    caller, and the sockets open at that point are not closed. *)
let httplistener port fn =
    let socket = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
    let hostinfo = Unix.gethostbyname "localhost" in
    let server_address = hostinfo.Unix.h_addr_list.(0) in
    Unix.bind socket (Unix.ADDR_INET (server_address, port));
    Unix.listen socket 10;
    while true do
        let (fd, _) = Unix.accept socket in
        (* Non-blocking mode makes recv raise rather than wait once no data
           is pending. NOTE(review): this presumably is what lets readall
           return without the client closing its end; confirm against
           readall's error handling. *)
        Unix.set_nonblock fd;
        let ins = readall fd in
        ignore (writeall fd (fn ins));
        Unix.close fd
    done

This binds to localhost; if you want to bind it to the world-at-large you’ll want to use gethostname () instead of “localhost” on the hostinfo assignment. Note the complete lack of error checking. Some exception throws? You’ll lose the socket. Multithreaded or multiprocessing? Nope! Nonetheless, sometimes you just want some quick scaffolding.

(Thanks go to this excellent socket tutorial for Python, from which I cribbed everything and translated to Ocaml)


Comments are moderated whenever I remember that I have a blog.

Phil | 2014-06-07 22:25:59
Thank you so much for this! I'm building a really simple web server to get into OCaml networking basics. It's so great to have a tangible example to base off of. One question, though, how is different than Unix.recv? Like why use Unix.recv if you don't set any flags? Thanks!!
Add a comment