Wednesday, March 30, 2016

another java hacking story, part 3

OK, here's the source.

Note that it does not handle a variety of return codes. I'll work on those later as I need them, and I'll come back to this to add things if I remember to.

Completely self-contained in this one file.

Two test cases are included, so you can see that "chunked" return and "normal" return are both handled.

package com.hu.commons;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Enumeration;
import java.util.Properties;

/**
 * AAAAHHHH!
 *
 * A minimal, self-contained HTTP/1.1 GET client built directly on a {@link Socket}.
 *
 * This is a scratch rewrite of broken functionality elsewhere.
 *
 * the problem (as the author experienced it): java.net.HttpURLConnection worked fine when the
 * return was a standard, simple "Content-Length: N" where N>0, but not with chunked results.
 * spring.framework.core.httpclient was tried as an alternative and had a different but similar
 * problem with chunked results. So this is a replacement made from scratch.
 *
 * What it handles:
 *   - plain "http" URLs only (no TLS, no redirects, no 100-continue handling);
 *   - bodies framed by "Transfer-Encoding: chunked" or by "Content-Length: N" with N>0;
 *   - any status code that may carry a body (everything except 1xx, 204 and 304).
 *
 * Everything is read as bytes from the one socket stream; no wrapper readers are layered on
 * top of it, because a buffering wrapper may consume bytes past the header block and leave the
 * underlying stream at an unknown position.
 *
 * To use: make an instance of NanoHTTPClient. set debug=true if you want verbose output.
 * Optionally call setRequestProperty(name, value). Call callMethodGet(url-string) and get the
 * body back as a String (or null if there was no body or the call failed). After the call,
 * returnCode / returnMsg / serverRev and getReceiveProperty(name) describe the response.
 *
 * An instance holds per-call state in its fields, so do not share one across threads.
 *
 * test cases are at httpbin.org and typicode.com
 *
 * for proof about the chunked part, use www.google.com
 *
 * @copyright 2016, Hyde University.
 *
 * feel free to use as you wish, but you may not claim to be the author.
 *
 * find something wrong? oh well.
 *
 */

public class NanoHTTPClient {

    /** Port used when the URL does not name one; this client only speaks plain http. */
    private static final int DEFAULT_PORT = 80;

    /** Charset used to put the request line and request headers on the wire. */
    private static final String HEADER_CHARSET = "ISO-8859-1";

    /** Charset used to decode header lines and response bodies. */
    private static final String BODY_CHARSET = "UTF-8";

    // Per-instance state. Left with their original visibility so existing callers keep working.
    String protocol = "http";
    public String url = null; // never assigned by this class
    public String host = null; // host of the most recent callMethodGet
    public int port = 80; // port of the most recent callMethodGet; standard default
    public String method = null; // GET, POST, etc; never assigned by this class
    public String version = "HTTP/1.1"; // version token sent on the request line
    public String serverRev = null; // server's http version, first token of the status line
    public String returnCode = null; // 200/400/500/etc
    public String returnMsg = null; // rest of the status line: "OK", "Bad Request" etc

    // the in/out header-block properties
    private Properties sendprops = new Properties();
    private Properties receiveprops = new Properties();

    boolean debug = false; // aka verbose

    // *******************************************************
    // *******************************************************

    public NanoHTTPClient() {}

    // *******************************************************
    // *******************************************************

    /**
     * Sets a request header to send on subsequent calls. Values set here are kept across
     * calls; "User-Agent" and "Host" are always overwritten by callMethodGet.
     *
     * @param prop header name
     * @param value header value
     */
    public void setRequestProperty(String prop, String value) {
        sendprops.setProperty(prop, value);
    }

    // *******************************************************
    // *******************************************************

    /**
     * Returns a header of the most recent response. (The misspelled name is kept because
     * existing callers use it; {@link #getReceiveProperty(String)} is the same thing.)
     *
     * @param prop header name; matched exactly first, then ignoring case
     * @return the header value, or null if the response had no such header
     */
    public String getReeiveProperty(String prop) {
        return findHeader(prop);
    }

    /**
     * Returns a header of the most recent response.
     *
     * @param prop header name; matched exactly first, then ignoring case
     * @return the header value, or null if the response had no such header
     */
    public String getReceiveProperty(String prop) {
        return findHeader(prop);
    }

    // *******************************************************
    // *******************************************************

    /**
     * Reads bytes up to (not including) the next "\r\n" and returns them decoded as UTF-8.
     *
     * Reading also stops at end of stream, in which case whatever was accumulated is returned
     * (an empty string if nothing was). A "\r" that is not followed by "\n" is kept as data.
     * An I/O error is printed and ends the line early; it is not rethrown.
     *
     * @param is stream to read from, one byte at a time
     * @return the line without its terminator; never null
     */
    public String readLine(InputStream is) {
        String result = "";

        // BAOS is an accumulator of bytes: written like a stream, converted to a String at the end.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try {
            int b = is.read();
            while (b != -1) { // -1 is end of stream; without this check the loop never ends
                if (b == '\r') {
                    int next = is.read();
                    if (next == '\n') {
                        break;
                    }
                    // bare CR: keep it, then reconsider the byte that followed it
                    baos.write('\r');
                    b = next;
                    continue;
                }
                baos.write(b);
                b = is.read();
            }
        } catch (IOException err) {
            err.printStackTrace();
        }
        //ok, make sure we get a clean conversion
        try {
            result = baos.toString(BODY_CHARSET);
        } catch (UnsupportedEncodingException err) {
            err.printStackTrace();
        }

        if (debug) {
            System.out.println("RDL: " + result);
        }

        return result;
    }

    // *******************************************************
    // *******************************************************

    /**
     * Performs a regular old GET and returns the response body.
     *
     * Side effects: updates host, port, serverRev, returnCode, returnMsg and the received
     * headers. Failures (bad URL, unknown host, I/O error, malformed or truncated response)
     * are printed and reported as a null return.
     *
     * @param url absolute http URL, query string included
     * @return the body decoded as UTF-8; null if the response carried no body this client
     *         can frame (no chunked encoding and no positive Content-Length) or on failure
     */
    public String callMethodGet (String url) {
        Socket so = null;
        try {
            URL target = new URL(url); // fails when the syntax is wrong

            if (debug) {
                System.out.println(target);
            }

            // forget everything about the previous response when the instance is reused
            receiveprops.clear();
            serverRev = null;
            returnCode = null;
            returnMsg = null;

            host = target.getHost();
            // always recompute, so a port from an earlier URL does not leak into this call
            port = target.getPort() > 0 ? target.getPort() : DEFAULT_PORT;

            sendprops.setProperty("User-Agent", "NanoHTTPClient"); // always use this name, no spoofing that.
            setDefaultRequestProperty("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,*/*;q=0.5");
            setDefaultRequestProperty("Accept-Language", "en-us,ex;q=0.5");
            setDefaultRequestProperty("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
            // this is required by HTTP/1.1
            sendprops.setProperty("Host", host + ":" + port);

            // getFile includes the query params; it is empty for "http://host", which needs "/"
            String path = target.getFile();
            if (path.length() == 0) {
                path = "/";
            }

            so = new Socket(host, port);
            sendRequest(so.getOutputStream(), path);

            InputStream is = so.getInputStream();
            readStatusLine(is);
            readHeaders(is);

            String body = mayHaveBody(returnCode) ? readBody(is) : null;

            if (debug) {
                System.out.println(body == null ? 0 : body.length());
            }

            return body;

        } catch (IOException err) {
            // also covers MalformedURLException and UnknownHostException
            err.printStackTrace();
        } finally {
            if (so != null) {
                try {
                    so.close(); // closes both socket streams as well
                } catch (IOException ignored) {
                    // nothing useful can be done about a failed close
                }
            }
        }
        return null; // oops.
    }

    // *******************************************************
    // *******************************************************

    /** Sets a request header only when the caller has not already supplied one. */
    private void setDefaultRequestProperty(String name, String value) {
        if (sendprops.getProperty(name) == null) {
            sendprops.setProperty(name, value);
        }
    }

    /** Looks up a received header by exact name, then ignoring case (header names are case-insensitive). */
    private String findHeader(String name) {
        if (name == null) {
            return null;
        }
        String exact = receiveprops.getProperty(name);
        if (exact != null) {
            return exact;
        }
        Enumeration<?> names = receiveprops.propertyNames();
        while (names.hasMoreElements()) {
            String candidate = (String) names.nextElement();
            if (candidate.equalsIgnoreCase(name)) {
                return receiveprops.getProperty(candidate);
            }
        }
        return null;
    }

    /** Writes the request line, every send property and the terminating blank line, then flushes. */
    private void sendRequest(OutputStream os, String path) throws IOException {
        StringBuilder request = new StringBuilder();
        request.append("GET ").append(path).append(' ').append(version).append("\r\n");

        // push all the "send" properties across. order not important.
        Enumeration<?> names = sendprops.propertyNames();
        while (names.hasMoreElements()) {
            String name = (String) names.nextElement();
            request.append(name).append(':').append(sendprops.getProperty(name)).append("\r\n");
        }
        request.append("\r\n"); // blank line ends the header block

        if (debug) {
            System.out.print(request);
            System.out.println("-------------");
        }

        os.write(request.toString().getBytes(HEADER_CHARSET));
        os.flush(); // don't forget this, you have no idea what buffering is going on behind the scenes.
    }

    /** Reads "HTTP-version SP status-code [SP reason-phrase]" into serverRev/returnCode/returnMsg. */
    private void readStatusLine(InputStream is) throws IOException {
        String line = readLine(is);
        if (debug) {
            System.out.println("Ret Code: " + line);
        }

        // limit 3 keeps a multi-word reason phrase ("Bad Request") in one piece
        String[] parts = line.split(" ", 3);
        if (parts.length < 2) {
            throw new IOException("Malformed status line: '" + line + "'");
        }
        serverRev = parts[0];
        returnCode = parts[1];
        returnMsg = parts.length > 2 ? parts[2] : "";
    }

    /** Reads header lines up to the blank separator into receiveprops; a repeated name keeps the last value. */
    private void readHeaders(InputStream is) {
        String line;
        while ((line = readLine(is)).length() > 0) {
            if (debug) {
                System.out.println(line);
            }
            int colon = line.indexOf(':');
            if (colon <= 0) {
                continue; // not "name: value"; skip rather than blow up
            }
            receiveprops.setProperty(line.substring(0, colon).trim(), line.substring(colon + 1).trim());
        }
        if (debug) {
            System.out.println(receiveprops);
        }
    }

    /** True unless the status code is one that never carries a body (1xx, 204, 304). */
    private static boolean mayHaveBody(String code) {
        return !(code.startsWith("1") || code.equals("204") || code.equals("304"));
    }

    /** Reads the body using chunked framing if announced, else a positive Content-Length, else returns null. */
    private String readBody(InputStream is) throws IOException {
        String encoding = findHeader("Transfer-Encoding");
        // chunked is checked first: when both are present, Transfer-Encoding wins over Content-Length
        if (encoding != null && encoding.trim().toLowerCase().endsWith("chunked")) {
            return readChunkedBody(is);
        }

        int len = contentLength();
        if (debug) {
            System.out.println("CL: " + len);
        }
        if (len > 0) { // normal content, one long byte-sequence.
            return readFixedLengthBody(is, len);
        }
        return null;
    }

    /** Parses Content-Length; -1 when the header is absent. */
    private int contentLength() throws IOException {
        String raw = findHeader("Content-Length");
        if (raw == null) {
            return -1;
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException err) {
            throw new IOException("Bad Content-Length: '" + raw + "'", err);
        }
    }

    /** Fills buf completely from the stream; read() may return fewer bytes than asked, so loop. */
    private static void readFully(InputStream is, byte[] buf) throws IOException {
        int got = 0;
        while (got < buf.length) {
            int n = is.read(buf, got, buf.length - got); // blocks until some data arrives
            if (n < 0) {
                throw new IOException("Connection closed after " + got + " of " + buf.length + " bytes");
            }
            got += n;
        }
    }

    /** Reads exactly len bytes and decodes them as UTF-8. */
    private String readFixedLengthBody(InputStream is, int len) throws IOException {
        byte[] buf = new byte[len];
        readFully(is, buf);
        return new String(buf, BODY_CHARSET);
    }

    /** Reads "hex-size CRLF data CRLF" chunks until the zero-size chunk; trailers are not consumed. */
    private String readChunkedBody(InputStream is) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        while (true) {
            String chunkSizeLine = readLine(is); // hex size, optionally followed by ";extensions"
            if (debug) {
                System.out.println("chunk size line hex: " + chunkSizeLine);
            }

            int semi = chunkSizeLine.indexOf(';');
            String hex = (semi >= 0 ? chunkSizeLine.substring(0, semi) : chunkSizeLine).trim();

            int chunkSize;
            try {
                chunkSize = Integer.parseInt(hex, 16);
            } catch (NumberFormatException err) {
                // also reached at end of stream, where the size line comes back empty
                throw new IOException("Bad chunk size line: '" + chunkSizeLine + "'", err);
            }
            if (chunkSize < 0) {
                throw new IOException("Negative chunk size: '" + chunkSizeLine + "'");
            }
            if (chunkSize == 0) {
                break; // final chunk size is always zero.
            }
            if (debug) {
                System.out.println("chunk size decimal: " + chunkSize);
            }

            // in general, the chunks aren't large. Doesnt mean they can't be...
            byte[] buf = new byte[chunkSize];
            readFully(is, buf);
            baos.write(buf, 0, buf.length);

            readLine(is); // the CRLF that closes the chunk data
        }

        String body = baos.toString(BODY_CHARSET);
        if (debug) {
            System.out.println(body);
        }
        return body;
    }

    // *******************************************
    // *******************************************
    // *******************************************

    /** Manual smoke test against public servers; needs network access. */
    public static void main(String[] args) {
        NanoHTTPClient nano = new NanoHTTPClient();

        // C-L normal return.
        System.out.println(nano.callMethodGet("http://httpbin.org/"));
        System.out.println(nano.callMethodGet("http://httpbin.org/ip"));

        //nano.debug = true;
        // YAY! CHUNKED!
        System.out.println(nano.callMethodGet("http://www.google.com/"));
    }

}

No comments: