TET Cookbook

cookbook

emptycheck

Check whether a specified area on the page is empty, i.e. whether it contains no text, vector graphics, or images.

 

Download Java Code      Show Output      Show Input PDF

package com.pdflib.cookbook.tet.special;

import com.pdflib.TET;
import com.pdflib.TETException;

/* Check whether a specified area on the page is empty, i.e.
 * does not contain any text, vector graphics or image.
 *
 * $Id: emptycheck.java,v 1.2 2017/05/26 11:39:22 stm Exp $
 */

/**
 * Command-line sample that opens a PDF document with TET and, for every
 * page, reports whether a fixed rectangular region of the page is empty.
 *
 * <p>The region and the "emptycheck" mode are configured through
 * {@link #pageoptlist}. All output, including the per-page result, is
 * written to standard error.
 */
public class emptycheck
{
    /**
     * Global option list
     */
    static final String globaloptlist = "searchpath={../input}";

    /**
     * Document-specific option list
     */
    static final String docoptlist = "";

    /**
     * Page-specific option list:
     * - "emptycheck" checks whether the specified area is empty
     * - "includebox" specifies the region of interest
     *   (we check the first line in the right column)
     */
    static final String pageoptlist =
        "emptycheck includebox={{300 66 560 88}}";

    /**
     * Runs the empty-area check on every page of the given document.
     *
     * <p>Errors are reported on standard error; the method itself does not
     * throw. A page that cannot be opened is reported and skipped, while a
     * document that cannot be opened ends the run.
     *
     * @param argv exactly one element: the name of the PDF file to examine
     */
    public static void main(String[] argv)
    {
        TET tet = null;

        try
        {
            if (argv.length != 1)
            {
                throw new IllegalArgumentException(
                    "usage: emptycheck <filename>");
            }

            tet = new TET();

            tet.set_option(globaloptlist);

            int doc = tet.open_document(argv[0], docoptlist);

            if (doc == -1)
            {
                /* Note the blank before "in": without it the error number
                 * runs directly into the following word. */
                throw new IllegalStateException(
                        "Error " + tet.get_errnum() + " in "
                        + tet.get_apiname() + "(): " + tet.get_errmsg());
            }

            /* get number of pages in the document */
            int n_pages = (int) tet.pcos_get_number(doc, "length:pages");

            /* loop over pages in the document */
            for (int pageno = 1; pageno <= n_pages; ++pageno)
            {
                int page = tet.open_page(doc, pageno, pageoptlist);

                if (page == -1)
                {
                    print_tet_error(tet, pageno);
                    continue;           /* try next page */
                }

                /*
                 * Retrieve the "text" which in this situation contains
                 * only "empty" or "notempty".
                 */
                System.err.println("box on page " + pageno + ": " +
                        tet.get_text(page));

                tet.close_page(page);
            }

            tet.close_document(doc);
        }
        catch (TETException e)
        {
            System.err.println("TET exception occurred in extractor sample:");
            System.err.println("[" + e.get_errnum() + "] " + e.get_apiname() +
                    ": " + e.get_errmsg());
        }
        catch (Exception e)
        {
            /* Usage errors and document-open failures end up here. */
            System.err.println(e.getMessage());
        }
        finally
        {
            /* Release the TET object on every exit path. */
            if (tet != null)
            {
                tet.delete();
            }
        }
    }

    /**
     * Report a TET error.
     *
     * @param tet The TET object
     * @param pageno The page number on which the error occurred
     */
    private static void print_tet_error(TET tet, int pageno)
    {
        System.err.println("Error " + tet.get_errnum() + " in  "
                + tet.get_apiname() + "() on page " + pageno + ": "
                + tet.get_errmsg());
    }
}