Search This Blog

Friday, 27 November 2009

OpenOffice API, a generic document inspector

Here is a useful program that inspects the content of a document (OpenOffice .odt or Microsoft Word .doc) and reads its paragraphs, text sections, bookmarks, text fields, forms and tables.

package docreader;

import java.util.ArrayList;

import org.apache.log4j.Logger;


public class DocReaderParser {

    private static Logger logger = Logger.getLogger(DocReaderParser.class);

    public DocReaderParser() {

    /**
     * Derives a human-readable type label for a form component from its
     * "ClassId" property and records selected values in the document model.
     *
     * For a text field, the "Text" property is logged and stored in
     * documentModel under the given name. For a list box, the selected items
     * are looked up and logged.
     *
     * NOTE(review): this listing is incomplete. Closing braces, the ends of
     * several statements and (apparently) the break statements of the switch
     * are missing, and the service names passed to supportsService are empty
     * strings. Restore them from the original source before compiling.
     *
     * @param xComponent    property set of the component to classify; may be
     *                      null, in which case no ClassId lookup happens
     * @param name          key under which a text field value is stored
     * @param documentModel model that receives the form element values
     * @return the type label, or an empty string when none was determined
     * @throws Exception propagated from the UNO property access calls
     */
    private String classifyFormComponentType(XPropertySet xComponent,
            String name, DocumentModel documentModel) throws Exception {
        String sType = "";
        XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
                XServiceInfo.class, xComponent);
        XPropertySetInfo xPSI = null;
        if (null != xComponent) {
            xPSI = xComponent.getPropertySetInfo();
            // NOTE(review): the next two lines look like the body of a block
            // comment (a debug dump of all property names) whose opening and
            // closing delimiters are missing.
             * Property ps[] = xPSI.getProperties(); for (int i = 0; i <
             * ps.length; i++) { logger.debug("ps " + ps[i].Name); }

        // same object queried again as a property set; used for all reads below
        XPropertySet xCompProps = (XPropertySet) UnoRuntime.queryInterface(
                XPropertySet.class, xComponent);

        if ((null != xPSI) && xPSI.hasPropertyByName("ClassId")) {
            // get the ClassId property
            Short nClassId = (Short) xCompProps.getPropertyValue("ClassId");
            // NOTE(review): no break is visible after any case, so as listed
            // every case falls through to the ones below it - presumably the
            // break lines were dropped; confirm against the original.
            switch (nClassId.intValue()) {
            case FormComponentType.COMMANDBUTTON:
                sType = "Command button";
            case FormComponentType.RADIOBUTTON:
                sType = "Radio button";
            case FormComponentType.IMAGEBUTTON:
                sType = "Image button";
            case FormComponentType.CHECKBOX:
                sType = "Check Box";
            case FormComponentType.LISTBOX:
                sType = "List Box";
                // NOTE(review): both declarations below are cut off; the
                // property names being read are not visible here.
                short[] selectedItems = (short[]) xCompProps
                String[] items = (String[]) xCompProps
                if (items != null && items.length > 0) {
                    ArrayList selectedItemsModelList = new ArrayList();
                    for (int item_idx = 0; item_idx < selectedItems.length; item_idx++) {
                        // only use indexes that point inside the items array
                        if (selectedItems[item_idx] > -1
                                && selectedItems[item_idx] < items.length) {
                            logger.debug("property SelectedItems[" + item_idx
                                    + "]: " + items[selectedItems[item_idx]]);
                        }// if
                    }// for
                    // NOTE(review): nothing visible adds to
                    // selectedItemsModelList, and the body of this if is missing.
                    if (selectedItemsModelList.size() > 0) {
            case FormComponentType.COMBOBOX:
                sType = "Combo Box";
            case FormComponentType.GROUPBOX:
                sType = "Group Box";
            case FormComponentType.FIXEDTEXT:
                sType = "Fixed Text";
            case FormComponentType.GRIDCONTROL:
                sType = "Grid Control";
            case FormComponentType.FILECONTROL:
                sType = "File Control";
            case FormComponentType.HIDDENCONTROL:
                sType = "Hidden Control";
            case FormComponentType.IMAGECONTROL:
                sType = "Image Control";
            case FormComponentType.DATEFIELD:
                sType = "Date Field";
            case FormComponentType.TIMEFIELD:
                sType = "Time Field";
            case FormComponentType.NUMERICFIELD:
                sType = "Numeric Field";
            case FormComponentType.CURRENCYFIELD:
                sType = "Currency Field";
            case FormComponentType.PATTERNFIELD:
                sType = "Pattern Field";
            case FormComponentType.TEXTFIELD:
                // there are two known services with this class id: the usual
                // text field,
                // and the formatted field
                sType = "Text Field";
                logger.debug("property Text: "
                        + xCompProps.getPropertyValue("Text"));
                // the empty-string prefix turns the property value into a String
                documentModel.addFormElement(name, ""
                        + xCompProps.getPropertyValue("Text"));
                // NOTE(review): the service name is an empty string here -
                // presumably the formatted-field service name was lost.
                if ((null != xSI)
                        && xSI
                                .supportsService("")) {
                    sType = "Formatted Field";
        } else {
            // no ClassId property: check whether the component is a form
            // NOTE(review): service name literal is empty here as well.
            if ((null != xSI)
                    && xSI
                            .supportsService("")) {
                sType = "Form";
        return sType;

    /**
     * Returns a draw page of the given document.
     *
     * The document is first asked for a single draw page through
     * XDrawPageSupplier. If that yields null, the code falls back to the
     * XDrawPagesSupplier page collection.
     *
     * NOTE(review): the listing is incomplete - the XModel query and the
     * fallback page lookup are cut off mid-statement, and closing braces are
     * missing.
     *
     * @param m_xDocument the loaded document component
     * @return the draw page that was found
     * @throws java.lang.Exception propagated from the UNO calls
     */
    private XDrawPage getDocumentDrawPage(XComponent m_xDocument)
            throws java.lang.Exception {
        // NOTE(review): second argument of queryInterface is missing below
        XModel s_aDocument = (XModel) UnoRuntime.queryInterface(XModel.class,
        XDrawPage xReturn;
        // in case of a Writer document, this is rather easy: simply ask the
        // XDrawPageSupplier
        XDrawPageSupplier xSuppPage = (XDrawPageSupplier) UnoRuntime
                .queryInterface(XDrawPageSupplier.class, s_aDocument);
        xReturn = xSuppPage.getDrawPage();
        if (null == xReturn) {
            // the model itself is no draw page supplier - then it may be an
            // Impress or Calc
            // (or any other multi-page) document
            XDrawPagesSupplier xSuppPages = (XDrawPagesSupplier) UnoRuntime
                    .queryInterface(XDrawPagesSupplier.class, s_aDocument);
            XDrawPages xPages = xSuppPages.getDrawPages();
            // NOTE(review): the element taken from xPages is not visible here
            xReturn = (XDrawPage) UnoRuntime.queryInterface(XDrawPage.class,
            // Note that this is not really error-proof code: If the document
            // model does not support the
            // XDrawPagesSupplier interface, or if the pages collection returned
            // is empty, this will break.
        return xReturn;

    /**
     * Returns the forms collection of the document's draw page, i.e. the root
     * of the form component tree.
     *
     * NOTE(review): the parameter list is missing from this listing; the body
     * uses m_xDocument, and the caller passes an XComponent.
     *
     * @return the forms container, or null when the draw page does not
     *         support XFormsSupplier
     * @throws java.lang.Exception propagated from getDocumentDrawPage
     */
    private XNameContainer getFormComponentTreeRoot(
            throws java.lang.Exception {
        XFormsSupplier xSuppForms = (XFormsSupplier) UnoRuntime.queryInterface(
                XFormsSupplier.class, getDocumentDrawPage(m_xDocument));
        XNameContainer xFormsCollection = null;
        // stays null when the draw page does not expose XFormsSupplier
        if (null != xSuppForms) {
            xFormsCollection = xSuppForms.getForms();
        return xFormsCollection;

    /**
     * Walks a container of form components, classifying and logging each
     * element and recursing into elements that are containers themselves.
     *
     * NOTE(review): several statements are cut off in this listing (the first
     * log call, the getByName argument, the end of the classify call) and
     * closing braces are missing.
     *
     * @param xContainer    container whose named elements are visited
     * @param sPrefix       text prepended to element names in the log; one
     *                      more space is added per nesting level
     * @param documentModel model passed on to the classification step
     * @throws java.lang.Exception propagated from the UNO calls
     */
    private void enumerateFormComponents(XNameAccess xContainer,
            String sPrefix, DocumentModel documentModel)
            throws java.lang.Exception {
        // loop through all the element names
        String aNames[] = xContainer.getElementNames();

        for (int i = 0; i < aNames.length; ++i) {
            // print the child name
            // NOTE(review): the start of this log statement is missing
                    + " recognized element, name: " + sPrefix + aNames[i]);

            XPropertySet xModelProps = (XPropertySet) UnoRuntime
                    .queryInterface(XPropertySet.class, xContainer
            logger.debug("classifyFormComponentType: "
                    + classifyFormComponentType(xModelProps, aNames[i],

            // check if it is a FormComponents component itself
            XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
                    XServiceInfo.class, xContainer.getByName(aNames[i]));
            // NOTE(review): service name literal is empty - presumably lost
            if (xSI.supportsService("")) {
                XNameAccess xChildContainer = (XNameAccess) UnoRuntime
                        .queryInterface(XNameAccess.class, xSI);
                // recurse with a longer prefix so nested names are indented
                enumerateFormComponents(xChildContainer, new String(" ")
                        + sPrefix, documentModel);

    /**
     * Logs the text fields and the paragraphs of a text document.
     *
     * First every text field is visited and the name and property names of
     * its field master are logged. Then every element of the document text is
     * enumerated; for paragraphs, each text portion is logged together with
     * its font name and the property state of "CharWeight".
     *
     * NOTE(review): nothing visible in this listing writes to documentModel.
     * Several statements are cut off and closing braces are missing; the
     * service names passed to supportsService are empty strings.
     *
     * @param xComp         the loaded document component
     * @param documentModel model to populate (unused in the visible code)
     * @throws NoSuchElementException   from the UNO enumerations
     * @throws WrappedTargetException   from the UNO enumerations or property access
     * @throws UnknownPropertyException from the property lookups
     */
    private void enumerateTextComponents(XComponent xComp,
            DocumentModel documentModel) throws NoSuchElementException,
            WrappedTargetException, UnknownPropertyException {
        // query the new document for the XTextDocument interface
        XTextDocument xTextDocument = (XTextDocument) UnoRuntime
                .queryInterface(XTextDocument.class, xComp);
        XText xText = xTextDocument.getText();
        // Get Access to the TextFields in the document
        XTextFieldsSupplier xTextFieldsSupplier = (XTextFieldsSupplier) UnoRuntime
                .queryInterface(XTextFieldsSupplier.class, xComp);
        // NOTE(review): the call on xTextFieldsSupplier is cut off below
        XEnumerationAccess xEnumeratedFields = xTextFieldsSupplier
        XEnumeration enumeration = xEnumeratedFields.createEnumeration();
        // Loop through the TextFields
        while (enumeration.hasMoreElements()) {
            Object field = enumeration.nextElement();
            // NOTE(review): no null check after queryInterface; assumes every
            // field is a dependent text field - confirm
            XDependentTextField dependentTextField = (XDependentTextField) UnoRuntime
                    .queryInterface(XDependentTextField.class, field);
            XPropertySet propertySet = dependentTextField.getTextFieldMaster();
            String name = (String) propertySet.getPropertyValue("Name");
            logger.debug("textfield name name " + name);
            // dump the names of all properties of the field master
            XPropertySetInfo propertysetInfo = propertySet.getPropertySetInfo();
            Property properties[] = propertysetInfo.getProperties();
            for (int i = 0; i < properties.length; i++) {
                logger.debug("prop " + properties[i].Name);
        // enumerate paragraphs
        logger.debug("create an enumeration of all paragraphs");
        XEnumeration xParagraphEnumeration = null;
        XEnumerationAccess xParaEnumerationAccess = null;
        XEnumeration xTextPortionEnum;
        XTextContent xTextElement = null;
        // create an enumeration access of all paragraphs of a document
        // NOTE(review): the queryInterface call below is cut off
        XEnumerationAccess xEnumerationAccess = (XEnumerationAccess) UnoRuntime
              , xText);
        xParagraphEnumeration = xEnumerationAccess.createEnumeration();

        // Loop through all paragraphs of the document
        while (xParagraphEnumeration.hasMoreElements()) {
            logger.debug("------------------------------- new paragraph");

            xTextElement = (XTextContent) UnoRuntime.queryInterface(
                    XTextContent.class, xParagraphEnumeration.nextElement());

            XServiceInfo xServiceInfo = (XServiceInfo) UnoRuntime
                    .queryInterface(XServiceInfo.class, xTextElement);

            // log every service the element supports
            String[] services = xServiceInfo.getSupportedServiceNames();
            for (int si = 0; si < services.length; si++) {
                logger.debug("service " + services[si]);

            // check if the current paragraph is really a paragraph or an
            // anchor of a frame or picture
            // NOTE(review): service name literal is empty - presumably lost
            if (xServiceInfo.supportsService("")) {
                XTextRange xTextRange = xTextElement.getAnchor();
                logger.debug("This is a Paragraph");

                // create another enumeration to get all text portions of
                // the paragraph
                xParaEnumerationAccess = (XEnumerationAccess) UnoRuntime
                        .queryInterface(XEnumerationAccess.class, xTextElement);
                xTextPortionEnum = xParaEnumerationAccess.createEnumeration();

                while (xTextPortionEnum.hasMoreElements()) {
                    XTextRange xTextPortion = (XTextRange) UnoRuntime
                            .queryInterface(XTextRange.class, xTextPortionEnum
                    logger.debug("Text from the portion : "
                            + xTextPortion.getString());

                    XPropertySet xPropertySet = (XPropertySet) UnoRuntime
                            .queryInterface(XPropertySet.class, xTextPortion);
                    logger.debug("font name: "
                            + xPropertySet.getPropertyValue("CharFontName"));

                    // PropertyState status of each text portion.
                    XPropertyState xPropertyState = (XPropertyState) UnoRuntime
                            .queryInterface(XPropertyState.class, xTextPortion);

                    // NOTE(review): the three comparisons below are cut off;
                    // the state constants being compared are not visible
                    if (xPropertyState.getPropertyState("CharWeight").equals(
                                .debug("-  The text range contains more than one different attributes");

                    if (xPropertyState.getPropertyState("CharWeight").equals(
                        logger.debug(" - The text range contains hard formats");

                    if (xPropertyState.getPropertyState("CharWeight").equals(
                                .debug(" - The text range doesn't contains hard formats");
            // NOTE(review): service name literal is empty here too
            } else if (xServiceInfo
                    .supportsService("")) {
                logger.debug("this is a table");
            } else {
                logger.debug("The text portion isn't a text paragraph");
        }// while

    /**
     * Reads every bookmark of the document, logs its name and anchor text and
     * adds it to the document model.
     *
     * Nothing happens when the document does not support XBookmarksSupplier.
     *
     * NOTE(review): the listing is incomplete - the statement that uses the
     * previous bookmark and the addBookmark call are cut off, and closing
     * braces are missing.
     *
     * @param xComp         the loaded document component
     * @param documentModel model that receives the bookmarks
     * @throws NoSuchElementException from the bookmark lookup by name
     * @throws WrappedTargetException from the bookmark lookup by name
     */
    private void enumerateBookmarks(XComponent xComp,
            DocumentModel documentModel) throws NoSuchElementException,
            WrappedTargetException {
        // accessing the bookmark collection of the document
        XBookmarksSupplier xBookmarksSupplier = (XBookmarksSupplier) UnoRuntime
                .queryInterface(XBookmarksSupplier.class, xComp);
        if (xBookmarksSupplier != null) {
            XNameAccess xNamedBookmarks = xBookmarksSupplier.getBookmarks();
            String bookmarks[] = xNamedBookmarks.getElementNames();
            Object bookmark = null, previousBookmark = null;
            for (int b = 0; b < bookmarks.length; b++) {
                // remember the bookmark handled in the previous iteration
                if (bookmark != null) {
                    previousBookmark = bookmark;
                bookmark = xNamedBookmarks.getByName(bookmarks[b]);

                // we need its XTextRange which is available from
                // getAnchor(),
                // so query for XTextContent
                XTextContent xBookmarkContent = (XTextContent) UnoRuntime
                        .queryInterface(XTextContent.class, bookmark);
                // get the anchor of the bookmark (its XTextRange)
                XTextRange xBookmarkRange = xBookmarkContent.getAnchor();

                if (previousBookmark != null) {
                    // NOTE(review): statement cut off; what is done with the
                    // previous bookmark's range is not visible
                    XTextRange x_previousBookmarkRange = ((XTextContent) UnoRuntime

                // set the bookmark text
                // xBookmarkRange.setString("test");

                logger.debug("bookmark " + bookmarks[b] + ", text "
                        + xBookmarkRange.getString());
                documentModel.addBookmark(bookmarks[b], xBookmarkRange

    /**
     * Visits every text table of the document and reads the text of each
     * cell, row by row.
     *
     * NOTE(review): the listing is incomplete - the statements that store
     * each row and hand the table to the document model are cut off (only the
     * tail using "LinkDisplayName" and righe is visible), and closing braces
     * are missing.
     *
     * @param xComp         the loaded document component
     * @param documentModel model intended to receive the tables
     * @throws IndexOutOfBoundsException from index-based table or cell access
     * @throws WrappedTargetException    from index-based table access
     * @throws UnknownPropertyException  from the table property lookup
     */
    private void enumerateTables(XComponent xComp, DocumentModel documentModel)
            throws IndexOutOfBoundsException, WrappedTargetException,
            UnknownPropertyException {
        // first query the XTextTablesSupplier interface from our document
        XTextTablesSupplier xTablesSupplier = (XTextTablesSupplier) UnoRuntime
                .queryInterface(XTextTablesSupplier.class, xComp);
        // get the tables collection
        XNameAccess xNamedTables = xTablesSupplier.getTextTables();
        // now query the XIndexAccess from the tables collection
        XIndexAccess xIndexedTables = (XIndexAccess) UnoRuntime.queryInterface(
                XIndexAccess.class, xNamedTables);
        // we need properties
        XPropertySet xTableProps = null;
        // get the tables
        for (int i = 0; i < xIndexedTables.getCount(); i++) {
            Object table = xIndexedTables.getByIndex(i);
            logger.debug("------------------------- recognized table ");

            // the properties
            xTableProps = (XPropertySet) UnoRuntime.queryInterface(
                    XPropertySet.class, table);
            Property props[] = xTableProps.getPropertySetInfo().getProperties();
            // loop body is only a disabled debug dump of the table properties
            for (int props_idx = 0; props_idx < props.length; props_idx++) {
                // logger.debug("table props " + props[props_idx].Name + ", "
                // + xTableProps.getPropertyValue(props[props_idx].Name));

            XTextTable xTextTable = (XTextTable) UnoRuntime.queryInterface(
                    XTextTable.class, table);

            XTableRows rows = xTextTable.getRows();
            XTableColumns columns = xTextTable.getColumns();

            // cell access by position goes through XCellRange
            XCellRange cellRange = (XCellRange) UnoRuntime.queryInterface(
                    XCellRange.class, table);

            // "righe" is Italian for "rows"; presumably collects one String[]
            // per table row - the add call is not visible in this listing
            ArrayList righe = new ArrayList();

            for (int row_idx = 0; row_idx < rows.getCount(); row_idx++) {
                logger.debug("scanning row# " + row_idx);
                // "colonna" is Italian for "column"; holds the cell texts of
                // the current row, one entry per column
                String colonna[] = new String[columns.getCount()];
                for (int col_idx = 0; col_idx < columns.getCount(); col_idx++) {

                    // WARNING: the first parameter is the column, the
                    // second
                    // parameter is the row
                    XCell xCell = cellRange.getCellByPosition(col_idx, row_idx);
                    XText xCellText = (XText) UnoRuntime.queryInterface(
                            XText.class, xCell);
                    logger.debug("cell (" + col_idx + "," + row_idx + "): "
                            + xCellText.getText().getString());
                    colonna[col_idx] = xCellText.getText().getString();

                    // NOTE(review): start of this statement is missing
                    + xTableProps.getPropertyValue("LinkDisplayName"), righe);


    /**
     * Opens the document at the given URL and parses it into a new
     * DocumentModel.
     *
     * Any exception raised while opening or parsing is logged, remembered and
     * rethrown after the finally block.
     *
     * NOTE(review): the listing is incomplete - the expression that opens the
     * document and the body of the finally block are missing, along with
     * closing braces. docConnection is presumably used in both; confirm.
     *
     * @param sUrl location of the document to parse
     * @return the populated document model
     * @throws Exception the exception caught during opening or parsing
     */
    public DocumentModel parse(String sUrl) throws Exception {

        DocumentModel documentModel = new DocumentModel();
        DocConnection docConnection = new DocConnection();

        XComponent xComp = null;

        // holds a caught exception so it can be rethrown after the finally block
        Exception ex = null;

        try {
            // open connection
            xComp =;

            // parse
            parse(xComp, documentModel);

        } catch (Exception e) {
            logger.error(e, e);
            ex = e;
        } finally {

        if (ex != null) {
            throw ex;

        return documentModel;

    /**
     * Fills the given model from an already loaded document by enumerating
     * its form components, bookmarks and tables, in that order. The text
     * component enumeration is currently disabled.
     *
     * NOTE(review): the enumerateFormComponents call is cut off in this
     * listing and the closing brace is missing.
     *
     * @param xComp         the loaded document component
     * @param documentModel model to populate
     * @return the same documentModel instance that was passed in
     * @throws Exception propagated from the enumeration methods
     */
    public DocumentModel parse(XComponent xComp, DocumentModel documentModel)
            throws Exception {
        // enumerateTextComponents(xComp, documentModel);
        enumerateFormComponents(getFormComponentTreeRoot(xComp), " ",
        enumerateBookmarks(xComp, documentModel);
        enumerateTables(xComp, documentModel);
        return documentModel;



// the previous code can be tested with the following class

package docreader;

/**
 * Minimal command-line driver for DocReaderParser.
 *
 * NOTE(review): the listing is incomplete - the parse call, the try body,
 * the catch body and the closing braces are missing.
 */
public class TestDocReader {
    public static void main(String args[]) {
        DocReaderParser parser = new DocReaderParser();
        // NOTE(review): statement cut off; presumably a parser.parse call
        DocumentModel documentModel = parser
        try {
        } catch (Exception e) {

Wednesday, 18 November 2009

monitoring dependencies between jar files

a script to use the jarjar utility on Windows

Make sure to run the DOS console with the delayed environment variable expansion option enabled, that is, cmd /V:ON

echo off
FOR /F %%G IN ('DIR /S /B %1\*.jar') DO (
java -jar jarjar-1.0.jar find jar %CP%

the script has to be called with an argument specifying the directory containing the set of jar files

Monday, 16 November 2009

session management on WebSphere

WebSphere adds a prefix to the session identifiers, which is not included in the value returned by the getId method of the HttpSession object.

This causes problems when you need to let a process external to the browser work inside an existing session created by a user logged in through the web interface.

HttpSession session = request.getSession(true);
String standardSessionID = session.getId();
//standardSessionID does not contain the cache prefix

String header = request.getHeader("Cookie");
//header contains the string "JSESSIONID="

String fromHeaderSessionID = header.substring(11);
//fromHeaderSessionID contains the WebSphere cache prefix
//and can be sent to the server as a JSESSIONID cookie or as a part of the url,
//as in the string 
Strig url = ";jsessionid=" + fromHeaderSessionID;

Thursday, 12 November 2009

load balance on Tomcat 5.5 with Apache web server

a very interesting post on how to set up a farm environment
with different instances of Tomcat 5.5 and an Apache web server


Wednesday, 11 November 2009

retrieve the HttpServletRequest on an Axis server object

MessageContext context = org.apache.axis.MessageContext.getCurrentContext();
request = (HttpServletRequest)context.getProperty(

Monday, 9 November 2009

debug the network activity of a process on Windows

find the process (service) id with
tasklist /SVC

then type

netstat -aovn | findstr <processid>

where <processid> is the process id found by tasklist.

Saturday, 7 November 2009


how to sync (in simulation mode) the folder myRootPackage under /http/www/myWebapp/WEB-INF/classes on the host "hostname", in the folder /http/lib/myWebapp/WEB-INF/classes/ on the local host

rsync --rsh="/usr/bin/ssh -l username" -avzn --size-only

Friday, 6 November 2009

tar gz

to create a tar.gz file
tar -czvf archive.tgz directory --exclude "file"
to extract a tar.gz file
tar xvzf file.tar.gz

on Solaris:
tar cf tomcat7.tar /tomcat7 | gzip -f tomcat7.tar

Wednesday, 4 November 2009

log swallowing on Tomcat

To split catalina.out across multiple files, with a new one created each day, configure the context XML descriptor as shown below (tested on Tomcat 5.0.28)

<context path="/myWebApp" swallowOutput="true">
<logger directory="logs"

which will create every day a new file called
containing the daily output

This technique is referred to as "output swallowing" on Tomcat
(note the swallowOutput="true")


articles on Datawarehouse

What's blocking my lock?

Here comes an extremely clear and interesting article, written by Natalka Roshalk, about Oracle locks and how to monitor what's going on when a lock is blocking a session

read the article