package docreader;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import com.sun.star.beans.Property;
import com.sun.star.beans.PropertyState;
import com.sun.star.beans.UnknownPropertyException;
import com.sun.star.beans.XPropertySet;
import com.sun.star.beans.XPropertySetInfo;
import com.sun.star.beans.XPropertyState;
import com.sun.star.container.NoSuchElementException;
import com.sun.star.container.XEnumeration;
import com.sun.star.container.XEnumerationAccess;
import com.sun.star.container.XIndexAccess;
import com.sun.star.container.XNameAccess;
import com.sun.star.container.XNameContainer;
import com.sun.star.drawing.XDrawPage;
import com.sun.star.drawing.XDrawPageSupplier;
import com.sun.star.drawing.XDrawPages;
import com.sun.star.drawing.XDrawPagesSupplier;
import com.sun.star.form.FormComponentType;
import com.sun.star.form.XFormsSupplier;
import com.sun.star.frame.XModel;
import com.sun.star.lang.IndexOutOfBoundsException;
import com.sun.star.lang.WrappedTargetException;
import com.sun.star.lang.XComponent;
import com.sun.star.lang.XServiceInfo;
import com.sun.star.table.XCell;
import com.sun.star.table.XCellRange;
import com.sun.star.table.XTableColumns;
import com.sun.star.table.XTableRows;
import com.sun.star.text.XBookmarksSupplier;
import com.sun.star.text.XDependentTextField;
import com.sun.star.text.XText;
import com.sun.star.text.XTextContent;
import com.sun.star.text.XTextDocument;
import com.sun.star.text.XTextFieldsSupplier;
import com.sun.star.text.XTextRange;
import com.sun.star.text.XTextTable;
import com.sun.star.text.XTextTablesSupplier;
import com.sun.star.uno.UnoRuntime;
public class DocReaderParser {
/** Shared log4j logger for this class; final because it is never reassigned. */
private static final Logger logger = Logger.getLogger(DocReaderParser.class);

/**
 * Creates a parser. The class declares no instance fields, so construction
 * performs no work.
 */
public DocReaderParser() {
}
/**
 * Maps a form component to a human-readable type label. As a side effect, the
 * current value of list boxes (selected entries) and text fields (text) is
 * stored in the document model under the component's name.
 *
 * @param xComponent property set of the form component; may be null
 * @param name element name used as the key when a value is stored in the model
 * @param documentModel model that receives list-box selections and field text
 * @return the type label, or an empty string when the type is not recognised
 * @throws Exception if a UNO property lookup fails
 */
private String classifyFormComponentType(XPropertySet xComponent,
        String name, DocumentModel documentModel) throws Exception {
    String sType = "";
    XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
            XServiceInfo.class, xComponent);
    XPropertySetInfo xPSI = null;
    if (null != xComponent) {
        xPSI = xComponent.getPropertySetInfo();
    }

    if ((null == xPSI) || !xPSI.hasPropertyByName("ClassId")) {
        // Without a ClassId the only thing recognised is a form container.
        if ((null != xSI)
                && xSI.supportsService("com.sun.star.form.component.DataForm")) {
            sType = "Form";
        }
        return sType;
    }

    // xPSI is non-null here, hence xComponent is non-null as well.
    Short nClassId = (Short) xComponent.getPropertyValue("ClassId");
    if (null == nClassId) {
        // Property exists but carries no value: nothing to classify.
        return sType;
    }

    switch (nClassId.intValue()) {
    case FormComponentType.COMMANDBUTTON:
        sType = "Command button";
        break;
    case FormComponentType.RADIOBUTTON:
        sType = "Radio button";
        break;
    case FormComponentType.IMAGEBUTTON:
        sType = "Image button";
        break;
    case FormComponentType.CHECKBOX:
        sType = "Check Box";
        break;
    case FormComponentType.LISTBOX:
        sType = "List Box";
        recordListBoxSelection(xComponent, name, documentModel);
        break;
    case FormComponentType.COMBOBOX:
        sType = "Combo Box";
        break;
    case FormComponentType.GROUPBOX:
        sType = "Group Box";
        break;
    case FormComponentType.FIXEDTEXT:
        sType = "Fixed Text";
        break;
    case FormComponentType.GRIDCONTROL:
        sType = "Grid Control";
        break;
    case FormComponentType.FILECONTROL:
        sType = "File Control";
        break;
    case FormComponentType.HIDDENCONTROL:
        sType = "Hidden Control";
        break;
    case FormComponentType.IMAGECONTROL:
        sType = "Image Control";
        break;
    case FormComponentType.DATEFIELD:
        sType = "Date Field";
        break;
    case FormComponentType.TIMEFIELD:
        sType = "Time Field";
        break;
    case FormComponentType.NUMERICFIELD:
        sType = "Numeric Field";
        break;
    case FormComponentType.CURRENCYFIELD:
        sType = "Currency Field";
        break;
    case FormComponentType.PATTERNFIELD:
        sType = "Pattern Field";
        break;
    case FormComponentType.TEXTFIELD:
        // Two services share this class id: the plain text field and the
        // formatted field. Both have their text recorded in the model.
        sType = "Text Field";
        // Read the property once and reuse it for the log and the model.
        Object text = xComponent.getPropertyValue("Text");
        logger.debug("property Text: " + text);
        documentModel.addFormElement(name, "" + text);
        if ((null != xSI)
                && xSI.supportsService("com.sun.star.form.component.FormattedField")) {
            sType = "Formatted Field";
        }
        break;
    default:
        break;
    }
    return sType;
}

/**
 * Stores the currently selected entries of a list box in the document model.
 * Nothing is stored when the list box has no entries, no selection, or only
 * selection indexes that fall outside the entry list.
 */
private void recordListBoxSelection(XPropertySet xListBox, String name,
        DocumentModel documentModel) throws Exception {
    short[] selectedItems = (short[]) xListBox
            .getPropertyValue("SelectedItems");
    String[] items = (String[]) xListBox.getPropertyValue("StringItemList");
    // Guard both arrays: either property may come back without a value.
    if (selectedItems == null || items == null || items.length == 0) {
        return;
    }
    ArrayList selectedItemsModelList = new ArrayList();
    for (int item_idx = 0; item_idx < selectedItems.length; item_idx++) {
        short selected = selectedItems[item_idx];
        if (selected > -1 && selected < items.length) {
            logger.debug("property SelectedItems[" + item_idx + "]: "
                    + items[selected]);
            selectedItemsModelList.add(items[selected]);
        }
    }
    if (selectedItemsModelList.size() > 0) {
        documentModel.addFormElement(name, selectedItemsModelList);
    }
}
/**
 * Returns the draw page that hosts the document's form layer.
 *
 * The document is first asked for a single draw page (XDrawPageSupplier).
 * If it does not support that interface, or yields no page, the first page
 * of a multi-page document (XDrawPagesSupplier) is used instead.
 *
 * @param m_xDocument the loaded document component
 * @return the draw page, or null when the document exposes no draw page
 * @throws java.lang.Exception if the first page of a multi-page document
 *         cannot be retrieved
 */
private XDrawPage getDocumentDrawPage(XComponent m_xDocument)
        throws java.lang.Exception {
    XModel xModel = (XModel) UnoRuntime.queryInterface(XModel.class,
            m_xDocument);

    // Single-page case: simply ask the XDrawPageSupplier.
    // queryInterface yields null when the model does not support the
    // interface, so the supplier must be checked before it is used.
    XDrawPageSupplier xSuppPage = (XDrawPageSupplier) UnoRuntime
            .queryInterface(XDrawPageSupplier.class, xModel);
    if (null != xSuppPage) {
        XDrawPage xSinglePage = xSuppPage.getDrawPage();
        if (null != xSinglePage) {
            return xSinglePage;
        }
    }

    // The model is no draw page supplier - it may be a multi-page document,
    // in which case the first page is used.
    XDrawPagesSupplier xSuppPages = (XDrawPagesSupplier) UnoRuntime
            .queryInterface(XDrawPagesSupplier.class, xModel);
    if (null == xSuppPages) {
        return null;
    }
    XDrawPages xPages = xSuppPages.getDrawPages();
    if (null == xPages || xPages.getCount() == 0) {
        // No pages at all: there is nothing that could carry a form layer.
        return null;
    }
    return (XDrawPage) UnoRuntime.queryInterface(XDrawPage.class,
            xPages.getByIndex(0));
}
/**
 * Looks up the forms collection attached to the document's draw page.
 *
 * @param m_xDocument the loaded document component
 * @return the root container of the form component tree, or null when the
 *         draw page does not supply forms
 * @throws java.lang.Exception if the draw page lookup fails
 */
private XNameContainer getFormComponentTreeRoot(
        com.sun.star.lang.XComponent m_xDocument)
        throws java.lang.Exception {
    XDrawPage drawPage = getDocumentDrawPage(m_xDocument);
    XFormsSupplier formsSupplier = (XFormsSupplier) UnoRuntime
            .queryInterface(XFormsSupplier.class, drawPage);
    if (formsSupplier == null) {
        // The page does not expose forms.
        return null;
    }
    return formsSupplier.getForms();
}
/**
 * Walks a form component container recursively, classifying every element
 * and letting the classification step record element values in the model.
 *
 * @param xContainer container to walk; null is treated as "no forms"
 * @param sPrefix text prepended to element names in the debug log; one
 *        space is added per nesting level
 * @param documentModel model that receives the recognised form values
 * @throws java.lang.Exception if a UNO lookup or the classification fails
 */
private void enumerateFormComponents(XNameAccess xContainer,
        String sPrefix, DocumentModel documentModel)
        throws java.lang.Exception {
    if (xContainer == null) {
        // No forms collection (or a child without name access): nothing to
        // walk. Returning here lets the remaining parse steps still run.
        return;
    }
    String aNames[] = xContainer.getElementNames();
    for (int i = 0; i < aNames.length; ++i) {
        logger.debug("----------------------------------------"
                + " recognized element, name: " + sPrefix + aNames[i]);

        // Fetch the element once and query both interfaces from it.
        Object element = xContainer.getByName(aNames[i]);
        XPropertySet xModelProps = (XPropertySet) UnoRuntime
                .queryInterface(XPropertySet.class, element);

        // Classification is done outside the log statement because it also
        // stores values in the document model.
        String sType = classifyFormComponentType(xModelProps, aNames[i],
                documentModel);
        logger.debug("classifyFormComponentType: " + sType);

        // Descend when the element is itself a container of form components.
        XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
                XServiceInfo.class, element);
        if ((xSI != null)
                && xSI.supportsService("com.sun.star.form.FormComponents")) {
            XNameAccess xChildContainer = (XNameAccess) UnoRuntime
                    .queryInterface(XNameAccess.class, xSI);
            enumerateFormComponents(xChildContainer, " " + sPrefix,
                    documentModel);
        }
    }
}
/**
 * Logs, at debug level, the text fields of a document and then every
 * paragraph with its text portions (text, font name and the state of the
 * CharWeight property). This method only writes to the log; the document
 * model is not modified.
 *
 * @param xComp the loaded document component
 * @param documentModel not used by this method
 * @throws NoSuchElementException if an enumeration is read past its end
 * @throws WrappedTargetException if UNO wraps an error raised by the document
 * @throws UnknownPropertyException if a queried property does not exist
 */
private void enumerateTextComponents(XComponent xComp,
        DocumentModel documentModel) throws NoSuchElementException,
        WrappedTargetException, UnknownPropertyException {
    XTextDocument xTextDocument = (XTextDocument) UnoRuntime
            .queryInterface(XTextDocument.class, xComp);
    if (xTextDocument == null) {
        // Not a text document: there is no text body to enumerate.
        return;
    }
    XText xText = xTextDocument.getText();

    logTextFieldMasters(xComp);

    logger.debug("create an enumeration of all paragraphs");
    XEnumerationAccess xEnumerationAccess = (XEnumerationAccess) UnoRuntime
            .queryInterface(XEnumerationAccess.class, xText);
    XEnumeration xParagraphEnumeration = xEnumerationAccess
            .createEnumeration();
    while (xParagraphEnumeration.hasMoreElements()) {
        logger.debug("------------------------------- new paragraph");
        XTextContent xTextElement = (XTextContent) UnoRuntime.queryInterface(
                XTextContent.class, xParagraphEnumeration.nextElement());
        XServiceInfo xServiceInfo = (XServiceInfo) UnoRuntime
                .queryInterface(XServiceInfo.class, xTextElement);
        String[] services = xServiceInfo.getSupportedServiceNames();
        for (int si = 0; si < services.length; si++) {
            logger.debug("service " + services[si]);
        }
        // The enumeration yields paragraphs as well as other content such
        // as tables, so the element's services decide how it is handled.
        if (xServiceInfo.supportsService("com.sun.star.text.Paragraph")) {
            logger.debug("This is a Paragraph");
            logParagraphPortions(xTextElement);
        } else if (xServiceInfo
                .supportsService("com.sun.star.text.TextTable")) {
            logger.debug("this is a table");
        } else {
            logger.debug("The text portion isn't a text paragraph");
        }
    }
}

/**
 * Logs the name and the property names of the field master behind every
 * text field of the document. Fields without a dependent master are skipped.
 */
private void logTextFieldMasters(XComponent xComp)
        throws NoSuchElementException, WrappedTargetException,
        UnknownPropertyException {
    XTextFieldsSupplier xTextFieldsSupplier = (XTextFieldsSupplier) UnoRuntime
            .queryInterface(XTextFieldsSupplier.class, xComp);
    if (xTextFieldsSupplier == null) {
        return;
    }
    XEnumeration fieldEnumeration = xTextFieldsSupplier.getTextFields()
            .createEnumeration();
    while (fieldEnumeration.hasMoreElements()) {
        Object field = fieldEnumeration.nextElement();
        XDependentTextField dependentTextField = (XDependentTextField) UnoRuntime
                .queryInterface(XDependentTextField.class, field);
        XPropertySet fieldMaster = (dependentTextField == null) ? null
                : dependentTextField.getTextFieldMaster();
        if (fieldMaster == null) {
            // No master to inspect for this field.
            continue;
        }
        String name = (String) fieldMaster.getPropertyValue("Name");
        logger.debug("textfield name name " + name);
        Property properties[] = fieldMaster.getPropertySetInfo()
                .getProperties();
        for (int i = 0; i < properties.length; i++) {
            logger.debug("prop " + properties[i].Name);
        }
    }
}

/**
 * Logs text, font name and CharWeight property state for every text portion
 * of one paragraph.
 */
private void logParagraphPortions(XTextContent xParagraph)
        throws NoSuchElementException, WrappedTargetException,
        UnknownPropertyException {
    XEnumerationAccess xParaEnumerationAccess = (XEnumerationAccess) UnoRuntime
            .queryInterface(XEnumerationAccess.class, xParagraph);
    XEnumeration xTextPortionEnum = xParaEnumerationAccess
            .createEnumeration();
    while (xTextPortionEnum.hasMoreElements()) {
        XTextRange xTextPortion = (XTextRange) UnoRuntime.queryInterface(
                XTextRange.class, xTextPortionEnum.nextElement());
        logger.debug("Text from the portion : " + xTextPortion.getString());

        XPropertySet xPropertySet = (XPropertySet) UnoRuntime
                .queryInterface(XPropertySet.class, xTextPortion);
        logger.debug("font name: "
                + xPropertySet.getPropertyValue("CharFontName"));

        // Query the state once; the three outcomes are mutually exclusive.
        XPropertyState xPropertyState = (XPropertyState) UnoRuntime
                .queryInterface(XPropertyState.class, xTextPortion);
        PropertyState charWeightState = xPropertyState
                .getPropertyState("CharWeight");
        if (charWeightState.equals(PropertyState.AMBIGUOUS_VALUE)) {
            logger.debug("- The text range contains more than one different attributes");
        } else if (charWeightState.equals(PropertyState.DIRECT_VALUE)) {
            logger.debug(" - The text range contains hard formats");
        } else if (charWeightState.equals(PropertyState.DEFAULT_VALUE)) {
            logger.debug(" - The text range doesn't contains hard formats");
        }
    }
}
/**
 * Copies every bookmark of the document into the document model as a
 * (bookmark name, anchored text) pair. Documents that do not supply
 * bookmarks are silently skipped.
 *
 * @param xComp the loaded document component
 * @param documentModel model that receives the bookmarks
 * @throws NoSuchElementException if a listed bookmark name cannot be resolved
 * @throws WrappedTargetException if UNO wraps an error raised by the document
 */
private void enumerateBookmarks(XComponent xComp,
        DocumentModel documentModel) throws NoSuchElementException,
        WrappedTargetException {
    XBookmarksSupplier xBookmarksSupplier = (XBookmarksSupplier) UnoRuntime
            .queryInterface(XBookmarksSupplier.class, xComp);
    if (xBookmarksSupplier == null) {
        return;
    }
    XNameAccess xNamedBookmarks = xBookmarksSupplier.getBookmarks();
    String bookmarks[] = xNamedBookmarks.getElementNames();
    for (int b = 0; b < bookmarks.length; b++) {
        Object bookmark = xNamedBookmarks.getByName(bookmarks[b]);
        // The bookmark's text range is reachable through getAnchor(),
        // which lives on XTextContent.
        XTextContent xBookmarkContent = (XTextContent) UnoRuntime
                .queryInterface(XTextContent.class, bookmark);
        XTextRange xBookmarkRange = xBookmarkContent.getAnchor();
        // Read the text once and reuse it for the log and the model.
        String bookmarkText = xBookmarkRange.getString();
        logger.debug("bookmark " + bookmarks[b] + ", text " + bookmarkText);
        documentModel.addBookmark(bookmarks[b], bookmarkText);
    }
}
/**
 * Copies every text table of the document into the document model. Each
 * table is stored under its "LinkDisplayName" property as a list of rows,
 * where a row is a String array holding one cell text per column.
 * Documents that do not supply text tables are silently skipped.
 *
 * Cells are addressed by (column, row) position.
 * NOTE(review): this presumably only works for tables with a regular grid
 * (no merged or split cells) - confirm against the UNO documentation.
 *
 * @param xComp the loaded document component
 * @param documentModel model that receives the tables
 * @throws IndexOutOfBoundsException if a table or cell index is rejected
 * @throws WrappedTargetException if UNO wraps an error raised by the document
 * @throws UnknownPropertyException if "LinkDisplayName" is not available
 */
private void enumerateTables(XComponent xComp, DocumentModel documentModel)
        throws IndexOutOfBoundsException, WrappedTargetException,
        UnknownPropertyException {
    XTextTablesSupplier xTablesSupplier = (XTextTablesSupplier) UnoRuntime
            .queryInterface(XTextTablesSupplier.class, xComp);
    if (xTablesSupplier == null) {
        // The document has no text tables collection to walk.
        return;
    }
    // The tables collection is name-based; index access is queried from it.
    XNameAccess xNamedTables = xTablesSupplier.getTextTables();
    XIndexAccess xIndexedTables = (XIndexAccess) UnoRuntime.queryInterface(
            XIndexAccess.class, xNamedTables);

    int tableCount = xIndexedTables.getCount();
    for (int i = 0; i < tableCount; i++) {
        Object table = xIndexedTables.getByIndex(i);
        logger.debug("------------------------- recognized table ");

        XPropertySet xTableProps = (XPropertySet) UnoRuntime.queryInterface(
                XPropertySet.class, table);
        XTextTable xTextTable = (XTextTable) UnoRuntime.queryInterface(
                XTextTable.class, table);
        XCellRange cellRange = (XCellRange) UnoRuntime.queryInterface(
                XCellRange.class, table);

        // Table dimensions are fetched once instead of on every iteration.
        int rowCount = xTextTable.getRows().getCount();
        int columnCount = xTextTable.getColumns().getCount();

        ArrayList tableRows = new ArrayList();
        for (int row_idx = 0; row_idx < rowCount; row_idx++) {
            logger.debug("scanning row# " + row_idx);
            String rowCells[] = new String[columnCount];
            for (int col_idx = 0; col_idx < columnCount; col_idx++) {
                // WARNING: the first parameter is the column, the second
                // parameter is the row.
                XCell xCell = cellRange.getCellByPosition(col_idx, row_idx);
                XText xCellText = (XText) UnoRuntime.queryInterface(
                        XText.class, xCell);
                String cellText = xCellText.getText().getString();
                logger.debug("cell (" + col_idx + "," + row_idx + "): "
                        + cellText);
                rowCells[col_idx] = cellText;
            }
            tableRows.add(rowCells);
        }
        documentModel.addTable(""
                + xTableProps.getPropertyValue("LinkDisplayName"), tableRows);
    }
}
/**
 * Opens the document at the given location, parses it into a new document
 * model and closes the connection again, whether or not parsing succeeded.
 *
 * @param sUrl location of the document, passed unchanged to DocConnection.open
 * @return the populated document model
 * @throws Exception any failure raised while opening or parsing; it is
 *         logged before being rethrown
 */
public DocumentModel parse(String sUrl) throws Exception {
    DocumentModel model = new DocumentModel();
    DocConnection connection = new DocConnection();
    try {
        XComponent component = connection.open(sUrl);
        parse(component, model);
        return model;
    } catch (Exception failure) {
        // Log here, then let the caller see the very same exception.
        logger.error(failure, failure);
        throw failure;
    } finally {
        connection.close();
    }
}
/**
 * Fills the given model from an already loaded document: form components
 * first, then bookmarks, then text tables. The paragraph and text-field
 * enumeration (enumerateTextComponents) is not invoked here.
 *
 * @param xComp the loaded document component
 * @param documentModel model to populate
 * @return the same model instance that was passed in
 * @throws Exception if any of the enumeration steps fails
 */
public DocumentModel parse(XComponent xComp, DocumentModel documentModel)
        throws Exception {
    XNameContainer formRoot = getFormComponentTreeRoot(xComp);
    enumerateFormComponents(formRoot, " ", documentModel);

    enumerateBookmarks(xComp, documentModel);

    enumerateTables(xComp, documentModel);

    return documentModel;
}
}
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
// the previous code can be tested with the following class
package docreader;
/**
 * Command-line driver for DocReaderParser: parses one document and prints
 * the XML form of the resulting document model to standard output.
 */
public class TestDocReader {
    /** Document that is parsed when no path is given on the command line. */
    private static final String DEFAULT_DOCUMENT = "C:\\tmp\\test.odt";

    /**
     * Parses the document named by the first argument, or the default
     * document when no argument is supplied.
     *
     * @param args optional: args[0] is the document location
     */
    public static void main(String args[]) {
        String documentPath = (args != null && args.length > 0) ? args[0]
                : DEFAULT_DOCUMENT;
        DocReaderParser parser = new DocReaderParser();
        try {
            // parse() declares a checked Exception, so the call has to sit
            // inside the try block for this class to compile.
            DocumentModel documentModel = parser.parse(documentPath);
            System.out.println(documentModel.getXml());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
This blog is intended to be a place to store code snippets, examples and notes, in order to keep them handy and quick to find during everyday work.
Search This Blog
Friday, 27 November 2009
OpenOffice API, a generic document inspector
Here is a useful program that inspects the content of a document (OpenOffice .odt or Microsoft Word .doc) in order to read its paragraphs, text sections, bookmarks, text fields, forms and tables.
Labels:
OpenOffice
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment