package docreader;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import com.sun.star.beans.Property;
import com.sun.star.beans.PropertyState;
import com.sun.star.beans.UnknownPropertyException;
import com.sun.star.beans.XPropertySet;
import com.sun.star.beans.XPropertySetInfo;
import com.sun.star.beans.XPropertyState;
import com.sun.star.container.NoSuchElementException;
import com.sun.star.container.XEnumeration;
import com.sun.star.container.XEnumerationAccess;
import com.sun.star.container.XIndexAccess;
import com.sun.star.container.XNameAccess;
import com.sun.star.container.XNameContainer;
import com.sun.star.drawing.XDrawPage;
import com.sun.star.drawing.XDrawPageSupplier;
import com.sun.star.drawing.XDrawPages;
import com.sun.star.drawing.XDrawPagesSupplier;
import com.sun.star.form.FormComponentType;
import com.sun.star.form.XFormsSupplier;
import com.sun.star.frame.XModel;
import com.sun.star.lang.IndexOutOfBoundsException;
import com.sun.star.lang.WrappedTargetException;
import com.sun.star.lang.XComponent;
import com.sun.star.lang.XServiceInfo;
import com.sun.star.table.XCell;
import com.sun.star.table.XCellRange;
import com.sun.star.table.XTableColumns;
import com.sun.star.table.XTableRows;
import com.sun.star.text.XBookmarksSupplier;
import com.sun.star.text.XDependentTextField;
import com.sun.star.text.XText;
import com.sun.star.text.XTextContent;
import com.sun.star.text.XTextDocument;
import com.sun.star.text.XTextFieldsSupplier;
import com.sun.star.text.XTextRange;
import com.sun.star.text.XTextTable;
import com.sun.star.text.XTextTablesSupplier;
import com.sun.star.uno.UnoRuntime;
public class DocReaderParser {
private static Logger logger = Logger.getLogger(DocReaderParser.class);
/**
 * Creates a parser. The constructor performs no work; a document is only
 * touched when one of the {@code parse} methods is called.
 */
public DocReaderParser() {
}
/**
 * Maps a form component to a human-readable type label and, for list boxes
 * and text fields, records the component's current value in the model.
 *
 * @param xComponent    property set of the form component; may be null
 * @param name          element name used as the key when a value is stored
 *                      in {@code documentModel}
 * @param documentModel receives the selected list-box entries or the text
 *                      of a text field
 * @return the type label; an empty string when the component is null, when
 *         its class id is not one of the handled constants, or when it has
 *         no "ClassId" property and is not a DataForm
 * @throws Exception if reading a property from the component fails
 */
private String classifyFormComponentType(XPropertySet xComponent,
        String name, DocumentModel documentModel) throws Exception {
    String sType = "";
    XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
            XServiceInfo.class, xComponent);
    XPropertySetInfo xPSI = null;
    if (null != xComponent) {
        xPSI = xComponent.getPropertySetInfo();
    }
    if ((null != xPSI) && xPSI.hasPropertyByName("ClassId")) {
        // xPSI is only non-null when xComponent is non-null, so the
        // component can be read directly from here on.
        Short nClassId = (Short) xComponent.getPropertyValue("ClassId");
        switch (nClassId.intValue()) {
        case FormComponentType.COMMANDBUTTON:
            sType = "Command button";
            break;
        case FormComponentType.RADIOBUTTON:
            sType = "Radio button";
            break;
        case FormComponentType.IMAGEBUTTON:
            sType = "Image button";
            break;
        case FormComponentType.CHECKBOX:
            sType = "Check Box";
            break;
        case FormComponentType.LISTBOX:
            sType = "List Box";
            recordListBoxSelection(xComponent, name, documentModel);
            break;
        case FormComponentType.COMBOBOX:
            sType = "Combo Box";
            break;
        case FormComponentType.GROUPBOX:
            sType = "Group Box";
            break;
        case FormComponentType.FIXEDTEXT:
            sType = "Fixed Text";
            break;
        case FormComponentType.GRIDCONTROL:
            sType = "Grid Control";
            break;
        case FormComponentType.FILECONTROL:
            sType = "File Control";
            break;
        case FormComponentType.HIDDENCONTROL:
            sType = "Hidden Control";
            break;
        case FormComponentType.IMAGECONTROL:
            sType = "Image Control";
            break;
        case FormComponentType.DATEFIELD:
            sType = "Date Field";
            break;
        case FormComponentType.TIMEFIELD:
            sType = "Time Field";
            break;
        case FormComponentType.NUMERICFIELD:
            sType = "Numeric Field";
            break;
        case FormComponentType.CURRENCYFIELD:
            sType = "Currency Field";
            break;
        case FormComponentType.PATTERNFIELD:
            sType = "Pattern Field";
            break;
        case FormComponentType.TEXTFIELD:
            // Two known services share this class id: the plain text field
            // and the formatted field.
            sType = "Text Field";
            recordTextFieldValue(xComponent, name, documentModel);
            if ((null != xSI)
                    && xSI.supportsService("com.sun.star.form.component.FormattedField")) {
                sType = "Formatted Field";
            }
            break;
        default:
            break;
        }
    } else {
        if ((null != xSI)
                && xSI.supportsService("com.sun.star.form.component.DataForm")) {
            sType = "Form";
        }
    }
    return sType;
}

/**
 * Stores the currently selected entries of a list box in the model.
 * Nothing is stored when either property is missing, the item list is
 * empty, or no selected index falls inside the item list.
 */
private void recordListBoxSelection(XPropertySet xListBox, String name,
        DocumentModel documentModel) throws Exception {
    short[] selectedItems = (short[]) xListBox
            .getPropertyValue("SelectedItems");
    String[] items = (String[]) xListBox.getPropertyValue("StringItemList");
    // Guard both arrays: previously only the item list was checked and a
    // missing selection array caused a NullPointerException.
    if (selectedItems == null || items == null || items.length == 0) {
        return;
    }
    // Kept as a raw ArrayList so the call matches whatever parameter type
    // DocumentModel.addFormElement declares (not visible from this file).
    ArrayList selectedItemsModelList = new ArrayList();
    for (int item_idx = 0; item_idx < selectedItems.length; item_idx++) {
        int selectedIndex = selectedItems[item_idx];
        // ignore indexes that do not point into the item list
        if (selectedIndex > -1 && selectedIndex < items.length) {
            logger.debug("property SelectedItems[" + item_idx + "]: "
                    + items[selectedIndex]);
            selectedItemsModelList.add(items[selectedIndex]);
        }
    }
    if (!selectedItemsModelList.isEmpty()) {
        documentModel.addFormElement(name, selectedItemsModelList);
    }
}

/**
 * Stores the "Text" property of a text field in the model, reading the
 * property once and converting it to a string by concatenation.
 */
private void recordTextFieldValue(XPropertySet xTextField, String name,
        DocumentModel documentModel) throws Exception {
    Object text = xTextField.getPropertyValue("Text");
    logger.debug("property Text: " + text);
    documentModel.addFormElement(name, "" + text);
}
/**
 * Returns the draw page that hosts the document's form layer.
 *
 * The document is first asked for a single draw page through
 * XDrawPageSupplier. When it does not implement that interface, or yields
 * no page, the first page of its XDrawPagesSupplier collection is used
 * instead.
 *
 * @param m_xDocument the loaded document component
 * @return the draw page, or null when the document exposes neither
 *         supplier interface or its page collection is empty
 * @throws java.lang.Exception if accessing the first page of the pages
 *         collection fails
 */
private XDrawPage getDocumentDrawPage(XComponent m_xDocument)
        throws java.lang.Exception {
    XModel s_aDocument = (XModel) UnoRuntime.queryInterface(XModel.class,
            m_xDocument);
    XDrawPage xReturn = null;
    // Single-page case: simply ask the XDrawPageSupplier. The supplier is
    // null when the model does not implement the interface, so it must be
    // checked before use - otherwise the fallback below is never reached.
    XDrawPageSupplier xSuppPage = (XDrawPageSupplier) UnoRuntime
            .queryInterface(XDrawPageSupplier.class, s_aDocument);
    if (null != xSuppPage) {
        xReturn = xSuppPage.getDrawPage();
    }
    if (null == xReturn) {
        // Multi-page case: take the first page of the pages collection,
        // provided the interface is supported and the collection is not
        // empty.
        XDrawPagesSupplier xSuppPages = (XDrawPagesSupplier) UnoRuntime
                .queryInterface(XDrawPagesSupplier.class, s_aDocument);
        if (null != xSuppPages) {
            XDrawPages xPages = xSuppPages.getDrawPages();
            if (null != xPages && xPages.getCount() > 0) {
                xReturn = (XDrawPage) UnoRuntime.queryInterface(
                        XDrawPage.class, xPages.getByIndex(0));
            }
        }
    }
    return xReturn;
}
/**
 * Looks up the forms collection attached to the document's draw page.
 *
 * @param m_xDocument the loaded document component
 * @return the forms container, or null when the draw page does not provide
 *         the XFormsSupplier interface
 * @throws java.lang.Exception if the draw page cannot be obtained
 */
private XNameContainer getFormComponentTreeRoot(
        com.sun.star.lang.XComponent m_xDocument)
        throws java.lang.Exception {
    XDrawPage drawPage = getDocumentDrawPage(m_xDocument);
    XFormsSupplier formsSupplier = (XFormsSupplier) UnoRuntime
            .queryInterface(XFormsSupplier.class, drawPage);
    if (formsSupplier == null) {
        return null;
    }
    return formsSupplier.getForms();
}
/**
 * Walks a form component container recursively, classifying every element
 * and descending into elements that are themselves component containers.
 *
 * @param xContainer    container to walk; a null container is ignored
 * @param sPrefix       indentation prefix used in the debug output; one
 *                      space is prepended for every nesting level
 * @param documentModel receives the values collected while classifying
 * @throws java.lang.Exception if an element cannot be read or classified
 */
private void enumerateFormComponents(XNameAccess xContainer,
        String sPrefix, DocumentModel documentModel)
        throws java.lang.Exception {
    // A null container is possible: the caller may pass a missing forms
    // collection, and the recursion below may pass a child that does not
    // expose XNameAccess.
    if (xContainer == null) {
        logger.debug("no form component container available, nothing to enumerate");
        return;
    }
    String aNames[] = xContainer.getElementNames();
    for (int i = 0; i < aNames.length; ++i) {
        String elementName = aNames[i];
        logger.debug("----------------------------------------"
                + " recognized element, name: " + sPrefix + elementName);
        // look the element up once and reuse it for both interface queries
        Object element = xContainer.getByName(elementName);
        XPropertySet xModelProps = (XPropertySet) UnoRuntime
                .queryInterface(XPropertySet.class, element);
        logger.debug("classifyFormComponentType: "
                + classifyFormComponentType(xModelProps, elementName,
                        documentModel));
        // check if it is a FormComponents component itself
        XServiceInfo xSI = (XServiceInfo) UnoRuntime.queryInterface(
                XServiceInfo.class, element);
        if (xSI != null
                && xSI.supportsService("com.sun.star.form.FormComponents")) {
            XNameAccess xChildContainer = (XNameAccess) UnoRuntime
                    .queryInterface(XNameAccess.class, xSI);
            enumerateFormComponents(xChildContainer, " " + sPrefix,
                    documentModel);
        }
    }
}
/**
 * Writes a debug trace of the document's text fields and of every
 * paragraph with its text portions. Nothing is stored in the model; the
 * method only logs.
 *
 * @param xComp         the loaded document component
 * @param documentModel not used by this method
 * @throws NoSuchElementException   if an enumeration is read past its end
 * @throws WrappedTargetException   if the document reports a wrapped error
 * @throws UnknownPropertyException if a queried property does not exist
 */
private void enumerateTextComponents(XComponent xComp,
        DocumentModel documentModel) throws NoSuchElementException,
        WrappedTargetException, UnknownPropertyException {
    // query the document for the XTextDocument interface
    XTextDocument xTextDocument = (XTextDocument) UnoRuntime
            .queryInterface(XTextDocument.class, xComp);
    if (xTextDocument == null) {
        // not a text document: there is no text body to enumerate
        logger.debug("component is not a text document, skipping text enumeration");
        return;
    }
    XText xText = xTextDocument.getText();
    logTextFieldMasters(xComp);

    logger.debug("create an enumeration of all paragraphs");
    XEnumerationAccess xEnumerationAccess = (XEnumerationAccess) UnoRuntime
            .queryInterface(
                    com.sun.star.container.XEnumerationAccess.class, xText);
    if (xEnumerationAccess == null) {
        return;
    }
    XEnumeration xParagraphEnumeration = xEnumerationAccess
            .createEnumeration();
    // Loop through all top-level elements of the document text
    while (xParagraphEnumeration.hasMoreElements()) {
        logger.debug("------------------------------- new paragraph");
        XTextContent xTextElement = (XTextContent) UnoRuntime
                .queryInterface(XTextContent.class,
                        xParagraphEnumeration.nextElement());
        XServiceInfo xServiceInfo = (XServiceInfo) UnoRuntime
                .queryInterface(XServiceInfo.class, xTextElement);
        if (xServiceInfo == null) {
            // without service information the element cannot be classified
            logger.debug("The text portion isn't a text paragraph");
            continue;
        }
        String[] services = xServiceInfo.getSupportedServiceNames();
        for (int si = 0; si < services.length; si++) {
            logger.debug("service " + services[si]);
        }
        // the enumerated element is either a paragraph, a table, or
        // something else
        if (xServiceInfo.supportsService("com.sun.star.text.Paragraph")) {
            logger.debug("This is a Paragraph");
            logTextPortions(xTextElement);
        } else if (xServiceInfo
                .supportsService("com.sun.star.text.TextTable")) {
            logger.debug("this is a table");
        } else {
            logger.debug("The text portion isn't a text paragraph");
        }
    }
}

/**
 * Logs the master name and the master's property names for every text
 * field of the document that has a field master.
 */
private void logTextFieldMasters(XComponent xComp)
        throws NoSuchElementException, WrappedTargetException,
        UnknownPropertyException {
    XTextFieldsSupplier xTextFieldsSupplier = (XTextFieldsSupplier) UnoRuntime
            .queryInterface(XTextFieldsSupplier.class, xComp);
    if (xTextFieldsSupplier == null) {
        return;
    }
    XEnumeration enumeration = xTextFieldsSupplier.getTextFields()
            .createEnumeration();
    while (enumeration.hasMoreElements()) {
        Object field = enumeration.nextElement();
        XDependentTextField dependentTextField = (XDependentTextField) UnoRuntime
                .queryInterface(XDependentTextField.class, field);
        // defensive: skip fields that do not expose a field master instead
        // of failing with a NullPointerException
        if (dependentTextField == null) {
            continue;
        }
        XPropertySet propertySet = dependentTextField.getTextFieldMaster();
        if (propertySet == null) {
            continue;
        }
        String name = (String) propertySet.getPropertyValue("Name");
        logger.debug("textfield name name " + name);
        Property properties[] = propertySet.getPropertySetInfo()
                .getProperties();
        for (int i = 0; i < properties.length; i++) {
            logger.debug("prop " + properties[i].Name);
        }
    }
}

/**
 * Logs text, font name and the state of the "CharWeight" property for
 * every text portion of one paragraph.
 */
private void logTextPortions(XTextContent xParagraph)
        throws NoSuchElementException, WrappedTargetException,
        UnknownPropertyException {
    XEnumerationAccess xParaEnumerationAccess = (XEnumerationAccess) UnoRuntime
            .queryInterface(XEnumerationAccess.class, xParagraph);
    if (xParaEnumerationAccess == null) {
        return;
    }
    XEnumeration xTextPortionEnum = xParaEnumerationAccess
            .createEnumeration();
    while (xTextPortionEnum.hasMoreElements()) {
        XTextRange xTextPortion = (XTextRange) UnoRuntime.queryInterface(
                XTextRange.class, xTextPortionEnum.nextElement());
        if (xTextPortion == null) {
            continue;
        }
        logger.debug("Text from the portion : " + xTextPortion.getString());
        XPropertySet xPropertySet = (XPropertySet) UnoRuntime
                .queryInterface(XPropertySet.class, xTextPortion);
        if (xPropertySet != null) {
            logger.debug("font name: "
                    + xPropertySet.getPropertyValue("CharFontName"));
        }
        XPropertyState xPropertyState = (XPropertyState) UnoRuntime
                .queryInterface(XPropertyState.class, xTextPortion);
        if (xPropertyState == null) {
            continue;
        }
        // read the state once instead of once per comparison
        PropertyState weightState = xPropertyState
                .getPropertyState("CharWeight");
        if (weightState.equals(PropertyState.AMBIGUOUS_VALUE)) {
            logger.debug("- The text range contains more than one different attributes");
        }
        if (weightState.equals(PropertyState.DIRECT_VALUE)) {
            logger.debug(" - The text range contains hard formats");
        }
        if (weightState.equals(PropertyState.DEFAULT_VALUE)) {
            logger.debug(" - The text range doesn't contains hard formats");
        }
    }
}
/**
 * Copies every bookmark of the document into the model as a pair of
 * bookmark name and the text covered by the bookmark's anchor.
 *
 * Documents that do not provide the XBookmarksSupplier interface are
 * silently ignored.
 *
 * @param xComp         the loaded document component
 * @param documentModel receives one entry per bookmark
 * @throws NoSuchElementException if a listed bookmark name cannot be
 *         resolved
 * @throws WrappedTargetException if the document reports a wrapped error
 */
private void enumerateBookmarks(XComponent xComp,
        DocumentModel documentModel) throws NoSuchElementException,
        WrappedTargetException {
    // accessing the bookmark collection of the document
    XBookmarksSupplier xBookmarksSupplier = (XBookmarksSupplier) UnoRuntime
            .queryInterface(XBookmarksSupplier.class, xComp);
    if (xBookmarksSupplier == null) {
        return;
    }
    XNameAccess xNamedBookmarks = xBookmarksSupplier.getBookmarks();
    String bookmarks[] = xNamedBookmarks.getElementNames();
    for (int b = 0; b < bookmarks.length; b++) {
        String bookmarkName = bookmarks[b];
        // the bookmark text is reachable through the XTextRange returned
        // by getAnchor(), which lives on XTextContent
        XTextContent xBookmarkContent = (XTextContent) UnoRuntime
                .queryInterface(XTextContent.class,
                        xNamedBookmarks.getByName(bookmarkName));
        if (xBookmarkContent == null) {
            // no anchor to read the text from; skip instead of failing
            // with a NullPointerException
            logger.debug("bookmark " + bookmarkName
                    + " is not a text content, skipped");
            continue;
        }
        // read the anchored text once and use it for both log and model
        String bookmarkText = xBookmarkContent.getAnchor().getString();
        logger.debug("bookmark " + bookmarkName + ", text " + bookmarkText);
        documentModel.addBookmark(bookmarkName, bookmarkText);
    }
}
/**
 * Copies every text table of the document into the model.
 *
 * Each table is stored under the string value of its "LinkDisplayName"
 * property as a list of rows, every row being a String array with one
 * entry per column.
 *
 * Documents that do not provide the XTextTablesSupplier interface are
 * silently ignored.
 *
 * @param xComp         the loaded document component
 * @param documentModel receives one entry per table
 * @throws IndexOutOfBoundsException if a table or cell position is invalid
 * @throws WrappedTargetException    if the document reports a wrapped error
 * @throws UnknownPropertyException  if a table lacks "LinkDisplayName"
 */
private void enumerateTables(XComponent xComp, DocumentModel documentModel)
        throws IndexOutOfBoundsException, WrappedTargetException,
        UnknownPropertyException {
    // first query the XTextTablesSupplier interface from our document
    XTextTablesSupplier xTablesSupplier = (XTextTablesSupplier) UnoRuntime
            .queryInterface(XTextTablesSupplier.class, xComp);
    if (xTablesSupplier == null) {
        // not a document with text tables; nothing to collect
        return;
    }
    // the tables collection is name-based; indexed access is queried on top
    XIndexAccess xIndexedTables = (XIndexAccess) UnoRuntime.queryInterface(
            XIndexAccess.class, xTablesSupplier.getTextTables());
    if (xIndexedTables == null) {
        return;
    }
    int tableCount = xIndexedTables.getCount();
    for (int i = 0; i < tableCount; i++) {
        Object table = xIndexedTables.getByIndex(i);
        logger.debug("------------------------- recognized table ");
        XPropertySet xTableProps = (XPropertySet) UnoRuntime.queryInterface(
                XPropertySet.class, table);
        XTextTable xTextTable = (XTextTable) UnoRuntime.queryInterface(
                XTextTable.class, table);
        XCellRange cellRange = (XCellRange) UnoRuntime.queryInterface(
                XCellRange.class, table);
        // fetch the dimensions once instead of on every loop test
        int rowCount = xTextTable.getRows().getCount();
        int columnCount = xTextTable.getColumns().getCount();
        // Kept as a raw ArrayList so the call matches whatever parameter
        // type DocumentModel.addTable declares (not visible from this file).
        ArrayList tableRows = new ArrayList();
        for (int row_idx = 0; row_idx < rowCount; row_idx++) {
            logger.debug("scanning row# " + row_idx);
            String rowCells[] = new String[columnCount];
            for (int col_idx = 0; col_idx < columnCount; col_idx++) {
                // WARNING: the first parameter is the column, the second
                // parameter is the row
                XCell xCell = cellRange.getCellByPosition(col_idx, row_idx);
                XText xCellText = (XText) UnoRuntime.queryInterface(
                        XText.class, xCell);
                // read the cell content once for both log and model
                String cellContent = xCellText.getText().getString();
                logger.debug("cell (" + col_idx + "," + row_idx + "): "
                        + cellContent);
                rowCells[col_idx] = cellContent;
            }
            tableRows.add(rowCells);
        }
        documentModel.addTable(""
                + xTableProps.getPropertyValue("LinkDisplayName"), tableRows);
    }
}
/**
 * Opens the document at the given location, parses it into a new model and
 * closes the connection again, whether or not parsing succeeded.
 *
 * @param sUrl location of the document, passed unchanged to
 *             {@code DocConnection.open}
 * @return the populated document model
 * @throws Exception any failure raised while opening or parsing; it is
 *         logged before being rethrown
 */
public DocumentModel parse(String sUrl) throws Exception {
    DocumentModel model = new DocumentModel();
    DocConnection connection = new DocConnection();
    try {
        XComponent component = connection.open(sUrl);
        parse(component, model);
        return model;
    } catch (Exception failure) {
        // log here, then let the caller see the very same exception
        logger.error(failure, failure);
        throw failure;
    } finally {
        // always release the connection, on success and on failure
        connection.close();
    }
}
/**
 * Fills the given model from an already loaded document: first the form
 * components, then the bookmarks, then the tables. The text component
 * trace ({@code enumerateTextComponents}) is not invoked here.
 *
 * @param xComp         the loaded document component
 * @param documentModel the model to populate
 * @return the same {@code documentModel} instance that was passed in
 * @throws Exception if any of the enumeration steps fails
 */
public DocumentModel parse(XComponent xComp, DocumentModel documentModel)
        throws Exception {
    XNameContainer formRoot = getFormComponentTreeRoot(xComp);
    enumerateFormComponents(formRoot, " ", documentModel);
    enumerateBookmarks(xComp, documentModel);
    enumerateTables(xComp, documentModel);
    return documentModel;
}
}
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
// the previous code can be tested with the following class
package docreader;
/**
 * Small command-line driver that parses one document with
 * {@link DocReaderParser} and prints the XML form of the resulting model.
 */
public class TestDocReader {
    /**
     * Parses a document and prints its model as XML.
     *
     * @param args optional; when present, the first argument is used as the
     *             document location instead of the built-in sample path
     */
    public static void main(String args[]) {
        String documentPath = (args != null && args.length > 0)
                ? args[0]
                : "C:\\tmp\\test.odt";
        DocReaderParser parser = new DocReaderParser();
        try {
            // parse() declares a checked Exception, so the call has to be
            // inside the try block for this class to compile
            DocumentModel documentModel = parser.parse(documentPath);
            System.out.println(documentModel.getXml());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
This blog is intended to be a place to store code snippets, examples and notes, in order to have them handy and to find them quickly during everyday work.
Search This Blog
Friday, 27 November 2009
OpenOffice API, a generic document inspector
Here comes a useful program to inspect the content of a document (Open Office .odt or Microsoft Word .doc) in order to read paragraphs, text sections, bookmarks, text fields, forms and tables.
Wednesday, 18 November 2009
monitoring dependencies between jar files
a script to use the jarjar utility on Windows
Make sure to run the DOS console with the delayed environment variable expansion option enabled, that is, cmd /V:ON
the script has to be called with an argument specifying the directory containing the set of jar files
Make sure to run the DOS console with the delayed environment variable expansion option enabled, that is, cmd /V:ON
echo off FOR /F %%G IN ('DIR /S /B %1\*.jar') DO ( SET CP=!CP!;%%G ) java -jar jarjar-1.0.jar find jar %CP%
the script has to be called with an argument specifying the directory containing the set of jar files
Monday, 16 November 2009
session management on WebSphere
WebSphere adds a prefix to the session identifiers, which will not be included by the getId method of the HttpSession object.
This causes problems when you need to let a process external to the browser work inside an already existing session, created by a user logged in through the web interface.
Example:
This causes problems when you need to let a process external to the browser work inside an already existing session, created by a user logged in through the web interface.
Example:
HttpSession session = request.getSession(true); String standardSessionID = session.getId(); //standardSessionID does not contain the cache prefix String header = request.getHeader("Cookie"); //header contains the string "JSESSIONID=" String fromHeaderSessionID = header.substring(11); //fromHeaderSessionID contains the WebSphere cache prefix //and can be sent to the server as a JSESSIONID cookie or as a part of the url, //as in the string Strig url = "http://www.myapp.com;jsessionid=" + fromHeaderSessionID;
Thursday, 12 November 2009
load balance on Tomcat 5.5 with Apache web server
a very interesting post on how to set up a farm environment
with different instances of Tomcat 5.5 and an Apache web server
read
with different instances of Tomcat 5.5 and an Apache web server
read
Wednesday, 11 November 2009
retrieve the HttpServletRequest on an Axis server object
MessageContext context = org.apache.axis.MessageContext.getCurrentContext(); request = (HttpServletRequest)context.getProperty( org.apache.axis.transport.http. HTTPConstants.MC_HTTP_SERVLETREQUEST);
Monday, 9 November 2009
debug the network activity of a process on Windows
find the process (service) id with
then type
where <processid> is the process id found by tasklist.
tasklist /SVC
then type
netstat -aovn | findstr <processid>
where <processid> is the process id found by tasklist.
Saturday, 7 November 2009
rsync
how to sync (in simulation mode) the folder myRootPackage under /http/www/myWebapp/WEB-INF/classes on the host "hostname", in the folder /http/lib/myWebapp/WEB-INF/classes/ on the local host
rsync --rsh="/usr/bin/ssh -l username" -avzn --size-only hostname:/http/www/myWebapp/WEB-INF/classes/myRootPackage /http/lib/myWebapp/WEB-INF/classes/
Friday, 6 November 2009
tar gz
to create a tar.gz file
on Solaris:
tar -czvf archive.tgz directory --exclude "file"
to extract a tar.gz file
tar xvzf file.tar.gz
on Solaris:
tar cf tomcat7.tar /tomcat7 | gzip -f tomcat7.tar
Wednesday, 4 November 2009
log swallowing on Tomcat
In order to split catalina.out across multiple files, with a new one created every day, it's useful to configure the context XML descriptor as below (tested on Tomcat 5.0.28)
which will create every day a new file called
myApp_log.YYYY-MM-DD.txt
containing the daily output
This technique is referred to as "output swallowing" on Tomcat
(note the swallowOutput="true")
/usr/local/tomcat/conf/Catalina/localhost/myApp.xml <context path="/myWebApp" swallowOutput="true"> <logger directory="logs" className="org.apache.catalina.logger.FileLogger" prefix="myApp_log." suffix=".txt" timestamp="true"/> </Context>
which will create every day a new file called
myApp_log.YYYY-MM-DD.txt
containing the daily output
This technique is referred to as "output swallowing" on Tomcat
(note the swallowOutput="true")
What's blocking my lock?
Here comes an extremely clear and interesting article, written by Natalka Roshalk, about Oracle locks and how to monitor what's going on when a lock is blocking a session
read the article
read the article
Subscribe to:
Posts (Atom)