package org.docx4j.model.fields.merge;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.xml.bind.JAXBElement;
import javax.xml.transform.TransformerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang.StringUtils;
import org.docx4j.Docx4jProperties;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.model.fields.ComplexFieldLocator;
import org.docx4j.model.fields.FieldRef;
import org.docx4j.model.fields.FieldsPreprocessor;
import org.docx4j.model.fields.FldSimpleModel;
import org.docx4j.model.fields.FormattingSwitchHelper;
import org.docx4j.model.structure.PageDimensions;
import org.docx4j.model.structure.PageSizePaper;
import org.docx4j.model.structure.SectionWrapper;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.io.SaveToZipFile;
import org.docx4j.openpackaging.packages.OpcPackage;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.JaxbXmlPart;
import org.docx4j.openpackaging.parts.WordprocessingML.FooterPart;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart.AddPartBehaviour;
import org.docx4j.relationships.Relationship;
import org.docx4j.vml.CTTextbox;
import org.docx4j.wml.Body;
import org.docx4j.wml.BooleanDefaultTrue;
import org.docx4j.wml.CTFFData;
import org.docx4j.wml.CTFFName;
import org.docx4j.wml.CTFFTextInput;
import org.docx4j.wml.CTFFTextType;
import org.docx4j.wml.CTPageNumber;
import org.docx4j.wml.CTRel;
import org.docx4j.wml.ContentAccessor;
import org.docx4j.wml.ObjectFactory;
import org.docx4j.wml.P;
import org.docx4j.wml.R;
import org.docx4j.wml.STFFTextType;
import org.docx4j.wml.SectPr;
import org.docx4j.wml.Text;
/**
* Perform a mail merge.
*
* Instance values are merged into a docx containing
* MERGEFIELD to produce output docx made up of
* a copy of the input docx for each collection of
* input values.
*
* The output can be a single docx, or multiple docx.
*
* If you choose single docx, there are two ways to
* do this:
*
* One is using MergeDocx, which will ensure each
* constituent "document" doesn't affect the neighbouring
* ones (eg numbering will restart).
*
* The other is the "poor man's" approach, which
* puts them together, and just hopes for the best.
* Images and hyperlinks should be ok. But numbering
* will continue, as will footnotes/endnotes.
*
* From 3.0, there is some support for formatting switches
* (date/time, numeric, and general), and basic
* support for MERGEFORMAT.
*
* LIMITATIONS:
* - no support for text before (\b) and text after (\f)
* switches
* - no support for \m and \v switches
* - no support for multiple MERGEFIELD in a single
* instruction (eg MERGEFIELD CoutesyTitle \f " " MERGEFIELD FirstName \f " " MERGEFIELD LastName )
*
* @author jharrop
*
*/
public class MailMerger {
private static Logger log = LoggerFactory.getLogger(MailMerger.class);
/**
* A "poor man's" approach, which generates the mail merge
* results as a single docx, and just hopes for the best.
* Images and hyperlinks should be ok. But numbering
* will continue, as will footnotes/endnotes.
* @param input
* @param data
* @return
* @throws Docx4JException
*/
public static WordprocessingMLPackage getConsolidatedResultCrude(WordprocessingMLPackage input,
List<Map<DataFieldName, String>> data) throws Docx4JException {
return getConsolidatedResultCrude(input, data, false);
}
/**
* A "poor man's" approach, which generates the mail merge
* results as a single docx, and just hopes for the best.
* Images and hyperlinks should be ok. But numbering
* will continue, as will footnotes/endnotes.
* [Advert:] If this isn't working for you, the commercial Enterprise Edition of docx4j
* (MergeDocx component) will solve your problems.
* @param input
* @param data
* @param processHeadersAndFooters process headers and footers in FIRST section only.
* If you have multiple sections in your input docx, performMerge is a better approach
* @return
* @throws Docx4JException
* @ since 2.8.1
*/
public static WordprocessingMLPackage getConsolidatedResultCrude(WordprocessingMLPackage input,
List<Map<DataFieldName, String>> data, boolean processHeadersAndFooters) throws Docx4JException {
FormTextFieldNames formTextFieldNames = new FormTextFieldNames();
// create contents destined for the main document part
FieldsPreprocessor.complexifyFields(input.getMainDocumentPart() );
log.debug("complexified: " + XmlUtils.marshaltoString(input.getMainDocumentPart().getJaxbElement(), true));
List<List<Object>> mdpResults = performOverList(input, input.getMainDocumentPart().getContent(), data, formTextFieldNames );
// headers/footers
Map<CTRel, JaxbXmlPart> hfTemplates = null;
BooleanDefaultTrue titlePage = null;
if (processHeadersAndFooters) {
// then we need a clone/template of the headers/footers
// in the first section
hfTemplates = new HashMap<CTRel, JaxbXmlPart>();
SectionWrapper sw = input.getDocumentModel().getSections().get(0);
SectPr sectPr = sw.getSectPr();
List<CTRel> hdrFtrRefs = sectPr.getEGHdrFtrReferences();
titlePage = sectPr.getTitlePg();
for (CTRel rel : hdrFtrRefs) {
String relId = rel.getId();
log.debug("for h|f relId: " + relId);
JaxbXmlPart part = (JaxbXmlPart)input.getMainDocumentPart().getRelationshipsPart().getPart(relId);
FieldsPreprocessor.complexifyFields(part );
log.debug("complexified: " + XmlUtils.marshaltoString(part.getJaxbElement(), true));
hfTemplates.put(rel, part);
}
}
// Create WordprocessingMLPackage target, by cloning
OpcPackage result = null;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
SaveToZipFile saver = new SaveToZipFile(input);
saver.save(baos);
byte[] template = baos.toByteArray();
WordprocessingMLPackage target = WordprocessingMLPackage.load(
new ByteArrayInputStream(template));
// populate main document part
SectPr documentSeparator = getDocumentSeparator(target);
if (processHeadersAndFooters) {
if (titlePage!=null
&& titlePage.isVal()) {
documentSeparator.setTitlePg(titlePage);
}
documentSeparator.getEGHdrFtrReferences().clear();
}
target.getMainDocumentPart().getContent().clear();
/*
* What we're doing, effectively, is doing the
* main content in a single hit (ie for all
* instances), and then, for each instance,
* doing the headers/footers.
*
* It is this way because that is how the code
* has evolved.
*
* Since we have to do the headers/footers
* instance by instance, it would probably
* be neater to do the main content at
* the same time (ie instead of using
* performOverList at the start of this method).
*/
int i = 0;
for (List<Object> content : mdpResults) {
// now inject the content
target.getMainDocumentPart().getContent().addAll(content);
log.debug(XmlUtils.marshaltoString(target.getMainDocumentPart().getJaxbElement(), true, true) );
// add sectPr to final paragraph
Object last = content.get( content.size()-1);
P lastP = null;
if (last instanceof P) {
lastP = (P)last;
} else {
lastP = Context.getWmlObjectFactory().createP();
target.getMainDocumentPart().getContent().add(lastP);
}
if (lastP.getPPr()==null) {
lastP.setPPr(Context.getWmlObjectFactory().createPPr());
}
SectPr thisSection = XmlUtils.deepCopy(documentSeparator);
lastP.getPPr().setSectPr(thisSection);
if (processHeadersAndFooters) {
for( CTRel ctRel : hfTemplates.keySet()) {
// Create a suitable part
JaxbXmlPart part = hfTemplates.get(ctRel);
JaxbXmlPart clonedPart = null;
if (part instanceof HeaderPart) {
clonedPart = new HeaderPart();
clonedPart.setJaxbElement(Context.getWmlObjectFactory().createHdr());
} else if (part instanceof FooterPart) {
clonedPart = new FooterPart();
clonedPart.setJaxbElement(Context.getWmlObjectFactory().createFtr());
}
// Populate it
List<Object> newContent = performOnInstance(input,
((ContentAccessor)part).getContent(),
data.get(i), formTextFieldNames );
((ContentAccessor)clonedPart).getContent().addAll(newContent);
// Add it
Relationship rel = target.getMainDocumentPart().addTargetPart(clonedPart, AddPartBehaviour.RENAME_IF_NAME_EXISTS);
// Now add CTRel!
CTRel newHfRef = XmlUtils.deepCopy(ctRel);
newHfRef.setId(rel.getId());
thisSection.getEGHdrFtrReferences().add(newHfRef);
}
}
i++;
}
return target;
}
/**
* Word uses the existing sectPr element, but adds
* a page numbering restart to it. TODO: investigate
* what it does with headers/footers.
* @param template
* @return
*/
private static SectPr getDocumentSeparator(WordprocessingMLPackage template) {
SectPr sectPr = template.getMainDocumentPart().getJaxbElement().getBody().getSectPr();
if (sectPr==null) {
// Maybe the last P already contains one?
// Presumably Word would reuse this.
List all = template.getMainDocumentPart().getContent();
Object last = all.get( all.size()-1 );
if (last instanceof P) {
if (((P) last).getPPr()!=null
&& ((P) last).getPPr().getSectPr() !=null) {
sectPr = ((P) last).getPPr().getSectPr();
}
}
}
if (sectPr==null) {
// Create a basic sectPr using our Page model
String papersize= Docx4jProperties.getProperties().getProperty("docx4j.PageSize", "A4");
log.info("Using paper size: " + papersize);
String landscapeString = Docx4jProperties.getProperties().getProperty("docx4j.PageOrientationLandscape", "false");
boolean landscape= Boolean.parseBoolean(landscapeString);
log.info("Landscape orientation: " + landscape);
PageDimensions page = new PageDimensions();
page.setPgSize(PageSizePaper.valueOf(papersize), landscape);
sectPr = Context.getWmlObjectFactory().createSectPr();
sectPr.setPgSz( page.getPgSz() );
sectPr.setPgMar( page.getPgMar() );
}
// <w:pgNumType w:start="1"/>
CTPageNumber pageNumber = sectPr.getPgNumType();
if (pageNumber==null) {
pageNumber = Context.getWmlObjectFactory().createCTPageNumber();
sectPr.setPgNumType(pageNumber);
}
pageNumber.setStart(BigInteger.ONE);
return sectPr;
}
// public static List<WordprocessingMLPackage> getResults(WordprocessingMLPackage input,
// List<Map<DataFieldName, String>> data) throws Docx4JException {
//
// List<WordprocessingMLPackage> pkgs = new ArrayList<WordprocessingMLPackage>();
//
// // Just MDP for now
// FieldsPreprocessor.complexifyFields(input.getMainDocumentPart() );
// List<List<Object>> results = performOverList(input.getMainDocumentPart().getContent(), data );
//
// // Prepare for cloning
// OpcPackage result = null;
//
// // Zip it up
// ByteArrayOutputStream baos = new ByteArrayOutputStream();
// SaveToZipFile saver = new SaveToZipFile(input);
// saver.save(baos);
// byte[] template = baos.toByteArray();
//
// for (List<Object> content : results) {
//
// WordprocessingMLPackage target = WordprocessingMLPackage.load(
// new ByteArrayInputStream(template));
// pkgs.add(target);
//
// // now inject the content
// target.getMainDocumentPart().getContent().clear();
// target.getMainDocumentPart().getContent().addAll(content);
// }
//
// return pkgs;
// }
/**
* Perform merge on a single instance.
*
* This is the best approach, if your input has headers/footers in
* multiple sections.
*
* If you are using MergeDocx, you can use that to join the
* instances into a single docx.
*
* WARNING: The input docx will be modified, so input a copy if that is a problem.
* This is left to the user, since that can potentially be more efficient, than
* doing it here.
*
* @param input
* @param data
* @param processHeadersAndFooters
* @return
* @throws Docx4JException
*/
public static void performMerge(WordprocessingMLPackage input,
Map<DataFieldName, String> data, boolean processHeadersAndFooters ) throws Docx4JException {
// HOWTO Clone the input docx
// ByteArrayOutputStream baos = new ByteArrayOutputStream();
// SaveToZipFile saver = new SaveToZipFile(input);
// saver.save(baos);
// byte[] template = baos.toByteArray();
// WordprocessingMLPackage target = WordprocessingMLPackage.load(
// new ByteArrayInputStream(template));
// Required where converting MERGEFIELD to FORMTEXT
FormTextFieldNames formTextFieldNames = new FormTextFieldNames();
// MDP
FieldsPreprocessor.complexifyFields(input.getMainDocumentPart() );
// log.debug("\n\n COMPLEXIFIED " + input.getMainDocumentPart().getPartName().getName() + "\n\n"
// + input.getMainDocumentPart().getXML() + "\n");
List<Object> mdpResults = performOnInstance(input, input.getMainDocumentPart().getContent(), data, formTextFieldNames );
input.getMainDocumentPart().getContent().clear();
input.getMainDocumentPart().getContent().addAll(mdpResults);
if (processHeadersAndFooters) {
RelationshipsPart rp = input.getMainDocumentPart().getRelationshipsPart();
for ( Relationship r : rp.getJaxbElement().getRelationship() ) {
if (r.getType().equals(Namespaces.HEADER)
|| r.getType().equals(Namespaces.FOOTER)) {
JaxbXmlPart part = (JaxbXmlPart)rp.getPart(r);
log.debug("\n\n BEFORE " + part.getPartName().getName() + "\n\n"
+ XmlUtils.marshaltoString(part.getJaxbElement(), true, true) + "\n");
FieldsPreprocessor.complexifyFields(part );
log.debug("\n\n COMPLEXIFIED " + part.getPartName().getName() + "\n\n"
+ XmlUtils.marshaltoString(part.getJaxbElement(), true, true) + "\n");
List<Object> results = performOnInstance(input,
((ContentAccessor)part).getContent(), data, formTextFieldNames );
((ContentAccessor)part).getContent().clear();
((ContentAccessor)part).getContent().addAll(results);
log.debug("\n\n AFTER " + part.getPartName().getName() + "\n\n"
+ XmlUtils.marshaltoString(part.getJaxbElement(), true, true) + "\n");
}
}
}
}
/**
* The idea is to be able to perform a mail merge
* on content from main document part, or a header/footer etc.
*
* We return a list of content lists, so that the consumer
* can choose whether to produce a single docx (via
* MergeDocx or otherwise), or a docx for each item in the list.
*
* @param wordMLPackage
* @param data
* @return
* @throws Docx4JException
*/
private static List<List<Object>> performOverList(WordprocessingMLPackage input,
List<Object> contentList,
List<Map<DataFieldName, String>> data,
FormTextFieldNames formTextFieldNames) throws Docx4JException {
List<List<Object>> results = new ArrayList<List<Object>>();
for (Map<DataFieldName, String> datamap : data) {
results.add(
performOnInstance(input, contentList, datamap, formTextFieldNames));
}
return results;
}
private static List<Object> performOnInstance(WordprocessingMLPackage input,
List<Object> contentList,
Map<DataFieldName, String> datamap,
FormTextFieldNames formTextFieldNames) throws Docx4JException {
// We need our fieldRefs point to the correct objects;
// the easiest way to do this is to create them after cloning!
// to facilitate cloning, wrap the list in a body
Body shell = Context.getWmlObjectFactory().createBody();
shell.getContent().addAll(contentList);
Body shellClone = (Body)XmlUtils.deepCopy(shell);
// find fields
ComplexFieldLocator fl = new ComplexFieldLocator();
new TraversalUtil(shellClone, fl);
log.info("Found " + fl.getStarts().size() + " fields ");
// canonicalise and setup fieldRefs
List<FieldRef> fieldRefs = new ArrayList<FieldRef>();
canonicaliseStarts(fl, fieldRefs);
// Populate
for (FieldRef fr : fieldRefs) {
if ( fr.getFldName().equals("MERGEFIELD") ) {
String instr = extractInstr(fr.getInstructions() );
String datafieldName = getDatafieldNameFromInstr(instr);
String val = datamap.get( new DataFieldName(datafieldName));
String gFormat = null; // required only for FORMTEXT conversion
if (StringUtils.isBlank(val)) {
log.warn("Couldn't find value for key: '" + datafieldName + "'");
if (fieldFate.equals(OutputField.REMOVED)) {
// Remove the mergefield from the document
removeSimpleField(fr);
// Concatenate all content still present in the parent
String text = getTextInsideContent(fr.getParent());
// If the parent still contains data, don't delete it
if (StringUtils.isBlank(text)) {
recursiveRemove(shellClone, fr.getParent());
}
}
} else {
// Now format the result
FldSimpleModel fsm = new FldSimpleModel();
try {
fsm.build(instr);
val = FormattingSwitchHelper.applyFormattingSwitch(input, fsm, val);
gFormat = FormattingSwitchHelper.findFirstSwitchValue("\\*", fsm.getFldParameters(), true);
// Solely for potential use in OutputField.AS_FORMTEXT_REGULAR
// We are in fact applying all formatting switches above.
} catch (TransformerException e) {
log.warn("Can't format the field", e);
}
fr.setResult(val);
}
if (fieldFate.equals(OutputField.AS_FORMTEXT_REGULAR)) {
log.debug(gFormat);
// TODO if we're going to use gFormat, setup FSM irrespective of whether we can find key
// TODO: other format instructions
// if (gFormat!=null) {
// if (gFormat.equals("Upper")) {
// gFormat = "UPPERCASE";
// } else if (gFormat.equals("Lower")) {
// gFormat = "LOWERCASE";
// }
// }
// replace instrText
// eg MERGEFIELD CLIENT.ORGANIZATIONSTATE \* Upper \* MERGEFORMAT
// to FORMTEXT
// Do this first, so we can abort without affecting output
List<Object> instructions = fr.getInstructions();
if (instructions.size()!=1) {
log.error("TODO MERGEFIELD field contained complex instruction");
continue;
}
Object o = XmlUtils.unwrap(instructions.get(0));
if (o instanceof Text) {
((Text)o).setValue("FORMTEXT");
} else {
log.error("TODO: set FORMTEXT in" + o.getClass().getName() );
log.error(XmlUtils.marshaltoString(instructions.get(0), true, true) );
continue;
}
String fieldName = formTextFieldNames.generateName(datafieldName);
log.debug("Field name normalisation: " + datafieldName + " -> " + fieldName);
setFormFieldProperties(fr, fieldName, null);
// remove <w:highlight w:val="lightGray"/>, if present
// (corresponds in Word to clicking Legacy Forms > Form Field Shading)
// so that the result is not printed in grey
R resultR = fr.getResultsSlot();
if (resultR.getRPr()!=null
&& resultR.getRPr().getHighlight()!=null) {
resultR.getRPr().setHighlight(null);
}
} else if (!fieldFate.equals(OutputField.KEEP_MERGEFIELD)) {
// If doing an actual mail merge, the begin-separate run is removed, as is the end run
fr.getParent().getContent().remove(fr.getBeginRun());
fr.getParent().getContent().remove(fr.getEndRun());
}
// System.out.println("AFTER " +XmlUtils.marshaltoString(
// fr.getParent(), true, true));
}
}
return shellClone.getContent();
}
/**
* @param fl
* @param fieldRefs
* @throws Docx4JException
*/
protected static void canonicaliseStarts(ComplexFieldLocator fl,
List<FieldRef> fieldRefs) throws Docx4JException {
for( P p : fl.getStarts() ) {
int index;
if (p.getParent() instanceof ContentAccessor) {
// 2.8.1
index = ((ContentAccessor)p.getParent()).getContent().indexOf(p);
P newP = FieldsPreprocessor.canonicalise(p, fieldRefs);
log.debug("Canonicalised: " + XmlUtils.marshaltoString(newP, true, true));
((ContentAccessor)p.getParent()).getContent().set(index, newP);
} else if (p.getParent() instanceof java.util.List) {
// 3.0
index = ((java.util.List)p.getParent()).indexOf(p);
P newP = FieldsPreprocessor.canonicalise(p, fieldRefs);
log.debug("NewP length: " + newP.getContent().size() );
((java.util.List) p.getParent()).set(index, newP);
} else if (p.getParent() instanceof CTTextbox) {
// 3.0.1
index = ((CTTextbox) p.getParent()).getTxbxContent().getContent().indexOf(p);
P newP = FieldsPreprocessor.canonicalise(p, fieldRefs);
log.debug("Canonicalised: "+ XmlUtils.marshaltoString(newP, true, true));
((CTTextbox) p.getParent()).getTxbxContent().getContent().set(index, newP);
} else {
throw new Docx4JException("Unexpected parent: "+ p.getParent().getClass().getName());
}
}
}
/**
* Get the datafield name from, for example
* <w:instrText xml:space="preserve"> MERGEFIELD Kundenstrasse \* MERGEFORMAT </w:instrText>
* or <w:instrText xml:space="preserve"> MERGEFIELD Kundenstrasse</w:instrText>
*/
protected static String getDatafieldNameFromInstr(String instr) {
// System.out.println("BEFORE " +XmlUtils.marshaltoString(
// fr.getParent(), true, true));
// log.debug(instr);
String tmp = instr.substring( instr.indexOf("MERGEFIELD") + 10);
tmp = tmp.trim();
String datafieldName = null;
// A data field name will be quoted if it contains spaces
if (tmp.startsWith("\"")) {
if (tmp.indexOf("\"",1)>-1) {
datafieldName = tmp.substring(1, tmp.indexOf("\"",1));
} else {
log.warn("Quote mismatch in " + instr);
// hope for the best
datafieldName = tmp.indexOf(" ") >-1 ? tmp.substring(1, tmp.indexOf(" ")) : tmp.substring(1) ;
}
} else {
datafieldName = tmp.indexOf(" ") >-1 ? tmp.substring(0, tmp.indexOf(" ")) : tmp ;
}
log.info("Key: '" + datafieldName + "'");
return datafieldName;
}
protected static String extractInstr(List<Object> instructions) {
// For MERGEFIELD, expect the list to contain a simple string
if (instructions.size()!=1) {
log.error("TODO MERGEFIELD field contained complex instruction");
/* eg
*
* <w:r>
<w:instrText xml:space="preserve"> MERGEFIELD lasauv</w:instrText>
</w:r>
<w:r>
<w:instrText xml:space="preserve">egarde \* MERGEFORMAT </w:instrText>
</w:r>
for (Object i : instructions) {
i = XmlUtils.unwrap(i);
if (i instanceof Text) {
log.error( ((Text)i).getValue());
} else {
log.error(XmlUtils.marshaltoString(i, true, true) );
}
}
*/
return null;
}
Object o = XmlUtils.unwrap(instructions.get(0));
if (o instanceof Text) {
return ((Text)o).getValue();
} else {
log.error("TODO: extract field name from " + o.getClass().getName() );
log.error(XmlUtils.marshaltoString(instructions.get(0), true, true) );
return null;
}
}
/**
* Remove the field but preserve the paragraph and content around it
*
* @param fr
*/
protected static void removeSimpleField(FieldRef fr) {
int end = fr.getParent().getContent().indexOf(fr.getEndRun());
int begin = fr.getParent().getContent().indexOf(fr.getBeginRun());
for (int i = end; i >= begin; i--) {
fr.getParent().getContent().remove(i);
}
}
/**
* Parse through all content inside the paragraph to concatenate all values inside a text
*
* @param paragraph The paragraph which contains (or not) data
* @return All text inside the paragraph
*/
protected static String getTextInsideContent(ContentAccessor paragraph) {
StringBuilder result = new StringBuilder();
for (Object content : paragraph.getContent()) {
if (content instanceof org.docx4j.wml.R) {
org.docx4j.wml.R run = (org.docx4j.wml.R) content;
List<Object> runContent = run.getContent();
for (Object o2 : runContent) {
if (o2 instanceof javax.xml.bind.JAXBElement) {
if (((JAXBElement<?>) o2).getDeclaredType().getName().equals("org.docx4j.wml.Text")) {
org.docx4j.wml.Text t = (org.docx4j.wml.Text) ((JAXBElement) o2).getValue();
result.append(t.getValue());
}
}
}
}
}
return result.toString();
}
/**
* To remove an object from the docx template
*
* @param content Body (or other part) of the template
* @param needToBeRemoved The object that will be removed from the content
*/
protected static void recursiveRemove(ContentAccessor content, Object needToBeRemoved) {
if (content.getContent().contains(needToBeRemoved)) {
content.getContent().remove(needToBeRemoved);
return;
}
for (Object object : content.getContent()) {
if (object instanceof ContentAccessor) {
recursiveRemove((ContentAccessor) object, needToBeRemoved);
} else if (object instanceof JAXBElement<?>) {
JAXBElement<?> element = (JAXBElement<?>) object;
if (element.getValue() instanceof ContentAccessor) {
recursiveRemove((ContentAccessor) element.getValue(), needToBeRemoved);
}
}
}
}
protected static OutputField fieldFate = OutputField.REMOVED;
/**
* What to do with the MERGEFIELD in the output docx.
*
* Default is REMOVED.
*
* KEEP_MERGEFIELD will allow you to perform
* another merge on the output document.
*
* The AS_FORMTEXT options convert the MERGEFIELD to a FORMTEXT field.
* This is convenient if you want users to
* be able to edit the field, where editing is restricted
* to forms.
*
* @param fieldFate
*/
public static void setMERGEFIELDInOutput(OutputField fieldFate) {
MailMerger.fieldFate = fieldFate;
}
public enum OutputField {
DEFAULT,
REMOVED,
KEEP_MERGEFIELD,
AS_FORMTEXT_REGULAR;
// AS_FORMTEXT_TYPED,
// AS_FORMTEXT_TYPED_FORMATTED;
}
protected static void setFormFieldProperties(FieldRef fr, String ffName, String ffTextInputFormat) {
ObjectFactory wmlObjectFactory = Context.getWmlObjectFactory();
// Create object for ffData
CTFFData ffdata = wmlObjectFactory.createCTFFData();
fr.setFormFieldProperties(ffdata);
// Create object for name (wrapped in JAXBElement)
CTFFName ffname = wmlObjectFactory.createCTFFName();
JAXBElement<org.docx4j.wml.CTFFName> ffnameWrapped = wmlObjectFactory.createCTFFDataName(ffname);
ffdata.getNameOrEnabledOrCalcOnExit().add( ffnameWrapped);
ffname.setVal(ffName);
// Create object for enabled (wrapped in JAXBElement)
BooleanDefaultTrue booleandefaulttrue = wmlObjectFactory.createBooleanDefaultTrue();
JAXBElement<org.docx4j.wml.BooleanDefaultTrue> booleandefaulttrueWrapped = wmlObjectFactory.createCTFFDataEnabled(booleandefaulttrue);
ffdata.getNameOrEnabledOrCalcOnExit().add( booleandefaulttrueWrapped);
// Create object for calcOnExit (wrapped in JAXBElement)
BooleanDefaultTrue booleandefaulttrue2 = wmlObjectFactory.createBooleanDefaultTrue();
JAXBElement<org.docx4j.wml.BooleanDefaultTrue> booleandefaulttrueWrapped2 = wmlObjectFactory.createCTFFDataCalcOnExit(booleandefaulttrue2);
ffdata.getNameOrEnabledOrCalcOnExit().add( booleandefaulttrueWrapped2);
// Create object for textInput (wrapped in JAXBElement)
CTFFTextInput fftextinput = wmlObjectFactory.createCTFFTextInput();
JAXBElement<org.docx4j.wml.CTFFTextInput> fftextinputWrapped = wmlObjectFactory.createCTFFDataTextInput(fftextinput);
ffdata.getNameOrEnabledOrCalcOnExit().add( fftextinputWrapped);
//Set type to regular
CTFFTextType ffTextType = wmlObjectFactory.createCTFFTextType();
ffTextType.setVal(STFFTextType.REGULAR);
fftextinput.setType(ffTextType);
if (ffTextInputFormat!=null) {
// Create object for format
CTFFTextInput.Format fftextinputformat = wmlObjectFactory.createCTFFTextInputFormat();
fftextinput.setFormat(fftextinputformat);
fftextinputformat.setVal( ffTextInputFormat); // eg "UPPERCASE"
}
}
/**
* If we're converting MERGEFIELD to FORMTEXT, it is
* desirable to make the w:fldChar/w:ffData/w:name
* unique within the docx (though Word 2010 can still open the docx if they
* aren't), and to remove spaces
*
* @author jharrop
* @since 3.0.0
*/
protected static class FormTextFieldNames {
// MS-OE376 says Word only allows strings of length at most 40 for the name attribute.
// Also, by experiment (in Word 2010), only alphanumberic characters and "_" are allowed
// (no spaces, other symbols/punctuation etc),
// and it can't start with a numeral. Comparison is case insensitive.
// Some UTF-8 characters are allowed eg ϣ
Pattern pattern = java.util.regex.Pattern.compile("[^a-zA-Z0-9]");
// http://webapp.docx4java.org/OnlineDemo/ecma376/WordML/bookmarkStart.html
// says:
// If multiple bookmarks in a document share the same name, then the first bookmark
// (defined by the location of the bookmarkStart element in document order) shall
// be maintained, and all subsequent bookmarks should be ignored.
// By experiment (inserting a cross ref) it looks like Word 2010 actually ignores the first!
// Not that that matters here, since we make the field names unique
private FormTextFieldNameSet names = new FormTextFieldNameSet();
public String generateName(String input) {
// Strip characters
String unpunctuated = pattern.matcher(input).replaceAll("_");
// Ensure it starts with a letter
char c = unpunctuated.charAt(0);
if('0'<=c && c<='9') {
unpunctuated = "z" + unpunctuated;
} else if(c=='_') {
unpunctuated = "z" + unpunctuated;
}
if (names.contains(unpunctuated)) {
// Then make unique
int i = 2;
String newName = null;
do {
newName = unpunctuated + i;
i++;
} while (names.contains(newName));
unpunctuated = newName;
}
// Add to FormTextFieldNameSet
names.add(unpunctuated);
return unpunctuated;
}
/**
* Case insensitive key
* (matching http://www.w3.org/TR/css3-fonts/#font-family-casing
*/
private static class FormTextFieldNameSet extends HashSet<String> {
@Override
public boolean add(String key) {
log.debug("Added '" + key.toLowerCase() + "'");
return super.add(key.toLowerCase());
}
// not @Override because that would require the key parameter to be of type Object
public boolean contains(String key) {
return super.contains(key.toLowerCase());
}
}
}
// public static boolean isMergeField(String type) {
//
// if (type.contains("MERGEFIELD")) {
// return true;
// } else {
// return false;
// }
// }
}