/**
* Copyright (c) 2010 Yahoo! Inc. All rights reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package org.apache.oozie.command.coord;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.CoordinatorActionBean;
import org.apache.oozie.CoordinatorActionInfo;
import org.apache.oozie.CoordinatorJobBean;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.XException;
import org.apache.oozie.action.ActionExecutorException;
import org.apache.oozie.action.hadoop.FsActionExecutor;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.CoordinatorJob;
import org.apache.oozie.client.Job;
import org.apache.oozie.client.SLAEvent.SlaAppType;
import org.apache.oozie.client.rest.RestConstants;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.command.PreconditionException;
import org.apache.oozie.command.RerunTransitionXCommand;
import org.apache.oozie.command.bundle.BundleStatusUpdateXCommand;
import org.apache.oozie.coord.CoordELFunctions;
import org.apache.oozie.executor.jpa.CoordActionGetJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobGetActionForNominalTimeJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobGetActionsForDatesJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobGetJPAExecutor;
import org.apache.oozie.executor.jpa.CoordJobUpdateJPAExecutor;
import org.apache.oozie.executor.jpa.JPAExecutorException;
import org.apache.oozie.service.JPAService;
import org.apache.oozie.service.Services;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.InstrumentUtils;
import org.apache.oozie.util.LogUtils;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.StatusUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.apache.oozie.util.db.SLADbOperations;
import org.jdom.Element;
import org.jdom.JDOMException;
/**
* Rerun coordinator actions by a list of dates or ids. User can specify if refresh or noCleanup.
* <p/>
* The "rerunType" can be set as {@link RestConstants.JOB_COORD_RERUN_DATE} or
* {@link RestConstants.JOB_COORD_RERUN_ACTION}.
* <p/>
* The "refresh" is used to indicate if user wants to refresh an action's input and output events.
* <p/>
* The "noCleanup" is used to indicate if user wants to cleanup output events for given rerun actions
*/
public class CoordRerunXCommand extends RerunTransitionXCommand<CoordinatorActionInfo> {
private String rerunType;
private String scope;
private boolean refresh;
private boolean noCleanup;
private CoordinatorJobBean coordJob = null;
private JPAService jpaService = null;
protected boolean prevPending;
/**
* The constructor for class {@link CoordRerunXCommand}
*
* @param jobId the job id
* @param rerunType rerun type {@link RestConstants.JOB_COORD_RERUN_DATE} or {@link RestConstants.JOB_COORD_RERUN_ACTION}
* @param scope the rerun scope for given rerunType separated by ","
* @param refresh true if user wants to refresh input/output dataset urls
* @param noCleanup false if user wants to cleanup output events for given rerun actions
*/
public CoordRerunXCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
super("coord_rerun", "coord_rerun", 1);
this.jobId = ParamChecker.notEmpty(jobId, "jobId");
this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
this.scope = ParamChecker.notEmpty(scope, "scope");
this.refresh = refresh;
this.noCleanup = noCleanup;
}
/**
* Get the list of actions for given id ranges
*
* @param jobId coordinator job id
* @param scope the id range to rerun separated by ","
* @return the list of all actions to rerun
* @throws CommandException thrown if failed to get coordinator actions by given id range
*/
private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope) throws CommandException {
ParamChecker.notEmpty(jobId, "jobId");
ParamChecker.notEmpty(scope, "scope");
Set<String> actions = new HashSet<String>();
String[] list = scope.split(",");
for (String s : list) {
s = s.trim();
if (s.contains("-")) {
String[] range = s.split("-");
if (range.length != 2) {
throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
}
int start;
int end;
try {
start = Integer.parseInt(range[0].trim());
end = Integer.parseInt(range[1].trim());
if (start > end) {
throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
}
}
catch (NumberFormatException ne) {
throw new CommandException(ErrorCode.E0302, ne);
}
for (int i = start; i <= end; i++) {
actions.add(jobId + "@" + i);
}
}
else {
try {
Integer.parseInt(s);
}
catch (NumberFormatException ne) {
throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
+ "'. Integer only.");
}
actions.add(jobId + "@" + s);
}
}
List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
for (String id : actions) {
CoordinatorActionBean coordAction;
try {
coordAction = jpaService.execute(new CoordActionGetJPAExecutor(id));
}
catch (JPAExecutorException je) {
throw new CommandException(je);
}
coordActions.add(coordAction);
LOG.debug("Rerun coordinator for actionId='" + id + "'");
}
return coordActions;
}
/**
* Get the list of actions for given date ranges
*
* @param jobId coordinator job id
* @param scope the date range to rerun separated by ","
* @return the list of dates to rerun
* @throws CommandException thrown if failed to get coordinator actions by given date range
*/
private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope) throws CommandException {
ParamChecker.notEmpty(jobId, "jobId");
ParamChecker.notEmpty(scope, "scope");
Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
String[] list = scope.split(",");
for (String s : list) {
s = s.trim();
if (s.contains("::")) {
String[] dateRange = s.split("::");
if (dateRange.length != 2) {
throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
}
Date start;
Date end;
try {
start = DateUtils.parseDateUTC(dateRange[0].trim());
end = DateUtils.parseDateUTC(dateRange[1].trim());
if (start.after(end)) {
throw new CommandException(ErrorCode.E0302, "start date is older than end date: '" + s + "'");
}
}
catch (Exception e) {
throw new CommandException(ErrorCode.E0302, e);
}
List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end);
actionSet.addAll(listOfActions);
}
else {
try {
Date date = DateUtils.parseDateUTC(s.trim());
CoordinatorActionBean coordAction = jpaService
.execute(new CoordJobGetActionForNominalTimeJPAExecutor(jobId, date));
actionSet.add(coordAction);
}
catch (JPAExecutorException e) {
throw new CommandException(e);
}
catch (Exception e) {
throw new CommandException(ErrorCode.E0302, e);
}
}
}
List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
for (CoordinatorActionBean coordAction : actionSet) {
coordActions.add(coordAction);
LOG.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
}
return coordActions;
}
/**
* Get coordinator action ids between given start and end time
*
* @param jobId coordinator job id
* @param start start time
* @param end end time
* @return a list of coordinator actions belong to the range of start and end time
* @throws CommandException thrown if failed to get coordinator actions
*/
private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end)
throws CommandException {
List<CoordinatorActionBean> list;
try {
list = jpaService.execute(new CoordJobGetActionsForDatesJPAExecutor(jobId, start, end));
}
catch (JPAExecutorException je) {
throw new CommandException(je);
}
return list;
}
/**
* Check if all given actions are eligible to rerun.
*
* @param actions list of CoordinatorActionBean
* @return true if all actions are eligible to rerun
*/
private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
boolean ret = false;
for (CoordinatorActionBean coordAction : coordActions) {
ret = true;
if (!coordAction.isTerminalStatus()) {
ret = false;
break;
}
}
return ret;
}
/**
* Cleanup output-events directories
*
* @param eAction coordinator action xml
* @param user user name
* @param group group name
*/
@SuppressWarnings("unchecked")
private void cleanupOutputEvents(Element eAction, String user, String group) {
Element outputList = eAction.getChild("output-events", eAction.getNamespace());
if (outputList != null) {
for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
if (data.getChild("uris", data.getNamespace()) != null) {
String uris = data.getChild("uris", data.getNamespace()).getTextTrim();
if (uris != null) {
String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR);
FsActionExecutor fsAe = new FsActionExecutor();
for (String uri : uriArr) {
Path path = new Path(uri);
try {
fsAe.delete(user, group, path);
LOG.debug("Cleanup the output dir " + path);
}
catch (ActionExecutorException ae) {
LOG.warn("Failed to cleanup the output dir " + uri, ae);
}
}
}
}
}
}
else {
LOG.info("No output-events defined in coordinator xml. Therefore nothing to cleanup");
}
}
/**
* Refresh an action's input and ouput events.
*
* @param coordJob coordinator job bean
* @param coordAction coordinator action bean
* @throws Exception thrown if failed to materialize coordinator action
*/
private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction) throws Exception {
Configuration jobConf = null;
try {
jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
}
catch (IOException ioe) {
LOG.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
throw new CommandException(ErrorCode.E1005, ioe);
}
String jobXml = coordJob.getJobXml();
Element eJob = XmlUtils.parseXml(jobXml);
Date actualTime = new Date();
String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), coordAction
.getNominalTime(), actualTime, coordAction.getActionNumber(), jobConf, coordAction);
LOG.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
+ XmlUtils.prettyPrint(actionXml).toString());
coordAction.setActionXml(actionXml);
}
/**
* Update an action into database table
*
* @param coordJob coordinator job bean
* @param coordAction coordinator action bean
* @param actionXml coordinator action xml
* @throws Exception thrown failed to update coordinator action bean or unable to write sla registration event
*/
private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml)
throws Exception {
LOG.debug("updateAction for actionId=" + coordAction.getId());
if (coordAction.getStatus() == CoordinatorAction.Status.TIMEDOUT) {
LOG.debug("Updating created time for TIMEDOUT action id =" + coordAction.getId());
coordAction.setCreatedTime(new Date());
}
coordAction.setStatus(CoordinatorAction.Status.WAITING);
coordAction.setExternalId("");
coordAction.setExternalStatus("");
coordAction.setRerunTime(new Date());
coordAction.setLastModifiedTime(new Date());
jpaService.execute(new org.apache.oozie.executor.jpa.CoordActionUpdateJPAExecutor(coordAction));
writeActionRegistration(coordAction.getActionXml(), coordAction, coordJob.getUser(), coordJob.getGroup());
}
/**
* Create SLA RegistrationEvent
*
* @param actionXml action xml
* @param actionBean coordinator action bean
* @param user user name
* @param group group name
* @throws Exception thrown if unable to write sla registration event
*/
private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, String user, String group)
throws Exception {
Element eAction = XmlUtils.parseXml(actionXml);
Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
SLADbOperations.writeSlaRegistrationEvent(eSla, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user, group,
LOG);
}
/* (non-Javadoc)
* @see org.apache.oozie.command.XCommand#getEntityKey()
*/
@Override
protected String getEntityKey() {
return jobId;
}
/* (non-Javadoc)
* @see org.apache.oozie.command.XCommand#isLockRequired()
*/
@Override
protected boolean isLockRequired() {
return true;
}
/* (non-Javadoc)
* @see org.apache.oozie.command.XCommand#loadState()
*/
@Override
protected void loadState() throws CommandException {
jpaService = Services.get().get(JPAService.class);
if (jpaService == null) {
throw new CommandException(ErrorCode.E0610);
}
try {
coordJob = jpaService.execute(new CoordJobGetJPAExecutor(jobId));
prevPending = coordJob.isPending();
}
catch (JPAExecutorException je) {
throw new CommandException(je);
}
LogUtils.setLogInfo(coordJob, logInfo);
}
/* (non-Javadoc)
* @see org.apache.oozie.command.XCommand#verifyPrecondition()
*/
@Override
protected void verifyPrecondition() throws CommandException, PreconditionException {
if (coordJob.getStatus() == CoordinatorJob.Status.KILLED
|| coordJob.getStatus() == CoordinatorJob.Status.FAILED) {
LOG.info("CoordRerunXCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid=" + jobId);
throw new CommandException(ErrorCode.E1018,
"coordinator job is killed or failed so all actions are not eligible to rerun!");
}
// no actioins have been created for PREP job
if (coordJob.getStatus() == CoordinatorJob.Status.PREP) {
LOG.info("CoordRerunXCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid=" + jobId);
throw new CommandException(ErrorCode.E1018,
"coordinator job is PREP so no actions are materialized to rerun!");
}
}
@Override
protected void eagerVerifyPrecondition() throws CommandException, PreconditionException {
verifyPrecondition();
}
@Override
public void rerunChildren() throws CommandException {
boolean isError = false;
try {
CoordinatorActionInfo coordInfo = null;
InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
List<CoordinatorActionBean> coordActions;
if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
coordActions = getCoordActionsFromDates(jobId, scope);
}
else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
coordActions = getCoordActionsFromIds(jobId, scope);
}
else {
isError = true;
throw new CommandException(ErrorCode.E1018, "date or action expected.");
}
if (checkAllActionsRunnable(coordActions)) {
for (CoordinatorActionBean coordAction : coordActions) {
String actionXml = coordAction.getActionXml();
if (!noCleanup) {
Element eAction = XmlUtils.parseXml(actionXml);
cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup());
}
if (refresh) {
refreshAction(coordJob, coordAction);
}
updateAction(coordJob, coordAction, actionXml);
queue(new CoordActionNotificationXCommand(coordAction), 100);
queue(new CoordActionInputCheckXCommand(coordAction.getId()), 100);
}
}
else {
isError = true;
throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
}
coordInfo = new CoordinatorActionInfo(coordActions);
ret = coordInfo;
}
catch (XException xex) {
isError = true;
throw new CommandException(xex);
}
catch (JDOMException jex) {
isError = true;
throw new CommandException(ErrorCode.E0700, jex);
}
catch (Exception ex) {
isError = true;
throw new CommandException(ErrorCode.E1018, ex);
}
finally{
if(isError){
transitToPrevious();
}
}
}
/*
* (non-Javadoc)
* @see org.apache.oozie.command.TransitionXCommand#getJob()
*/
@Override
public Job getJob() {
return coordJob;
}
@Override
public void notifyParent() throws CommandException {
//update bundle action
if (getPrevStatus() != null && coordJob.getBundleId() != null) {
BundleStatusUpdateXCommand bundleStatusUpdate = new BundleStatusUpdateXCommand(coordJob, getPrevStatus());
bundleStatusUpdate.call();
}
}
@Override
public void updateJob() throws CommandException {
try {
// rerun a paused coordinator job will keep job status at paused and pending at previous pending
if (getPrevStatus()!= null && getPrevStatus().equals(Job.Status.PAUSED)) {
coordJob.setStatus(Job.Status.PAUSED);
if (prevPending) {
coordJob.setPending();
} else {
coordJob.resetPending();
}
}
jpaService.execute(new CoordJobUpdateJPAExecutor(coordJob));
}
catch (JPAExecutorException je) {
throw new CommandException(je);
}
}
/* (non-Javadoc)
* @see org.apache.oozie.command.RerunTransitionXCommand#getLog()
*/
@Override
public XLog getLog() {
return LOG;
}
@Override
public final void transitToNext() {
prevStatus = coordJob.getStatus();
coordJob.setStatus(Job.Status.RUNNING);
// used for backward support of coordinator 0.1 schema
coordJob.setStatus(StatusUtils.getStatusForCoordRerun(coordJob, prevStatus));
coordJob.setPending();
}
private final void transitToPrevious() throws CommandException {
coordJob.setStatus(getPrevStatus());
if (!prevPending) {
coordJob.resetPending();
}
else {
coordJob.setPending();
}
}
}