001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.io.BufferedInputStream;
021import java.io.BufferedOutputStream;
022import java.io.Closeable;
023import java.io.DataInputStream;
024import java.io.DataOutputStream;
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.FileOutputStream;
028import java.io.IOException;
029import java.nio.file.Files;
030import java.nio.file.Paths;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.EnumSet;
035import java.util.HashSet;
036import java.util.Iterator;
037import java.util.List;
038import java.util.Locale;
039import java.util.Optional;
040import java.util.Set;
041import java.util.concurrent.Callable;
042import java.util.concurrent.CancellationException;
043import java.util.concurrent.ExecutionException;
044import java.util.concurrent.ExecutorService;
045import java.util.concurrent.Executors;
046import java.util.concurrent.Future;
047import java.util.concurrent.TimeUnit;
048import java.util.concurrent.TimeoutException;
049import java.util.function.Predicate;
050import org.apache.commons.io.IOUtils;
051import org.apache.hadoop.conf.Configuration;
052import org.apache.hadoop.hbase.ClusterMetrics.Option;
053import org.apache.hadoop.hbase.HBaseConfiguration;
054import org.apache.hadoop.hbase.HConstants;
055import org.apache.hadoop.hbase.HRegionLocation;
056import org.apache.hadoop.hbase.MetaTableAccessor;
057import org.apache.hadoop.hbase.ServerName;
058import org.apache.hadoop.hbase.UnknownRegionException;
059import org.apache.hadoop.hbase.client.Admin;
060import org.apache.hadoop.hbase.client.Connection;
061import org.apache.hadoop.hbase.client.ConnectionFactory;
062import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
063import org.apache.hadoop.hbase.client.RegionInfo;
064import org.apache.hadoop.hbase.client.RegionInfoBuilder;
065import org.apache.hadoop.hbase.client.Result;
066import org.apache.hadoop.hbase.master.RackManager;
067import org.apache.hadoop.hbase.master.RegionState;
068import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
069import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
070import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
071import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
072import org.apache.yetus.audience.InterfaceAudience;
073import org.slf4j.Logger;
074import org.slf4j.LoggerFactory;
075
076import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
077import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
078
/**
 * Tool for loading/unloading regions to/from a given regionserver. This tool can be run from the
 * command line directly as a utility. Supports Ack/No Ack mode for loading/unloading operations.
 * Ack mode acknowledges if regions are online after movement, while noAck mode is a best effort
 * mode that improves performance but will still move on if a region is stuck/not moved. The
 * motivation behind noAck mode is RS shutdown, where even if a region is stuck, upon shutdown the
 * master will move it anyway. This can also be used by constructing an object using the builder
 * and then calling the {@link #load()} or {@link #unload()} methods for the desired operations.
 */
088@InterfaceAudience.Public
089public class RegionMover extends AbstractHBaseTool implements Closeable {
  /** Configuration key for the number of times a region move is retried in ack mode. */
  public static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max";
  /**
   * Configuration key for the per-region wait, in seconds, used to size the timeout applied while
   * waiting for move tasks to finish.
   */
  public static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max";
  /** Configuration key for the maximum wait, in seconds, read when resolving the target server. */
  public static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max";
  /** Default for {@link #MOVE_RETRIES_MAX_KEY}. */
  public static final int DEFAULT_MOVE_RETRIES_MAX = 5;
  /** Default for {@link #MOVE_WAIT_MAX_KEY}, in seconds. */
  public static final int DEFAULT_MOVE_WAIT_MAX = 60;
  /** Default for {@link #SERVERSTART_WAIT_MAX_KEY}, in seconds. */
  public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180;

  private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class);

  // NOTE(review): not referenced in the visible part of this file; presumably populated by the
  // command-line option handling - confirm.
  private RegionMoverBuilder rmbuilder;
  // true: moves are submitted as MoveWithAck; false: as best-effort MoveWithoutAck.
  private boolean ack = true;
  // Size of the thread pools used to move regions.
  private int maxthreads = 1;
  // Global timeout, in seconds, applied to a whole load/unload task.
  private int timeout;
  // Encoded names of the regions to isolate on the target server.
  private List<String> isolateRegionIdArray;
  // NOTE(review): not referenced in the visible part of this file; presumably the operation name
  // given on the command line - confirm.
  private String loadUnload;
  // Host name of the region server that regions are loaded to / unloaded from.
  private String hostname;
  // File the moved regions are written to while unloading and read from while loading.
  private String filename;
  // Optional file listing region servers that must not receive regions.
  private String excludeFile;
  // Optional file listing the only region servers that may receive regions.
  private String designatedFile;
  // Port of the region server identified by hostname.
  private int port;
  // Cluster connection and admin; both are released by close().
  private Connection conn;
  private Admin admin;
  // Resolves the rack of a server; used when unloading to other racks only.
  private RackManager rackManager;
113
114  private RegionMover(RegionMoverBuilder builder) throws IOException {
115    this.hostname = builder.hostname;
116    this.filename = builder.filename;
117    this.excludeFile = builder.excludeFile;
118    this.designatedFile = builder.designatedFile;
119    this.maxthreads = builder.maxthreads;
120    this.isolateRegionIdArray = builder.isolateRegionIdArray;
121    this.ack = builder.ack;
122    this.port = builder.port;
123    this.timeout = builder.timeout;
124    setConf(builder.conf);
125    this.conn = ConnectionFactory.createConnection(conf);
126    this.admin = conn.getAdmin();
127    // Only while running unit tests, builder.rackManager will not be null for the convenience of
128    // providing custom rackManager. Otherwise for regular workflow/user triggered action,
129    // builder.rackManager is supposed to be null. Hence, setter of builder.rackManager is
130    // provided as @InterfaceAudience.Private and it is commented that this is just
131    // to be used by unit test.
132    rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager;
133  }
134
  /**
   * Creates an instance with every field left at its default; no connection is opened.
   * NOTE(review): presumably used by the command-line entry point, which is outside the visible
   * part of this file - confirm.
   */
  private RegionMover() {
  }
137
138  @Override
139  public void close() {
140    IOUtils.closeQuietly(this.admin, e -> LOG.warn("failed to close admin", e));
141    IOUtils.closeQuietly(this.conn, e -> LOG.warn("failed to close conn", e));
142  }
143
144  /**
145   * Builder for Region mover. Use the {@link #build()} method to create RegionMover object. Has
146   * {@link #filename(String)}, {@link #excludeFile(String)}, {@link #maxthreads(int)},
147   * {@link #ack(boolean)}, {@link #timeout(int)}, {@link #designatedFile(String)} methods to set
148   * the corresponding options.
149   */
150  public static class RegionMoverBuilder {
151    private boolean ack = true;
152    private int maxthreads = 1;
153    private int timeout = Integer.MAX_VALUE;
154    private List<String> isolateRegionIdArray = new ArrayList<>();
155    private String hostname;
156    private String filename;
157    private String excludeFile = null;
158    private String designatedFile = null;
159    private String defaultDir = System.getProperty("java.io.tmpdir");
160    @InterfaceAudience.Private
161    final int port;
162    private final Configuration conf;
163    private RackManager rackManager;
164
165    public RegionMoverBuilder(String hostname) {
166      this(hostname, createConf());
167    }
168
169    /**
170     * Creates a new configuration and sets region mover specific overrides
171     */
172    private static Configuration createConf() {
173      Configuration conf = HBaseConfiguration.create();
174      conf.setInt("hbase.client.prefetch.limit", 1);
175      conf.setInt("hbase.client.pause", 500);
176      conf.setInt("hbase.client.retries.number", 100);
177      return conf;
178    }
179
180    /**
181     * @param hostname Hostname to unload regions from or load regions to. Can be either hostname or
182     *                 hostname:port.
183     * @param conf     Configuration object
184     */
185    public RegionMoverBuilder(String hostname, Configuration conf) {
186      String[] splitHostname = hostname.toLowerCase().split(":");
187      this.hostname = splitHostname[0];
188      if (splitHostname.length == 2) {
189        this.port = Integer.parseInt(splitHostname[1]);
190      } else {
191        this.port = conf.getInt(HConstants.REGIONSERVER_PORT, HConstants.DEFAULT_REGIONSERVER_PORT);
192      }
193      this.filename = defaultDir + File.separator + System.getProperty("user.name") + this.hostname
194        + ":" + Integer.toString(this.port);
195      this.conf = conf;
196    }
197
198    /**
199     * Path of file where regions will be written to during unloading/read from during loading
200     * @return RegionMoverBuilder object
201     */
202    public RegionMoverBuilder filename(String filename) {
203      this.filename = filename;
204      return this;
205    }
206
207    /**
208     * Set the max number of threads that will be used to move regions
209     */
210    public RegionMoverBuilder maxthreads(int threads) {
211      this.maxthreads = threads;
212      return this;
213    }
214
215    /**
216     * Set the region ID to isolate on the region server.
217     */
218    public RegionMoverBuilder isolateRegionIdArray(List<String> isolateRegionIdArray) {
219      this.isolateRegionIdArray = isolateRegionIdArray;
220      return this;
221    }
222
223    /**
224     * Path of file containing hostnames to be excluded during region movement. Exclude file should
225     * have 'host:port' per line. Port is mandatory here as we can have many RS running on a single
226     * host.
227     */
228    public RegionMoverBuilder excludeFile(String excludefile) {
229      this.excludeFile = excludefile;
230      return this;
231    }
232
233    /**
234     * Set the designated file. Designated file contains hostnames where region moves. Designated
235     * file should have 'host:port' per line. Port is mandatory here as we can have many RS running
236     * on a single host.
237     * @param designatedFile The designated file
238     * @return RegionMoverBuilder object
239     */
240    public RegionMoverBuilder designatedFile(String designatedFile) {
241      this.designatedFile = designatedFile;
242      return this;
243    }
244
245    /**
246     * Set ack/noAck mode.
247     * <p>
248     * In ack mode regions are acknowledged before and after moving and the move is retried
249     * hbase.move.retries.max times, if unsuccessful we quit with exit code 1.No Ack mode is a best
250     * effort mode,each region movement is tried once.This can be used during graceful shutdown as
251     * even if we have a stuck region,upon shutdown it'll be reassigned anyway.
252     * <p>
253     * @return RegionMoverBuilder object
254     */
255    public RegionMoverBuilder ack(boolean ack) {
256      this.ack = ack;
257      return this;
258    }
259
260    /**
261     * Set the timeout for Load/Unload operation in seconds.This is a global timeout,threadpool for
262     * movers also have a separate time which is hbase.move.wait.max * number of regions to
263     * load/unload
264     * @param timeout in seconds
265     * @return RegionMoverBuilder object
266     */
267    public RegionMoverBuilder timeout(int timeout) {
268      this.timeout = timeout;
269      return this;
270    }
271
272    /**
273     * Set specific rackManager implementation. This setter method is for testing purpose only.
274     * @param rackManager rackManager impl
275     * @return RegionMoverBuilder object
276     */
277    @InterfaceAudience.Private
278    public RegionMoverBuilder rackManager(RackManager rackManager) {
279      this.rackManager = rackManager;
280      return this;
281    }
282
283    /**
284     * This method builds the appropriate RegionMover object which can then be used to load/unload
285     * using load and unload methods
286     * @return RegionMover object
287     */
288    public RegionMover build() throws IOException {
289      return new RegionMover(this);
290    }
291  }
292
293  /**
294   * Loads the specified {@link #hostname} with regions listed in the {@link #filename} RegionMover
295   * Object has to be created using {@link #RegionMover(RegionMoverBuilder)}
296   * @return true if loading succeeded, false otherwise
297   */
298  public boolean load() throws ExecutionException, InterruptedException, TimeoutException {
299    ExecutorService loadPool = Executors.newFixedThreadPool(1);
300    Future<Boolean> loadTask = loadPool.submit(getMetaRegionMovePlan());
301    boolean isMetaMoved = waitTaskToFinish(loadPool, loadTask, "loading");
302    if (!isMetaMoved) {
303      return false;
304    }
305    loadPool = Executors.newFixedThreadPool(1);
306    loadTask = loadPool.submit(getNonMetaRegionsMovePlan());
307    return waitTaskToFinish(loadPool, loadTask, "loading");
308  }
309
310  private Callable<Boolean> getMetaRegionMovePlan() {
311    return getRegionsMovePlan(true);
312  }
313
314  private Callable<Boolean> getNonMetaRegionsMovePlan() {
315    return getRegionsMovePlan(false);
316  }
317
318  private Callable<Boolean> getRegionsMovePlan(boolean moveMetaRegion) {
319    return () -> {
320      try {
321        List<RegionInfo> regionsToMove = readRegionsFromFile(filename);
322        if (regionsToMove.isEmpty()) {
323          LOG.info("No regions to load.Exiting");
324          return true;
325        }
326        Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove);
327        if (moveMetaRegion) {
328          if (metaRegion.isPresent()) {
329            loadRegions(Collections.singletonList(metaRegion.get()));
330          }
331        } else {
332          metaRegion.ifPresent(regionsToMove::remove);
333          loadRegions(regionsToMove);
334        }
335      } catch (Exception e) {
336        LOG.error("Error while loading regions to " + hostname, e);
337        return false;
338      }
339      return true;
340    };
341  }
342
343  private Optional<RegionInfo> getMetaRegionInfoIfToBeMoved(List<RegionInfo> regionsToMove) {
344    return regionsToMove.stream().filter(RegionInfo::isMetaRegion).findFirst();
345  }
346
347  private void loadRegions(List<RegionInfo> regionsToMove) throws Exception {
348    ServerName server = getTargetServer();
349    List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>());
350    LOG.info("Moving " + regionsToMove.size() + " regions to " + server + " using "
351      + this.maxthreads + " threads.Ack mode:" + this.ack);
352
353    final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
354    List<Future<Boolean>> taskList = new ArrayList<>();
355    int counter = 0;
356    while (counter < regionsToMove.size()) {
357      RegionInfo region = regionsToMove.get(counter);
358      ServerName currentServer = MoveWithAck.getServerNameForRegion(region, admin, conn);
359      if (currentServer == null) {
360        LOG
361          .warn("Could not get server for Region:" + region.getRegionNameAsString() + " moving on");
362        counter++;
363        continue;
364      } else if (server.equals(currentServer)) {
365        LOG.info(
366          "Region " + region.getRegionNameAsString() + " is already on target server=" + server);
367        counter++;
368        continue;
369      }
370      if (ack) {
371        Future<Boolean> task = moveRegionsPool
372          .submit(new MoveWithAck(conn, region, currentServer, server, movedRegions));
373        taskList.add(task);
374      } else {
375        Future<Boolean> task = moveRegionsPool
376          .submit(new MoveWithoutAck(admin, region, currentServer, server, movedRegions));
377        taskList.add(task);
378      }
379      counter++;
380    }
381
382    moveRegionsPool.shutdown();
383    long timeoutInSeconds = regionsToMove.size()
384      * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
385    waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds);
386  }
387
388  /**
389   * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In
390   * noAck mode we do not make sure that region is successfully online on the target region
391   * server,hence it is best effort.We do not unload regions to hostnames given in
392   * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
393   * to hostnames provided in {@link #designatedFile}
394   * @return true if unloading succeeded, false otherwise
395   */
396  public boolean unload() throws InterruptedException, ExecutionException, TimeoutException {
397    return unloadRegions(false);
398  }
399
400  /**
401   * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In
402   * noAck mode we do not make sure that region is successfully online on the target region
403   * server,hence it is best effort.We do not unload regions to hostnames given in
404   * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
405   * to hostnames provided in {@link #designatedFile}. While unloading regions, destination
406   * RegionServers are selected from different rack i.e regions should not move to any RegionServers
407   * that belong to same rack as source RegionServer.
408   * @return true if unloading succeeded, false otherwise
409   */
410  public boolean unloadFromRack()
411    throws InterruptedException, ExecutionException, TimeoutException {
412    return unloadRegions(true);
413  }
414
415  private boolean unloadRegions(boolean unloadFromRack)
416    throws ExecutionException, InterruptedException, TimeoutException {
417    return unloadRegions(unloadFromRack, null);
418  }
419
420  /**
421   * Isolated regions specified in {@link #isolateRegionIdArray} on {@link #hostname} in ack Mode
422   * and Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.
423   * In noAck mode we do not make sure that region is successfully online on the target region
424   * server,hence it is the best effort. We do not unload regions to hostnames given in
425   * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
426   * to hostnames provided in {@link #designatedFile}
427   * @return true if region isolation succeeded, false otherwise
428   */
429  public boolean isolateRegions()
430    throws ExecutionException, InterruptedException, TimeoutException {
431    return unloadRegions(false, isolateRegionIdArray);
432  }
433
434  private boolean unloadRegions(boolean unloadFromRack, List<String> isolateRegionIdArray)
435    throws InterruptedException, ExecutionException, TimeoutException {
436    deleteFile(this.filename);
437    ExecutorService unloadPool = Executors.newFixedThreadPool(1);
438    Future<Boolean> unloadTask = unloadPool.submit(() -> {
439      List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>());
440      try {
441        // Get Online RegionServers
442        List<ServerName> regionServers = new ArrayList<>();
443        regionServers.addAll(admin.getRegionServers());
444        // Remove the host Region server from target Region Servers list
445        ServerName server = stripServer(regionServers, hostname, port);
446        if (server == null) {
447          LOG.info("Could not find server '{}:{}' in the set of region servers. giving up.",
448            hostname, port);
449          LOG.debug("List of region servers: {}", regionServers);
450          return false;
451        }
452        // Remove RS not present in the designated file
453        includeExcludeRegionServers(designatedFile, regionServers, true);
454
455        // Remove RS present in the exclude file
456        includeExcludeRegionServers(excludeFile, regionServers, false);
457
458        if (unloadFromRack) {
459          // remove regionServers that belong to same rack (as source host) since the goal is to
460          // unload regions from source regionServer to destination regionServers
461          // that belong to different rack only.
462          String sourceRack = rackManager.getRack(server);
463          List<String> racks = rackManager.getRack(regionServers);
464          Iterator<ServerName> iterator = regionServers.iterator();
465          int i = 0;
466          while (iterator.hasNext()) {
467            iterator.next();
468            if (racks.size() > i && racks.get(i) != null && racks.get(i).equals(sourceRack)) {
469              iterator.remove();
470            }
471            i++;
472          }
473        }
474
475        // Remove decommissioned RS
476        Set<ServerName> decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers());
477        if (CollectionUtils.isNotEmpty(decommissionedRS)) {
478          regionServers.removeIf(decommissionedRS::contains);
479          LOG.debug("Excluded RegionServers from unloading regions to because they "
480            + "are marked as decommissioned. Servers: {}", decommissionedRS);
481        }
482
483        stripMaster(regionServers);
484        if (regionServers.isEmpty()) {
485          LOG.warn("No Regions were moved - no servers available");
486          return false;
487        }
488        unloadRegions(server, regionServers, movedRegions, isolateRegionIdArray);
489      } catch (Exception e) {
490        LOG.error("Error while unloading regions ", e);
491        return false;
492      } finally {
493        if (movedRegions != null) {
494          writeFile(filename, movedRegions);
495        }
496      }
497      return true;
498    });
499    return waitTaskToFinish(unloadPool, unloadTask, "unloading");
500  }
501
502  private void unloadRegions(ServerName server, List<ServerName> regionServers,
503    List<RegionInfo> movedRegions, List<String> isolateRegionIdArray) throws Exception {
504    while (true) {
505      List<RegionInfo> isolateRegionInfoList = Collections.synchronizedList(new ArrayList<>());
506      RegionInfo isolateRegionInfo = null;
507      if (isolateRegionIdArray != null && !isolateRegionIdArray.isEmpty()) {
508        // Region will be moved to target region server with Ack mode.
509        final ExecutorService isolateRegionPool = Executors.newFixedThreadPool(maxthreads);
510        List<Future<Boolean>> isolateRegionTaskList = new ArrayList<>();
511        List<RegionInfo> recentlyIsolatedRegion = Collections.synchronizedList(new ArrayList<>());
512        boolean allRegionOpsSuccessful = true;
513        boolean isMetaIsolated = false;
514        RegionInfo metaRegionInfo = RegionInfoBuilder.FIRST_META_REGIONINFO;
515        List<HRegionLocation> hRegionLocationRegionIsolation =
516          Collections.synchronizedList(new ArrayList<>());
517        for (String isolateRegionId : isolateRegionIdArray) {
518          if (isolateRegionId.equalsIgnoreCase(metaRegionInfo.getEncodedName())) {
519            isMetaIsolated = true;
520            continue;
521          }
522          Result result = MetaTableAccessor.scanByRegionEncodedName(conn, isolateRegionId);
523          HRegionLocation hRegionLocation =
524            MetaTableAccessor.getRegionLocation(conn, result.getRow());
525          if (hRegionLocation != null) {
526            hRegionLocationRegionIsolation.add(hRegionLocation);
527          } else {
528            LOG.error("Region " + isolateRegionId + " doesn't exists/can't fetch from"
529              + " meta...Quitting now");
530            // We only move the regions if all the regions were found.
531            allRegionOpsSuccessful = false;
532            break;
533          }
534        }
535
536        if (!allRegionOpsSuccessful) {
537          break;
538        }
539        // If hbase:meta region was isolated, then it needs to be part of isolateRegionInfoList.
540        if (isMetaIsolated) {
541          ZKWatcher zkWatcher = new ZKWatcher(conf, null, null);
542          List<HRegionLocation> result = new ArrayList<>();
543          for (String znode : zkWatcher.getMetaReplicaNodes()) {
544            String path = ZNodePaths.joinZNode(zkWatcher.getZNodePaths().baseZNode, znode);
545            int replicaId = zkWatcher.getZNodePaths().getMetaReplicaIdFromPath(path);
546            RegionState state = MetaTableLocator.getMetaRegionState(zkWatcher, replicaId);
547            result.add(new HRegionLocation(state.getRegion(), state.getServerName()));
548          }
549          ServerName metaSeverName = result.get(0).getServerName();
550          // For isolating hbase:meta, it should move explicitly in Ack mode,
551          // hence the forceMoveRegionByAck = true.
552          if (!metaSeverName.equals(server)) {
553            LOG.info("Region of hbase:meta " + metaRegionInfo.getEncodedName() + " is on server "
554              + metaSeverName + " moving to " + server);
555            submitRegionMovesWhileUnloading(metaSeverName, Collections.singletonList(server),
556              movedRegions, Collections.singletonList(metaRegionInfo), true);
557          } else {
558            LOG.info("Region of hbase:meta " + metaRegionInfo.getEncodedName() + " already exists"
559              + " on server : " + server);
560          }
561          isolateRegionInfoList.add(RegionInfoBuilder.FIRST_META_REGIONINFO);
562        }
563
564        if (!hRegionLocationRegionIsolation.isEmpty()) {
565          for (HRegionLocation hRegionLocation : hRegionLocationRegionIsolation) {
566            isolateRegionInfo = hRegionLocation.getRegion();
567            isolateRegionInfoList.add(isolateRegionInfo);
568            if (hRegionLocation.getServerName() == server) {
569              LOG.info("Region " + hRegionLocation.getRegion().getEncodedName() + " already exists"
570                + " on server : " + server.getHostname());
571            } else {
572              Future<Boolean> isolateRegionTask =
573                isolateRegionPool.submit(new MoveWithAck(conn, isolateRegionInfo,
574                  hRegionLocation.getServerName(), server, recentlyIsolatedRegion));
575              isolateRegionTaskList.add(isolateRegionTask);
576            }
577          }
578        }
579
580        if (!isolateRegionTaskList.isEmpty()) {
581          isolateRegionPool.shutdown();
582          // Now that we have fetched all the region's regionInfo, we can move them.
583          waitMoveTasksToFinish(isolateRegionPool, isolateRegionTaskList,
584            admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX));
585
586          Set<RegionInfo> currentRegionsOnTheServer = new HashSet<>(admin.getRegions(server));
587          if (!currentRegionsOnTheServer.containsAll(isolateRegionInfoList)) {
588            // If all the regions are not online on the target server,
589            // we don't put RS in decommission mode and exit from here.
590            LOG.error("One of the Region move failed OR stuck in transition...Quitting now");
591            break;
592          }
593        } else {
594          LOG.info("All regions already exists on server : " + server.getHostname());
595        }
596        // Once region has been moved to target RS, put the target RS into decommission mode,
597        // so master doesn't assign new region to the target RS while we unload the target RS.
598        // Also pass 'offload' flag as false since we don't want master to offload the target RS.
599        List<ServerName> listOfServer = new ArrayList<>();
600        listOfServer.add(server);
601        LOG.info("Putting server : " + server.getHostname() + " in decommission/draining mode");
602        admin.decommissionRegionServers(listOfServer, false);
603      }
604      List<RegionInfo> regionsToMove = admin.getRegions(server);
605      // Remove all the regions from the online Region list, that we just isolated.
606      // This will also include hbase:meta if it was isolated.
607      regionsToMove.removeAll(isolateRegionInfoList);
608      regionsToMove.removeAll(movedRegions);
609      if (regionsToMove.isEmpty()) {
610        LOG.info("No Regions to move....Quitting now");
611        break;
612      }
613      LOG.info("Moving {} regions from {} to {} servers using {} threads .Ack Mode: {}",
614        regionsToMove.size(), this.hostname, regionServers.size(), this.maxthreads, ack);
615
616      Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove);
617      if (metaRegion.isPresent()) {
618        RegionInfo meta = metaRegion.get();
619        // hbase:meta should move explicitly in Ack mode.
620        submitRegionMovesWhileUnloading(server, regionServers, movedRegions,
621          Collections.singletonList(meta), true);
622        regionsToMove.remove(meta);
623      }
624      submitRegionMovesWhileUnloading(server, regionServers, movedRegions, regionsToMove, false);
625    }
626  }
627
628  private void submitRegionMovesWhileUnloading(ServerName server, List<ServerName> regionServers,
629    List<RegionInfo> movedRegions, List<RegionInfo> regionsToMove, boolean forceMoveRegionByAck)
630    throws Exception {
631    final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
632    List<Future<Boolean>> taskList = new ArrayList<>();
633    int serverIndex = 0;
634    for (RegionInfo regionToMove : regionsToMove) {
635      // To move/isolate hbase:meta on a server, it should happen explicitly by Ack mode, hence the
636      // forceMoveRegionByAck = true.
637      if (ack || forceMoveRegionByAck) {
638        Future<Boolean> task = moveRegionsPool.submit(new MoveWithAck(conn, regionToMove, server,
639          regionServers.get(serverIndex), movedRegions));
640        taskList.add(task);
641      } else {
642        Future<Boolean> task = moveRegionsPool.submit(new MoveWithoutAck(admin, regionToMove,
643          server, regionServers.get(serverIndex), movedRegions));
644        taskList.add(task);
645      }
646      serverIndex = (serverIndex + 1) % regionServers.size();
647    }
648    moveRegionsPool.shutdown();
649    long timeoutInSeconds = regionsToMove.size()
650      * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
651    waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds);
652  }
653
654  private boolean waitTaskToFinish(ExecutorService pool, Future<Boolean> task, String operation)
655    throws TimeoutException, InterruptedException, ExecutionException {
656    pool.shutdown();
657    try {
658      if (!pool.awaitTermination((long) this.timeout, TimeUnit.SECONDS)) {
659        LOG.warn("Timed out before finishing the " + operation + " operation. Timeout: "
660          + this.timeout + "sec");
661        pool.shutdownNow();
662      }
663    } catch (InterruptedException e) {
664      pool.shutdownNow();
665      Thread.currentThread().interrupt();
666    }
667    try {
668      return task.get(5, TimeUnit.SECONDS);
669    } catch (InterruptedException e) {
670      LOG.warn("Interrupted while " + operation + " Regions on " + this.hostname, e);
671      throw e;
672    } catch (ExecutionException e) {
673      LOG.error("Error while " + operation + " regions on RegionServer " + this.hostname, e);
674      throw e;
675    }
676  }
677
678  private void waitMoveTasksToFinish(ExecutorService moveRegionsPool,
679    List<Future<Boolean>> taskList, long timeoutInSeconds) throws Exception {
680    try {
681      if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) {
682        moveRegionsPool.shutdownNow();
683      }
684    } catch (InterruptedException e) {
685      moveRegionsPool.shutdownNow();
686      Thread.currentThread().interrupt();
687    }
688    for (Future<Boolean> future : taskList) {
689      try {
690        // if even after shutdownNow threads are stuck we wait for 5 secs max
691        if (!future.get(5, TimeUnit.SECONDS)) {
692          LOG.error("Was Not able to move region....Exiting Now");
693          throw new Exception("Could not move region Exception");
694        }
695      } catch (InterruptedException e) {
696        LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e);
697        throw e;
698      } catch (ExecutionException e) {
699        boolean ignoreFailure = ignoreRegionMoveFailure(e);
700        if (ignoreFailure) {
701          LOG.debug("Ignore region move failure, it might have been split/merged.", e);
702        } else {
703          LOG.error("Got Exception From Thread While moving region {}", e.getMessage(), e);
704          throw e;
705        }
706      } catch (CancellationException e) {
707        LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds
708          + "secs", e);
709        throw e;
710      }
711    }
712  }
713
714  private boolean ignoreRegionMoveFailure(ExecutionException e) {
715    boolean ignoreFailure = false;
716    if (e.getCause() instanceof UnknownRegionException) {
717      // region does not exist anymore
718      ignoreFailure = true;
719    } else if (
720      e.getCause() instanceof DoNotRetryRegionException && e.getCause().getMessage() != null
721        && e.getCause().getMessage()
722          .contains(AssignmentManager.UNEXPECTED_STATE_REGION + "state=SPLIT,")
723    ) {
724      // region is recently split
725      ignoreFailure = true;
726    }
727    return ignoreFailure;
728  }
729
730  private ServerName getTargetServer() throws Exception {
731    ServerName server = null;
732    int maxWaitInSeconds =
733      admin.getConfiguration().getInt(SERVERSTART_WAIT_MAX_KEY, DEFAULT_SERVERSTART_WAIT_MAX);
734    long maxWait = EnvironmentEdgeManager.currentTime() + maxWaitInSeconds * 1000;
735    while (EnvironmentEdgeManager.currentTime() < maxWait) {
736      try {
737        List<ServerName> regionServers = new ArrayList<>();
738        regionServers.addAll(admin.getRegionServers());
739        // Remove the host Region server from target Region Servers list
740        server = stripServer(regionServers, hostname, port);
741        if (server != null) {
742          break;
743        } else {
744          LOG.warn("Server " + hostname + ":" + port + " is not up yet, waiting");
745        }
746      } catch (IOException e) {
747        LOG.warn("Could not get list of region servers", e);
748      }
749      Thread.sleep(500);
750    }
751    if (server == null) {
752      LOG.error("Server " + hostname + ":" + port + " is not up. Giving up.");
753      throw new Exception("Server " + hostname + ":" + port + " to load regions not online");
754    }
755    return server;
756  }
757
758  private List<RegionInfo> readRegionsFromFile(String filename) throws IOException {
759    List<RegionInfo> regions = new ArrayList<>();
760    File f = new File(filename);
761    if (!f.exists()) {
762      return regions;
763    }
764    try (
765      DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(f)))) {
766      int numRegions = dis.readInt();
767      int index = 0;
768      while (index < numRegions) {
769        regions.add(RegionInfo.parseFromOrNull(Bytes.readByteArray(dis)));
770        index++;
771      }
772    } catch (IOException e) {
773      LOG.error("Error while reading regions from file:" + filename, e);
774      throw e;
775    }
776    return regions;
777  }
778
779  /**
780   * Write the number of regions moved in the first line followed by regions moved in subsequent
781   * lines
782   */
783  private void writeFile(String filename, List<RegionInfo> movedRegions) throws IOException {
784    try (DataOutputStream dos =
785      new DataOutputStream(new BufferedOutputStream(new FileOutputStream(filename)))) {
786      dos.writeInt(movedRegions.size());
787      for (RegionInfo region : movedRegions) {
788        Bytes.writeByteArray(dos, RegionInfo.toByteArray(region));
789      }
790    } catch (IOException e) {
791      LOG.error("ERROR: Was Not able to write regions moved to output file but moved "
792        + movedRegions.size() + " regions", e);
793      throw e;
794    }
795  }
796
797  private void deleteFile(String filename) {
798    File f = new File(filename);
799    if (f.exists()) {
800      f.delete();
801    }
802  }
803
804  /**
805   * @param filename The file should have 'host:port' per line
806   * @return List of servers from the file in format 'hostname:port'.
807   */
808  private List<String> readServersFromFile(String filename) throws IOException {
809    List<String> servers = new ArrayList<>();
810    if (filename != null) {
811      try {
812        Files.readAllLines(Paths.get(filename)).stream().map(String::trim)
813          .filter(((Predicate<String>) String::isEmpty).negate()).map(String::toLowerCase)
814          .forEach(servers::add);
815      } catch (IOException e) {
816        LOG.error("Exception while reading servers from file,", e);
817        throw e;
818      }
819    }
820    return servers;
821  }
822
823  /**
824   * Designates or excludes the servername whose hostname and port portion matches the list given in
825   * the file. Example:<br>
826   * If you want to designated RSs, suppose designatedFile has RS1, regionServers has RS1, RS2 and
827   * RS3. When we call includeExcludeRegionServers(designatedFile, regionServers, true), RS2 and RS3
828   * are removed from regionServers list so that regions can move to only RS1. If you want to
829   * exclude RSs, suppose excludeFile has RS1, regionServers has RS1, RS2 and RS3. When we call
830   * includeExcludeRegionServers(excludeFile, servers, false), RS1 is removed from regionServers
831   * list so that regions can move to only RS2 and RS3.
832   */
833  private void includeExcludeRegionServers(String fileName, List<ServerName> regionServers,
834    boolean isInclude) throws IOException {
835    if (fileName != null) {
836      List<String> servers = readServersFromFile(fileName);
837      if (servers.isEmpty()) {
838        LOG.warn("No servers provided in the file: {}." + fileName);
839        return;
840      }
841      Iterator<ServerName> i = regionServers.iterator();
842      while (i.hasNext()) {
843        String rs = i.next().getServerName();
844        String rsPort = rs.split(ServerName.SERVERNAME_SEPARATOR)[0].toLowerCase() + ":"
845          + rs.split(ServerName.SERVERNAME_SEPARATOR)[1];
846        if (isInclude != servers.contains(rsPort)) {
847          i.remove();
848        }
849      }
850    }
851  }
852
853  /**
854   * Exclude master from list of RSs to move regions to
855   */
856  private void stripMaster(List<ServerName> regionServers) throws IOException {
857    ServerName master = admin.getClusterMetrics(EnumSet.of(Option.MASTER)).getMasterName();
858    stripServer(regionServers, master.getHostname(), master.getPort());
859  }
860
861  /**
862   * Remove the servername whose hostname and port portion matches from the passed array of servers.
863   * Returns as side-effect the servername removed.
864   * @return server removed from list of Region Servers
865   */
866  private ServerName stripServer(List<ServerName> regionServers, String hostname, int port) {
867    for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) {
868      ServerName server = iter.next();
869      if (
870        server.getAddress().getHostName().equalsIgnoreCase(hostname)
871          && server.getAddress().getPort() == port
872      ) {
873        iter.remove();
874        return server;
875      }
876    }
877    return null;
878  }
879
880  @Override
881  protected void addOptions() {
882    this.addRequiredOptWithArg("r", "regionserverhost", "region server <hostname>|<hostname:port>");
883    this.addRequiredOptWithArg("o", "operation",
884      "Expected: load/unload/unload_from_rack/isolate_regions");
885    this.addOptWithArg("m", "maxthreads",
886      "Define the maximum number of threads to use to unload and reload the regions");
887    this.addOptWithArg("i", "isolateRegionIds",
888      "Comma separated list of Region IDs hash to isolate on a RegionServer and put region server"
889        + " in draining mode. This option should only be used with '-o isolate_regions'."
890        + " By putting region server in decommission/draining mode, master can't assign any"
891        + " new region on this server. If one or more regions are not found OR failed to isolate"
892        + " successfully, utility will exist without putting RS in draining/decommission mode."
893        + " Ex. --isolateRegionIds id1,id2,id3 OR -i id1,id2,id3");
894    this.addOptWithArg("x", "excludefile",
895      "File with <hostname:port> per line to exclude as unload targets; default excludes only "
896        + "target host; useful for rack decommisioning.");
897    this.addOptWithArg("d", "designatedfile",
898      "File with <hostname:port> per line as unload targets;" + "default is all online hosts");
899    this.addOptWithArg("f", "filename",
900      "File to save regions list into unloading, or read from loading; "
901        + "default /tmp/<usernamehostname:port>");
902    this.addOptNoArg("n", "noack",
903      "Turn on No-Ack mode(default: false) which won't check if region is online on target "
904        + "RegionServer, hence best effort. This is more performant in unloading and loading "
905        + "but might lead to region being unavailable for some time till master reassigns it "
906        + "in case the move failed");
907    this.addOptWithArg("t", "timeout", "timeout in seconds after which the tool will exit "
908      + "irrespective of whether it finished or not;default Integer.MAX_VALUE");
909  }
910
911  @Override
912  protected void processOptions(CommandLine cmd) {
913    String hostname = cmd.getOptionValue("r");
914    rmbuilder = new RegionMoverBuilder(hostname);
915    this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT);
916    if (cmd.hasOption('m')) {
917      rmbuilder.maxthreads(Integer.parseInt(cmd.getOptionValue('m')));
918    }
919    if (this.loadUnload.equals("isolate_regions") && cmd.hasOption("isolateRegionIds")) {
920      rmbuilder
921        .isolateRegionIdArray(Arrays.asList(cmd.getOptionValue("isolateRegionIds").split(",")));
922    }
923    if (cmd.hasOption('n')) {
924      rmbuilder.ack(false);
925    }
926    if (cmd.hasOption('f')) {
927      rmbuilder.filename(cmd.getOptionValue('f'));
928    }
929    if (cmd.hasOption('x')) {
930      rmbuilder.excludeFile(cmd.getOptionValue('x'));
931    }
932    if (cmd.hasOption('d')) {
933      rmbuilder.designatedFile(cmd.getOptionValue('d'));
934    }
935    if (cmd.hasOption('t')) {
936      rmbuilder.timeout(Integer.parseInt(cmd.getOptionValue('t')));
937    }
938    this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT);
939  }
940
941  @Override
942  protected int doWork() throws Exception {
943    boolean success;
944    try (RegionMover rm = rmbuilder.build()) {
945      if (loadUnload.equalsIgnoreCase("load")) {
946        success = rm.load();
947      } else if (loadUnload.equalsIgnoreCase("unload")) {
948        success = rm.unload();
949      } else if (loadUnload.equalsIgnoreCase("unload_from_rack")) {
950        success = rm.unloadFromRack();
951      } else if (loadUnload.equalsIgnoreCase("isolate_regions")) {
952        if (rm.isolateRegionIdArray != null && !rm.isolateRegionIdArray.isEmpty()) {
953          success = rm.isolateRegions();
954        } else {
955          LOG.error("Missing -i/--isolate_regions option with '-o isolate_regions' option");
956          LOG.error("Use -h or --help for usage instructions");
957          printUsage();
958          success = false;
959        }
960      } else {
961        printUsage();
962        success = false;
963      }
964    }
965    return (success ? 0 : 1);
966  }
967
968  public static void main(String[] args) {
969    try (RegionMover mover = new RegionMover()) {
970      mover.doStaticMain(args);
971    }
972  }
973}