Run External Processes in Scala

(, en)

Running external commands in Scala is confusing. At least to me. If you look at the process package you might get the impression that it is a separate DSL. External processes can fail in various ways and, to my knowledge, Scala does not have something ready to use that can deal with all of these failure modes out of the box.

Implementation

A usable implementation could look like this:

// src/main/scala/ProcessUtil.scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import scala.concurrent._
import scala.concurrent.duration.{Duration, DurationInt}
import scala.sys.process.{ProcessLogger, _}
import scala.util.{Failure, Success, Try}

/** Closed set of failures that `ProcessUtil.runProcess` reports on the `Left` side of its result. */
sealed trait ProcessError

/** The process ran to completion but exited with a value other than 0 (and other than -1).
  *
  * Note: `stdout` is an `Array[Byte]`, so the generated `equals`/`hashCode` compare it by
  * reference, not by content.
  *
  * @param exitValue exit value reported by the process
  * @param stdout    bytes the process wrote to standard output
  * @param stderr    text the process wrote to standard error
  * @param cmd       command line that was executed
  * @param message   exception message
  * @param cause     optional underlying throwable; `null` when there is none
  */
final case class ProcessFailed(
    exitValue: Int,
    stdout: Array[Byte],
    stderr: String,
    cmd: Seq[String],
    message: String,
    cause: Throwable = null
) extends Exception(message, cause)
    with ProcessError

/** The command could not be run at all. Unlike the other errors this is a plain value and
  * not an `Exception`.
  *
  * @param exitValue exit value that was reported for the command
  * @param cmd       command line that was attempted
  * @param message   human-readable description
  */
final case class ProcessNotRun(exitValue: Int, cmd: Seq[String], message: String) extends ProcessError

/** The process did not finish within the allowed time.
  *
  * @param message exception message
  * @param cause   optional underlying throwable; `null` when there is none
  */
final case class ProcessTimedOut(
    message: String,
    cause: Throwable = null
) extends Exception(message, cause)
    with ProcessError

/** Waiting for the process was interrupted.
  *
  * @param message exception message
  * @param cause   optional underlying throwable; `null` when there is none
  */
final case class ProcessInterrupted(
    message: String,
    cause: Throwable = null
) extends Exception(message, cause)
    with ProcessError

/** Any failure that does not fit one of the more specific error types.
  *
  * @param message exception message
  * @param cause   optional underlying throwable; `null` when there is none
  */
final case class ProcessUnknownError(
    message: String,
    cause: Throwable = null
) extends Exception(message, cause)
    with ProcessError

object ProcessUtil {

  /** Runs an external command, feeding `data` to its standard input and collecting its output.
    *
    * The calling thread blocks until the process exits or `timeoutAfter` elapses. The process
    * handle is always destroyed before this method returns, including on timeout and on
    * interruption.
    *
    * @param data         bytes written to the standard input of the process
    * @param cmd          command and its arguments
    * @param timeoutAfter maximum time to wait for the process to exit
    * @param ioEc         execution context on which the blocking wait for the exit value runs
    * @return `Right((stdout, stderr))` when the process exits with 0; otherwise a `Left` with
    *         [[ProcessNotRun]] (exit value -1), [[ProcessFailed]] (any other exit value),
    *         [[ProcessTimedOut]], [[ProcessInterrupted]] or [[ProcessUnknownError]]
    */
  def runProcess(data: Array[Byte], cmd: Seq[String], timeoutAfter: Duration = 10.seconds)(implicit
      ioEc: ExecutionContext
  ): Either[ProcessError, (Array[Byte], String)] = {
    import scala.util.control.NonFatal

    val stdin = new ByteArrayInputStream(data)
    val stdout = new ByteArrayOutputStream(data.length)
    val stderr = new StringBuilder()

    // stdout is redirected into the byte buffer, so the logger only has stderr left to receive.
    // Each logged chunk is appended as-is; no separator is inserted between chunks.
    val process = (cmd #< stdin #> stdout).run(ProcessLogger { line => stderr.append(line); () })

    // A plain try/catch is used instead of scala.util.Try: Try only catches NonFatal throwables,
    // and InterruptedException is not NonFatal. With Try an interrupt would escape, skip the
    // cleanup below and never be reported as ProcessInterrupted.
    val exit: Either[ProcessError, Int] =
      try Right(Await.result(Future(blocking(process.exitValue())), timeoutAfter))
      catch {
        case ex: TimeoutException =>
          Left(ProcessTimedOut(message = "process timed out", cause = ex))
        case ex: InterruptedException =>
          // The interrupt is turned into a value, so restore the flag for code further up.
          Thread.currentThread().interrupt()
          Left(ProcessInterrupted(message = "process interrupted", cause = ex))
        case NonFatal(ex) =>
          Left(ProcessUnknownError("unknown error occurred", ex))
      } finally {
        stdin.close()
        stdout.close()
        process.destroy()
      }

    exit match {
      case Left(error) => Left(error)
      case Right(0)    => Right((stdout.toByteArray, stderr.toString()))
      // if you supply a wrong command, which does not exist, java returns with exit code -1
      case Right(-1) =>
        Left(
          ProcessNotRun(
            exitValue = -1,
            cmd = cmd,
            message = "process did not run"
          )
        )
      case Right(exitValue) =>
        Left(
          ProcessFailed(
            exitValue = exitValue,
            stdout = stdout.toByteArray,
            stderr = stderr.toString(),
            cmd = cmd,
            message = "process failed"
          )
        )
    }
  }
}

You will probably need to adapt it to your needs. I decided to use Array[Byte] for stdin and stdout, but you might want to use String or List[String] instead. I’ll leave this as an exercise for the reader.

Usage

How do you use it? You can find some common scenarios as tests below:

// src/test/scala/ProcessUtilSpec.scala
import ProcessUtil.runProcess

import org.scalatest.Inside.inside
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import java.io.ByteArrayInputStream
import java.util.zip.GZIPInputStream
import scala.concurrent.ExecutionContext
import scala.concurrent.duration.DurationInt

/** Exercises `runProcess` against real external commands; the tests invoke `gzip` and `perl`,
  * so both must be available where the suite runs.
  */
class ProcessUtilSpec extends AnyFlatSpec with Matchers {
  // Passed implicitly to runProcess as its execution context.
  private implicit val executionContext: ExecutionContext = ExecutionContext.Implicits.global

  it should "call external program successfully" in {
    val data = "Ich liebe Kartoffelbrei".getBytes("UTF-8")
    val res = runProcess(data, List("gzip", "-c"), 10.seconds)

    // Match on Right instead of unwrapping with a null fallback, so that a Left fails the test
    // with a readable message rather than a NullPointerException further down.
    inside(res) { case Right((stdout, _)) =>
      data should not be stdout
      // Decompressing the output must give back exactly the bytes that were sent in.
      val decompressed = new GZIPInputStream(new ByteArrayInputStream(stdout)).readAllBytes()
      data should be(decompressed)
    }
  }

  it should "fail when command does not exist" in {
    val data = "Ich liebe Kartoffelbrei".getBytes("UTF-8")
    val res = runProcess(data, List("gzipgzipgzipgzipgzip", "-c"), 10.seconds)

    inside(res) { case Left(err: ProcessNotRun) =>
      err.exitValue should be(-1)
    }
  }

  it should "fail when command takes too long" in {
    val data = "Ich liebe Kartoffelbrei".getBytes("UTF-8")
    // The command sleeps for 2 seconds while the timeout is 1 second.
    val res = runProcess(data, List("perl", "-e", "sleep(2);"), 1.seconds)

    inside(res) { case Left(_: ProcessTimedOut) => }
  }

  it should "fail when command fails" in {
    val data = "Ich liebe Kartoffelbrei".getBytes("UTF-8")
    val res = runProcess(
      data,
      List("perl", "-e", "print \"ich mag Nudeln\"; print STDERR \"ich nicht\"; exit(1);"),
      10.seconds
    )

    // Both output streams must still be captured when the exit value is non-zero.
    inside(res) { case Left(err: ProcessFailed) =>
      err.exitValue should be(1)
      err.stderr should be("ich nicht")
      new String(err.stdout, "UTF-8") should be("ich mag Nudeln")
    }
  }

  it should "succeed when command succeeds" in {
    val data = "Ich liebe Kartoffelbrei".getBytes("UTF-8")
    val res = runProcess(
      data,
      List("perl", "-e", "print \"ich mag Nudeln\"; print STDERR \"ich nicht\";"),
      10.seconds
    )

    inside(res) { case Right((stdout, stderr)) =>
      stderr should be("ich nicht")
      new String(stdout, "UTF-8") should be("ich mag Nudeln")
    }
  }
}

Discussion

Overall, if you don’t have to deal with binary data and you don’t need timeouts, you can stick to !!.

// `!!` runs the command, blocks until it exits and yields its standard output as a String.
val stdout = Seq("ls", "-l").!!

But as soon as you want to deal with binary data or timeouts, you have to move one layer of abstraction down and deal with the details.

See Also