aboutsummaryrefslogtreecommitdiffstats
path: root/lib/external_command.rb
diff options
context:
space:
mode:
authorRobin Houston <robin@lenny.robin>2011-06-20 23:34:44 +0100
committerRobin Houston <robin@lenny.robin>2011-06-20 23:34:44 +0100
commit770ceda38d780c2e8d3f0260a0f8912923d82b62 (patch)
treeea9e4972af2e933cd0bbd86d12e49b4aea06cc7b /lib/external_command.rb
parentdb1136c70e1a580efd9307dce2efe421aaa63762 (diff)
When external converters are used to extract text from attachments
for Xapian, suppress the stderr output of these external programs unless they actually fail. It is possible this will not significantly reduce the noise from converters, because they may actually have been failing. At least with this change we’ll be able to tell which it is. Closes #52.
Diffstat (limited to 'lib/external_command.rb')
-rw-r--r--lib/external_command.rb131
1 files changed, 131 insertions, 0 deletions
diff --git a/lib/external_command.rb b/lib/external_command.rb
new file mode 100644
index 000000000..de8bfc11e
--- /dev/null
+++ b/lib/external_command.rb
@@ -0,0 +1,131 @@
+# Run an external command, capturing its stdout and stderr
+# streams into variables.
+#
+# So it’s rather like the `backtick` built-in, except that:
+# - The command is run as-is, rather than being parsed by the shell;
+# - Standard error is also captured.
+#
+# After the run() method has been called, the instance variables
+# out, err and status contain the contents of the process’s stdout,
+# the contents of its stderr, and the exit status.
+#
+# Example usage:
+# require 'external_command'
+# xc = ExternalCommand("ls", "-l").run()
+# puts "Ran ls -l with exit status #{xc.status}"
+# puts "===STDOUT===\n#{xc.out}"
+# puts "===STDERR===\n#{xc.err}"
+#
+# The out and err attributes are writeable. If you assign
+# a string, after calling the constructor and before calling
+# run(), then the subprocess output/error will be appended
+# to this string.
+
+# <rant author="robin">
+# In any sane language, this would be implemented with a
+# single child process. The parent process would block on
+# select(), and when the child process terminated, the
+# select call would be interrupted by a CHLD signal
+# and return EINTR. Unfortunately Ruby goes out of its
+# way to prevent this from working, automatically restarting
+# the select call if EINTR is returned. Therefore we
+# use a parent-child-grandchild arrangement, where the
+# parent blocks on select() and the child blocks on
+# waitpid(). When the child detects that the grandchild
+# has finished, it writes to a pipe that’s included in
+# the parent’s select() for this purpose.
+# </rant>
+
+class ExternalCommand
+ attr_accessor :out, :err
+ attr_reader :status
+
+ def initialize(cmd, *args)
+ @cmd = cmd
+ @args = args
+
+ # Strings to collect stdout and stderr from the child process
+ # These may be replaced by the caller, to append to existing strings.
+ @out = ""
+ @err = ""
+ @fin = ""
+ end
+
+ def run()
+ # Pipes for parent-child communication
+ @out_read, @out_write = IO::pipe
+ @err_read, @err_write = IO::pipe
+ @fin_read, @fin_write = IO::pipe
+
+ @pid = fork do
+ # Here we’re in the child process.
+ child_process
+ end
+
+ # Here we’re in the parent process.
+ parent_process
+
+ return self
+ end
+
+ private
+
+ def child_process()
+ # Reopen stdout and stderr to point at the pipes
+ STDOUT.reopen(@out_write)
+ STDERR.reopen(@err_write)
+
+ # Close all the filehandles other than the ones we intend to use.
+ ObjectSpace.each_object(IO) do |fh|
+ fh.close unless (
+ [STDOUT, STDERR, @fin_write].include?(fh) || fh.closed?)
+ end
+
+ Process::waitpid(fork { grandchild_process })
+ @fin_write.puts($?.exitstatus.to_s)
+
+ exit! 0
+ end
+
+ def grandchild_process()
+ exec(@cmd, *@args)
+
+ # This is only reached if the exec fails
+ @err_write.print("Failed to exec: #{[@cmd, *@args].join(' ')}")
+ exit! 99
+ end
+
+ def parent_process()
+ # Close the writing ends of the pipes
+ @out_write.close
+ @err_write.close
+ @fin_write.close
+
+ @fhs = {@out_read => @out, @err_read => @err, @fin_read => @fin}
+
+ while @fin.empty?
+ r = read_data
+ if r.nil?
+ raise "select() timed out even with a nil (infinite) timeout"
+ end
+ end
+
+ Process::waitpid(@pid)
+ @status = @fin.to_i
+ @out_read.close
+ @err_read.close
+ end
+
+ def read_data()
+ ready_array = IO.select(@fhs.keys, [], [], nil)
+ return false if ready_array.nil?
+ ready_array[0].each do |fh|
+ begin
+ @fhs[fh] << fh.readpartial(8192)
+ rescue EOFError
+ @fhs.delete fh
+ end
+ end
+ return true
+ end
+end