#!/usr/bin/perl
use strict;
use File::Copy;
use File::Spec;
use File::Basename;
use File::Which;
use Cwd 'realpath';
use Getopt::Std;
use List::Util qw(max);
use URI::Encode;

my $extract_range_specifier;
my $extract_pid;
my $extract_file;
my $output_file;
my $output_path;
my $extract_offset;
my $extract_size;
my $pid_running;
my $verbose=0;
my $error=0;
my $output_to_stdout=0;

sub usage {
  print("Usage: $0 [-o|v|h] URI... \n");
  print("  URIs can be read from STDIN, one per line.\n");
  print("  From the URIs specified, extracts code objects into files named: ");
  print("<executable_name>-[pid<number>]-offset<number>-size<number>.co\n\n");
  print("Options:\n");
  print("  -o <path> \tPath for output. If \"-\" specified, code object is printed to STDOUT.\n");
  print("  -v        \tVerbose output to STDOUT (includes Entry ID).\n");
  print("  -h        \tShow this help message.\n");
  print("\nURI syntax:\n");
  print("\tcode_object_uri ::== file_uri | memory_uri\n");
  print("\tfile_uri        ::== \"file://\" extract_file [ range_specifier ]\n");
  print("\tmemory_uri      ::== \"memory://\" process_id range_specifier\n");
  print("\trange_specifier ::== [ \"#\" | \"?\" ] \"offset=\" number \"&\" \"size=\" number\n");
  print("\textract_file    ::== URI_ENCODED_OS_FILE_PATH\n");
  print("\tprocess_id      ::== DECIMAL_NUMBER\n");
  print("\tnumber          ::== HEX_NUMBER \| DECIMAL_NUMBER \| OCTAL_NUMBER\n\n");
  print("\tExample: file://dir1/dir2/hello_world#offset=133&size=14472 \n");
  print("\t         memory://1234#offset=0x20000&size=3000\n\n");

  exit($error);
}

# Process options
my %options=();
getopts('vho:', \%options);

if (defined $options{h}) {
  usage();
}

if (defined $options{v}) {
  $verbose = 1;
}

if (defined $options{o}) {
  $output_path = $options{o};
  if ($output_path eq "-") {
    $output_to_stdout=1;
  } else {
    (-d $output_path) || die("Error: Path \'$output_path\' cannot be found.\n");
  }
}

# push STDIN to ARGV array.
push @ARGV, <STDIN> unless -t STDIN;

# error check: enough arguments presented.
if ($#ARGV < 0) {
  print(STDERR "Error: No arguments.\n"); $error++;
  usage();
}

# error check: command dd is available.
my $dd_cmd = which("dd");
(-f $dd_cmd) || die("Error: Can't find dd command\n");

foreach my $uri_str(@ARGV) {
  chomp $uri_str;

  # we expect the URI to follow this BNF syntax:
  #
  #   code_object_uri ::== file_uri | memory_uri
  #   file_uri        ::== "file://" extract_file [ range_specifier ]
  #   memory_uri      ::== "memory://" process_id range_specifier
  #     range_specifier  ::== [ "#" | "?" ] "offset=" number "&" "size=" number
  #     extract_file        ::== URI_ENCODED_OS_FILE_PATH
  #     process_id       ::== DECIMAL_NUMBER
  #     number           ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER

  # Example: file://dir1/dir2/hello_world#offset=133&size=14472
  #          memory://1234#offset=0x20000&size=3000

  my ($uri_protocol, $specs) = split(/:\/\//,$uri_str);
  my $obj_uri_encode = URI::Encode->new();
  my $decoded_extract_file;

  if (lc($uri_protocol) eq "file") {
    # expect file path
    ($extract_file, $extract_range_specifier) = split(/[#,?]/,$specs);

    # decode the file name. URIs may have file/path names with non-alphanumeric characters, which will be encoded with %.  We need to decode these.
    $decoded_extract_file = $obj_uri_encode->decode($extract_file);

    # verify file exists:
    if (! -e $decoded_extract_file) {
      print(STDERR "Error: can't find file: $decoded_extract_file\n"); $error++;
      next;
    }

    # use the output_path is specified, otherwise use current working dir.
    if ($output_path ne "") {
      $output_file = File::Spec->catfile($output_path, basename($decoded_extract_file));
    } else {
      $output_file = basename($decoded_extract_file);
    }

  } elsif ( lc($uri_protocol) eq "memory") {
    # expect memory specifier
    ($extract_pid, $extract_range_specifier) = split(/[#,?]/,$specs);

    # verify pid is currently running
    $pid_running = kill 0, $extract_pid;
    if (! $pid_running) {
      print(STDERR "Error: PID: $extract_pid is NOT running\n"); $error++;
      next;
    }

    # get pid filename:
    $extract_file = "/proc/$extract_pid/mem";
   
    # verify file exists:
    if (! -e $extract_file) {
      print(STDERR "Error: can't find file: $extract_file\n"); $error++;
      next;
    }

    # for extracting from a pid, make the output file in the current dir/path with the pid value as a name.
    $output_file = "pid${extract_pid}";
    
    # need to set $decoded_extract_file, because later we use this for other checks.
    $decoded_extract_file = $extract_file;

  } else {
    # error, unrecognized Code Object URI
    print(STDERR "Error: \'$uri_protocol\' is not recognized as a supported code object URI.\n"); $error++;
    next;
  }

  # it is valid to not give a range specifier in a URI, in which case the entire code object will be extracted.
  if ($extract_range_specifier ne "") {
    ($extract_offset, $extract_size) = split(/[&]/,$extract_range_specifier);
    (undef, $extract_offset) = split(/=/,$extract_offset);
    (undef, $extract_size) = split(/=/,$extract_size);
  } else {
    # Error if URI is a memory request, and we have no range_specifier.
    if ($pid_running) {
      print(STDERR "Error: must specify a Range Specifier (offset and size) for a memory URI: $uri_str\n"); $error++;
      next;
    }

    $extract_offset = 0;
    $extract_size = -s $decoded_extract_file;
  }

  # We should have at least a valid size to extract; ignore cases with size=0.
  if ($extract_size != 0) {
    print("Reading input file \"$extract_file\" ...\n") if ($verbose);

    # only if this is a File URI.
    if (lc($uri_protocol) eq "file") {
      # verify that offset+size does not exceed file size:
      my $file_size = -s $decoded_extract_file;
      my $size = int($extract_offset) + int($extract_size);
      if ( $size > $file_size ) {
        print(STDERR "Error: requested offset($extract_offset) + size($extract_size) exceeds file size($file_size) for file \"$decoded_extract_file\".\n"); $error++;
        next;
      }
    }

    open(INPUT_FP, "<", $decoded_extract_file) || die $!;
    binmode INPUT_FP;

    # extract the code object
    my $co_filename;
    if (!$output_to_stdout) {
      $co_filename = "of=\'${output_file}-offset${extract_offset}-size${extract_size}.co\'";
    }

    my $dd_cmd_str = "$dd_cmd if=\'$decoded_extract_file\' $co_filename skip=$extract_offset count=$extract_size bs=1 status=none";

    print("DD Command: $dd_cmd_str\n") if ($verbose);

    my $dd_ret = system($dd_cmd_str);
    if ($dd_ret != 0) {
       print(STDERR "Error: DD command ($dd_cmd_str)  failed with RC: $dd_ret\n"); $error++;
    }

    print("Extract request:  file: $extract_file offset: $extract_offset size: $extract_size\n") if ($verbose);
  } else {
    print("Warning: trying to extract from $extract_file at offset=$extract_offset with size=0.  Nothing to extract.\n") if ($verbose);
  }

} # end of for each (URI) argument

exit($error);
