#!/usr/bin/perl -T
#############################################################################
#   w  w  w  . l  i  t  t  l  e  f  i  s  h  .  c  a                        #
#                                                                           #
# repo-proxy                                                                #
#                                                                           #
#                                                                           #
# ------------------------------------------------------------------------- #
# (C)2006 littlefish.ca, All Rights Reserved.                               #
#                                                                           #
# This program is free software; you can redistribute it and/or             #
# modify it under the terms of the GNU General Public License               #
# as published by the Free Software Foundation; either version 2            #
# of the License, or (at your option) any later version.                    #
#                                                                           #
# This program is distributed in the hope that it will be useful,           #
# but WITHOUT ANY WARRANTY; without even the implied warranty of            #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             #
# GNU General Public License for more details.                              #
#                                                                           #
# You should have received a copy of the GNU General Public License         #
# along with this program; if not, write to the                             #
#                Free Software Foundation, Inc.                             #
#                59 Temple Place - Suite 330                                #
#                Boston, MA  02111-1307, USA.                               #
#                                                                           #
# -->A full copy of the GNU General Public License is included in LICENSE   #
# ------------------------------------------------------------------------- #
# Need Support? Email us at support@littlefish.ca                           #
# ------------------------------------------------------------------------- #
# Modification History:                                                     #
# 1.0.0: 03/18/2006 Initial Version                                         #
#############################################################################

# requires (core modules):
#     Fcntl;
#     Socket;
#     Sys::Syslog;
#     POSIX;

my $ver = '0.01';
=head1 NAME

 repo-proxy - A tool to cache repository package downloads

 Copyright (C) 2007 Scott Mazur <scott@littlefish.ca>
 Distributed under the terms of the GNU Public License (GPL).

=head1 SYNOPSIS

 copy repo-proxy to your sbin directory
 ./install

=head1 DESCRIPTION

Repo-proxy is a package repository proxy.  When a package manager is configured
to use repo-proxy, all package requests will be copied and stored in a local
cache directory for future use.  If you have more than one machine on a local
network they can all share the same package download cache saving you not only
bandwidth, but lengthy download times as well.

If you have only one machine, this isn't terribly exciting, but can still be
useful saving repeat updates if you need to later re-install packages.

Repo-proxy can be run using several different methods to accept requests from
clients (package managers such as apt-get or smart) for Release and/or Package
files.  By default repo-proxy listens on port 10001. When a request is received
for a cached file, repo-proxy returns the local file immediately.  Otherwise the
file is fetched from the remote repository source (internet) and served back to
the client while simultaneously saving a copy to the local cache.  Future requests
for the same package will go directly to the local cache without contacting the
repository source.  Each package is only downloaded once, no matter how many local
machines request the same package.

Repo-proxy requires no additional support applications to run.

=head1 INSTALLATION

The easy way
1. Change to the unpacked archive directory

2. run ./install

The harder way
1. Change to the unpacked archive directory

2. Copy the repo-proxy file to your prefered script path
   (usually /usr/local/sbin)

   cp ./repo-proxy /usr/local/sbin/repo-proxy

3. Copy the etc.repo-proxy.conf file to /etc/repo-proxy.conf

   cp ./etc.repo-proxy.conf /etc/repo-proxy.conf

4.  Decide whether to run from xinetd or as a service
    (xinetd is prefered)

5. Xinetd setup:
 a. Copy the etc.xinet.d.repo-proxy file to /etc/xinetd/repo-proxy

    cp ./etc.xinet.d.repo-proxy /etc/xinetd/repo-proxy

 b. Edit /etc/xinetd/repo-proxy
    replace '{PORT}' with '10001'
    replace '{PATH}' with the complete script path
      (usually '/usr/local/sbin/repo-proxy')
 c. Restart xinetd

   /etc/init.d/xinetd restart

or
5. Service setup:
 a. Copy the etc.init.d.repo-proxy file to /etc/init.d/repo-proxy
    and make it executable

    cp ./etc.init.d.repo-proxy /etc/init.d/repo-proxy
    chmod --reference=./etc.init.d.repo-proxy /etc/init.d/repo-proxy

 b. Edit /etc/init.d/repo-proxy

    replace '{PORT}' with '10001'
    replace '{PATH}' with the complete script path
      (usually '/usr/local/sbin/repo-proxy')

 c. Add repo-proxy to run levels and start service

   chkconfig --add repo-proxy
   /etc/init.d/repo-proxy start

6. On the server machine and all client machines, edit your
   /etc/apt/sources.list files, as follows. Assuming your repo-proxy
   server machine is accessible at my.machine.com, where a line says
   something like:

     rpm http://spout.ussg.indiana.edu/linux/...

   change this to:

     rpm http://my.machine.com:10001/spout.ussg.indiana.edu/linux/...

7. Do "apt-get update" as root on the server machine to initialize the
   cache with the Release files.

=head1 FEATURES

1. Requires no additional tools or services to run.  Completely self contained.

2. Easily installed, configured, and removed.  Automatically updates repository
   sources (if allowed).

3. Packages are only cached as they are downloaded (installed).  No need to
   copy the entire repository.

4. Fast.  Package downloads are copied simultaneously with no noticable impact
   on total package download times.

5. Neither clients or sources are checked for any kind of restrictions.  Clients
   are presumed to be coming from an internal local network and sources are
   set according to the package manager setup.  Neither should require checking.
   (this may be considered a bug)

6. Packages that have been replaced with newer versions are not cleared from
   the cache.  (this may be considered a bug)

=head1 BUGS

1. Only HTTP is supported at present (i.e. repo-proxy cannot access an
   FTP URL) but I see no reason not to include FTP support in the future.

2. Repo-proxy was designed in an apt-get/RPM environment.  It may need minor
   tweeks to work in an apt-get/deb environment.

3. Package files are cached according to repository source (in sub-directories).
   This could lead to duplicate Package cache files.  Package files shouldn't be
   duplicated.

4. Neither clients or sources are checked for any kind of restrictions.  Clients
   are presumed to be coming from an internal local network and sources are
   set according to the package manager setup.  Neither should require checking.
   (this may be considered a feature)

5. Packages that have been replaced with newer versions are not cleared from
   the cache.  (this may be considered a feature)

=head1 UPDATES

Please email bug fixes and enhancements to support@littlefish.ca

=cut


use strict;
use warnings;

use Fcntl qw(:DEFAULT :flock);
use Socket;
use Sys::Syslog;
use POSIX qw(:sys_wait_h);

use constant DEBUG => 0;  # general testing purposes
use constant DEBUGFILE => 0;  # file transfer testing
use constant DEBUGFORK => 0;  # forking (careful!!)

use constant LOGNAME => 'repo-proxy';

# Buffer size dynamically adjusted to match transfer speed
use constant MAX_BUFSIZE => 32768; # 32K  65536; # 64K  8196; # 8k 16384; # 16k
use constant MIN_BUFSIZE => 1024; # 1K ;
# increments to size up buffer
use constant INC_BUFSIZE => 2048; # 2K ;

# maximum number of bytes to read from the header
use constant HTTP_MAXHEADER => 500;

# number of seconds to block on any activity
use constant BLOCK_TIMEOUT => 2;

# number of seconds to wait for a new connection
use constant CONNECTION_TIMEOUT => 4;

# default config values
my $default_config_file = '/etc/repo-proxy.conf';

my %config = (
		server_port => '10001',
		cache_dir => '/var/cache/repo-proxy',
		log_messages => 'stdout',
#		log_messages => 'syslog',
		skip_check => 'default',
		pid_file => '/var/run/repo-proxy.pid',
	);

my $have_pid_file = 0;

# - c <file|none>           config file
# - C <file>                cache directory
# - l <file|syslog|stdout>  log file
# - p <number>              port number
# - P <file|none>           pid file
# - s <all|none|default>    skip check

# - D daemonize
# - h help
# - K kill daemon
# - S daemon status
# - v version
# - x run xinetd mode

# --check [source:](base|packages|all)

my %args;
my $flag_options = 'DhKSvx';
my $value_options = 'cClpPs';
my %arg_map = (
		'p' => 'server_port',
		'C' => 'cache_dir',
		'l' => 'log_messages',
		'P' => 'pid_file',
		's' => 'skip_check',
	);
my $usage = <<EOF;
repo-proxy v$ver (C) 2007 Scott Mazur

Usage: repo-proxy [options]

options:
 -c file  alternate config file or 'none' (default: $default_config_file)
 -C dir   cache local file copies in dir (default: $config{cache_dir})
 -D       start a background daemon process and return immediately
 -h       this usage message
 -K       stop the background daemon process
          (daemon must be configured to use a PID file, see '-P')
 -l file  log file name (default: $config{log_messages})
          values 'stdout', 'syslog' or file name
 -p port  port to listen on (default: $config{server_port})
 -P file  write pid of signal thread to 'file' (default: $config{pid_file})
          values 'none' or file name
 -s value skip ckecking for newer files (default: $config{skip_check})
          values 'all', 'none' and 'default'
 -S       return daemon status
 -v       print version
 -x       run from xinetd
EOF

# get command line options
while (@ARGV && $ARGV[0] =~ /^-/) {
	$_ = shift @ARGV;
	if (m/^-([$value_options])(.*)/) {  # value options
		$args{$1} = $2 ? $2 : shift @ARGV;
	}
	elsif (s/^-([$flag_options]+)$/$1/) { # flag options
		while (m/(.)/g) {
			$args{$1}++;
		}
	}
	else { die $usage }
}
if ($args{h}) {
	print $usage;
	exit;
}
if ($args{v}) {
	print "v$ver\n";
	exit;
}
die "Can't use -d and -x together (try repo-proxy -h for correct usage)\n"
	if $args{D} and $args{x};

# where does our config file come from
my $config_file = -f $default_config_file ? $default_config_file : '';
if (my $config_arg = $args{c}) {
	if ($config_arg =~ m/^(none|no|false|0)$/i) {
		$config_file = '';
	}
	elsif ($config_arg = sanitize_file_path($config_arg)) {
		$config_file = $config_arg if -f $config_arg;
	}
}

# load the config file
if ($config_file) {
	open(CONFIG, $config_file) or die "Failed to open $config_file: $!\n";
	while($_ = <CONFIG>) {
		s/[#;].*//; # strip comments
		next if m/^\s*$/; # skip blank lines
		# separate key from value
		if (m/^\s*(\S+):?\s+(.*)/) {
			my ($key, $value) = (lc($1), $2);
			if (! defined $config{$key}) {
				warn "Config file error (unknown key): $_";
				next;
			}
			config_edit(\%config, $key, $value)
				or warn "Config file error (invalid value): $_\n";
		}
	}
	close(CONFIG);
}

# over-ride with command line arguments
foreach my $key (keys %arg_map) {
	next unless defined $args{$key};
	my $config_key = $arg_map{$key};
	config_edit(\%config, $config_key, $args{$key})
		or die "Argument error -$key $args{$key} " .
				"(try repo-proxy -h for correct usage)\n";
}

# empty log_messages if everything goes to stdout
if ($config{log_messages} eq 'stdout') {
	# extra cruft in output messes xinetd interaction
	# and daemons have no output
	$config{log_messages} = ($args{D} or $args{x}) ? 'syslog' : '';
}
map {logMessage("DEBUG config: $_  $config{$_}")} sort keys %config if DEBUG;

# terminate a background daemon?
if ($args{K}) {
	my $pid_file = $config{pid_file} or
		die "No PID file was configured (try repo-proxy -h for correct usage)\n";

	# check if we're already running
	if (! -e $pid_file) {
		print "repo-proxy doesn't appear to be running\n";
		exit 0;
	}

	open(PID, $pid_file) or
		die "failed to open PID file $pid_file: $!\n";
	$_ = <PID>;
	chomp $_;
	m/^(\d+)$/; # untaint
	my $pid = $1 || 0;
	my $running = kill(0, $pid);
	close(PID);

	if ($running) {
		warn "kill repo-process (pid $pid)\n";
		kill('TERM', $pid);
	}
	else {
		warn "found a dead pid file? $pid_file (pid $pid)\n";
		unlink($pid_file) or die "failed to delete $pid_file: $!\n";
	}

	exit;
}

# status of background daemon?
if ($args{S}) {
	my $pid_file = $config{pid_file} or
		die "No PID file was configured (try repo-proxy -h for correct usage)\n";

	# check if we're already running
	if (! -e $pid_file) {
		print "repo-proxy doesn't appear to be running\n";
		exit 0;
	}

	open(PID, $pid_file) or
		die "failed to open PID file $pid_file: $!\n";
	$_ = <PID>;
	chomp $_;
	m/^(\d+)$/; # untaint
	my $pid = $1 || 0;
	my $running = kill(0, $pid);
	close(PID);

	if ($running) {
		print "repo-process is running (pid $pid)\n";
		exit 0;
	}

	warn "found a dead pid file? $pid_file (pid $pid)\n";
	unlink($pid_file) or die "failed to delete $pid_file: $!\n";

	exit 1;
}


# lets get this show rolling!
logMessage("DEBUG Start repo-proxy ($$)") if DEBUG;
logMessage("DEBUG using config file: $config_file") if $config_file and DEBUG;

# xinetd mode
if ($args{x}) {
	open(my $client, "+>&STDIN");  # read/write
	my ($result, $transfered) = handle_client($client);
	logMessage("DEBUG handle_client result: $result  transfered: " .
		($transfered || 0)) if DEBUG;
	exit;
}

# start the web server
our %children;
our $keep_running = 1;
sub REAPER {
	foreach (keys %children) {
		if (waitpid($_, &WNOHANG) > 0) {
 			print "lost child came home $_\n" if DEBUGFORK;
  			delete $children{$_};
 		}
 	}
	$SIG{CHLD} = \&REAPER;
}

# stop the daemon process gracefully
sub TERMINATE {
	$keep_running = 0;
}

# FIXME
sub RELOAD {
	$keep_running = 0;
}

$SIG{PIPE} = 'IGNORE';
#$SIG{CHLD} = 'IGNORE';
$SIG{CHLD} = \&REAPER;

# start daemon mode
if ($args{D}) {
	if (my $pid_file = $config{pid_file}) {
		# check if we're already running
		if (open(PID, $pid_file)) {
			$_ = <PID>;
			chomp $_;
			m/^(\d+)$/; # untaint
			my $pid = $1 || 0;
			my $running = kill(0, $pid);
			close(PID);
			die "repo-proxy is already running (pid $pid)\n" if $running;
			logMessage("found a dead pid file? $pid_file (pid $pid)");
			unlink($pid_file) or die "failed to delete $pid_file: $!\n";
		}
	}
	# flush the buffer
	$| = 1;

	# reset current directory to file system root
	chdir('/') or die("Can't chdir to /: $!\n");

	# now fork
	defined(my $pid = fork()) or die("Can't fork: $!\n");
	exit if $pid;  # done with parent

	# continue as child

	# write out the new PID file
	if (my $pid_file = $config{pid_file}) {
		open(PID, '>', $pid_file) or
			die "failed to open pid file $pid_file: $!\n";
		print PID "$$\n";
		close(PID) or die "failed to close pid file $pid_file: $!\n";
		$have_pid_file = $pid_file;
	}

	# close the standard file handles.
	open (STDIN, '/dev/null') or
		logAndDie("Can't reopen STDIN to /dev/null: $!");
	open (STDOUT, '>/dev/null') or
		logAndDie("Can't reopen STDOUT to /dev/null: $!");
	open (STDERR, '>&STDOUT') or logAndDie("Can't dup STDERR to STDOUT: $!");

	# dissociate from terminal
	POSIX::setsid() or logAndDie("Can't start dissociate from parent: $!");

	$SIG{INT} = $SIG{TERM} = \&TERMINATE;
	$SIG{HUP} = \&RELOAD;
}

# wait for client connections
wait_for_requests($config{server_port});

exit;

# open the server port and wait for requests
sub wait_for_requests {
	my ($server_port, $repo_path) = @_;

	# open a socket to communicate through
	socket(SERVER, PF_INET, SOCK_STREAM, getprotobyname('tcp'))
		or logAndDie("Failed to create socket: $!");

	# so we can restart our server quickly
	setsockopt(SERVER, SOL_SOCKET, SO_REUSEADDR, pack("l", 1));

	# listen on all interfaces
	bind(SERVER, sockaddr_in($server_port, INADDR_ANY))
		or logAndDie("Failed to bind to port $server_port: $!");

	listen(SERVER, SOMAXCONN)
		or logAndDie("Failed to listen on port $server_port: $!");

	# loop waiting for client connects
	logMessage("wait for client connect");

	# from perldoc:
	#  "Note that if you have installed a signal handler for SIGCHLD,
	#   the value of $? will usually be wrong outside that handler."
	# So accept often fails as a result of reaper.  Placing it inside
	# the loop ensures we get back to a working status.
	while ($keep_running) {
		accept(CLIENT, SERVER) or next;
		# how many processes do we have running?
		my $count = (keys %children);
		# fork a new process to handle the client
		# FIXME limit forks?
		my $pid = fork;
		if ($pid) {
			# parent
			$children{$pid}++;
			logMessage("DEBUG parent($$-$pid,$count) close client")
				if DEBUGFORK;
			close(CLIENT);
			next;
		}
		# child
		logAndDie("fork failed: $!") unless defined $pid;

		close(SERVER);
		logMessage("DEBUG spawned new child $$") if DEBUGFORK;

		# read the client request
		my ($result, $transfered) = handle_client(*CLIENT);
		logMessage("DEBUG handle_client result: $result  transfered: " .
			($transfered || 0)) if DEBUGFORK;
		close(CLIENT);
		exit;
	}

	# exit gracefully...
	logMessage("Received terminate signal");
	logMessage("DEBUG ($$) done while: $!") if DEBUGFORK;
	close(SERVER);
	unlink($have_pid_file) if $have_pid_file;
	logMessage("Server terminated");
}

sub request_failed {
	my $client = shift;
	my $msg = shift || 'reason unknown';

	logMessage("request failed: $msg");
	my $header = "HTTP/1.1 500 $msg\r\n\r\n";

	# send header with timeout
	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm BLOCK_TIMEOUT; # only wait this long
		syswrite($client, $header);
		alarm 0;
	};
	alarm 0; # prevent race condition

	return 0;
}

sub handle_client {
	my $client = shift;

# directory to store file copies
	my $repo_path = $config{cache_dir};

	if (! -e $repo_path) {
		logMessage("create repository directory $repo_path");
		mkdir($repo_path)
			or logAndDie("Failed to create repository directory $repo_path: $!");
	}

	my $acptaddr = getpeername($client);
	$acptaddr || return 0;
	my $dotted_quad = inet_ntoa((unpack_sockaddr_in($acptaddr))[1]);

	logMessage("DEBUG new client! $dotted_quad") if DEBUG;

	# enable command buffering (autoflush)
	select((select($client), $| = 1)[0]);

	# set binary mode on output
	binmode $client;

	# read the client request
	my ($req_ref) = get_http_header($client, $dotted_quad);

	return request_failed($client, 'system error') unless $req_ref;

	logMessage("Client: $dotted_quad $req_ref->{QUERY_STRING}");

	map {logMessage("DEBUG request $_: $req_ref->{$_}")} sort keys %$req_ref
		if DEBUG;

	# get the repository value
	my $source = $req_ref->{QUERY_STRING};
	$source =~ s!^(\.*/+)+!!;
	logMessage("DEBUG source: $source") if DEBUG;

	# strip the site from the URL and setup paths
	if (my $source_info = get_source_info($source, 'http', $repo_path)) {
		logMessage("DEBUG repository_source: " .
					"$source_info->{repository_source}\n" .
					"DEBUG distro: $source_info->{distro}") if DEBUG;
		# now proxy the request
		return http_proxy($client, $source_info);
	}
	return request_failed($client, 'invalid URL');
}

# Source info can come from the URL, or an info file.
# The info file is created when a repository reload is
# requested.
# The info file just makes it easier to parse requests
# for RPMs without fretting over the path names.
sub get_source_info {
	my ($base, $repository_type, $repo_path) = @_;

	my %info = (
			source => $base,
			skip_check => ($config{skip_check} eq 'all'),
		);

	$base =~ s!^([^/]+)!! or
		return logMessage("Can't identify repository from source");

	$info{repository_source} = $1;

	my $source_path = $info{source_path} = $base;

	$repo_path .= "/$1";
	my $repo_info = "$repo_path/info_$repository_type.txt";

	# FIXME don't create this path until we confirm a good connection
	if (! -e $repo_path) {
		logMessage("create repository source directory $repo_path");
		mkdir($repo_path)
			or return logMessage("Failed to create repository source ".
									"directory $repo_path: $!");
	}

	# the main info file is only created with a request for a base file
	if (-e $repo_info) {
		# read the info file
		my ($fh, $msg) = openReadWithLock($repo_info);
		$fh or return logMessage($msg);
		while ($_ = <$fh>) {
			$info{$1} = $2 if m/^\s*([^#][^:]+):\s+(.+)/;
		}
		close($fh);
		$info{info_file} = $repo_info;
	}
	else {
		$base =~ s!^(.+)/base/!! or
			return logMessage("Can't identify distribution from source");
		my $distro = $info{pathbase} = $1;
		$distro =~ s!^.+/apt/!!;
		$info{distro} = $distro;

		# write a new info file
		my ($fh, $msg) = openWriteWithLock($repo_info);
		$fh or return logMessage($msg);
		map { print $fh "$_: $info{$_}\n" }
			qw(repository_source distro pathbase);
		close($fh);

		$info{info_file} = $repo_info;
	}

	# is this base information, or a package file
	my $pathbase = $info{pathbase};
	$source_path =~ m!^\Q$pathbase\E/([^/]+)/(.+)!
		or return logMessage("Can't identify base from source $pathbase");
	if ($1 eq 'base') {
		$info{base}++;
		# Don't check packages for changes
		$info{skip_check}++ if $config{skip_check} eq 'default';
	}

	my $repo_base = "$repo_path/$1";
	$info{base_file} = "$repo_base/$2";
	$info{file_short} = "$info{repository_source}/$1/$2";
	logMessage("DEBUG base path $1 $2") if DEBUG;

	if (! -e $repo_base) {
		logMessage("Create repository source base directory $repo_base");
		mkdir($repo_base)
			or return logMessage("Failed to create repository source base ".
									"directory $repo_base: $!");
	}

	return \%info;
}

# retreive the requested file from the repository source, or a local copy
sub http_proxy {
	my $client = shift;
	my $info = shift;
	my $force_reload = shift || 0;

	my $local_file = $info->{base_file};
	my $file_short = $info->{file_short};
	my $old = (-e $local_file);
	logMessage("DEBUG found local copy $local_file") if $old and DEBUG;
	$force_reload++ unless $old;

	# connect to http source if we're forcing a reload
	# don't connect to the http source if we're skipping the check

	if ($force_reload or !$info->{skip_check}) {
		my ($socket, $tags_ref, $header_ref) =
			open_header_source($info->{repository_source},
				$info->{source_path});

		if ($socket) {
			# check request code
			if ($tags_ref->{STATUS} != 200) {
				# return local file if we've got it
				if ($old) {
					logMessage("Unexpected status: $tags_ref->{STATUS}, return local copy $file_short");
					close($socket);
					my ($fh, $tags_ref, $header_ref) = open_header_local($info);
					return request_failed($client, "couldn't open local copy")
						unless $fh;
					return http_transfer_file($client, $fh, $header_ref,
						$tags_ref->{'CONTENT-LENGTH'});
				}
				# pass on error to client
				logMessage("Unexpected status: $tags_ref->{STATUS}, passing to client");
				return http_transfer_unknown($client, $socket, $header_ref,
					$tags_ref->{'CONTENT-LENGTH'});
			}
		}
		else {
			# couldn't connect to source
			# return local file if we've got it
			return request_failed($client, "couldn't connect to source")
				unless $old;
			logMessage("Return local copy $file_short");
			my ($fh, $tags_ref, $header_ref) = open_header_local($info);
			return request_failed($client, "couldn't open local copy")
				unless $fh;
			return http_transfer_file($client, $fh, $header_ref,
				$tags_ref->{'CONTENT-LENGTH'});
		}

		if ($force_reload) {
			logMessage(($old ? 'Force refresh local copy' : 'Get new copy') .
						" $file_short");
			# return the repository request
			return http_transfer_source($client, $socket, $header_ref,
				$tags_ref->{'CONTENT-LENGTH'}, $local_file);
		}

		# we already have this file.  Maybe it needs updating?
		my ($fh, $local_tags_ref, $local_header_ref) = open_header_local($info);

		# Etags are unique aren't they?
		if ($fh) {
			if ($tags_ref->{ETAG} eq $local_tags_ref->{ETAG}) {
				logMessage("Return local copy (no change) $file_short");
				# repository file and local copy are the same
				close($socket);
				return http_transfer_file($client, $fh, $local_header_ref,
					$local_tags_ref->{'CONTENT-LENGTH'});
			}
			# done with readonly, release the lock
			close($fh);
		}

		# new file in repository
		logMessage("Refresh local copy $file_short");
		# return the repository request
		return http_transfer_source($client, $socket, $header_ref,
			$tags_ref->{'CONTENT-LENGTH'}, $local_file);
	}

	# return the local copy (don't check source)
	logMessage("Return local copy (skip check) $file_short");
	my ($fh, $tags_ref, $header_ref) = open_header_local($info);
	return request_failed($client, "couldn't open local copy") unless $fh;
	return http_transfer_file($client, $fh, $header_ref,
		$tags_ref->{'CONTENT-LENGTH'})
}

# pull the header off the socket (or file handle)
sub get_http_header {
	my $fh = shift;
	my $fh_id = shift || 'unknown file handle';

	logMessage("DEBUG get header") if DEBUG;

	my $header = '';
	my %request = (
			STATUS => 0
		);
	my $buffer = '';
	my $read_OK;

	# it really shouldn't take very long to receive a request
	# so don't kill the application waiting!
	my $OK = 0;
	my $read_count = 0;
	eval {
 		local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
 		alarm(BLOCK_TIMEOUT);
		my $char;
		while($read_OK = sysread($fh, $char, 1)) {
			$buffer .= $char;
			last if $buffer =~ m/\r\n\r\n$/;
			last if ++$read_count > HTTP_MAXHEADER;
		}
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition

	# check time out
	$OK or
		return logMessage("Get http header timed out: $fh_id");

	# check never ending header
	$read_count <= HTTP_MAXHEADER or
		return logMessage("failed to get the httpd request: too big? ($read_count)");

	# check read error
	$read_OK or
		return logMessage("failed to get the httpd header: $!");

	# pull header lines out of the buffer
	foreach (split(/\n/, $buffer)) {
		$header .= "$_\n";
		if (m!^HTTP/([\d.]+)\s+(\d+)!) {
			$request{HTTP} = $1;
			$request{STATUS} = $2;
			logMessage("DEBUG CODE=$2 >>$_") if DEBUG;
			next;
		}
		if (m!^get\s+(.+)\s+http/(.+)!i) {
			$request{REQUEST_METHOD} = 'GET';
			$request{QUERY_STRING} = $1;
			$request{HTTP} = $2;
			next;
		}
		$request{uc($1)} = $2 if m!^([^:]+):\s+([^\r]+)!;
	}

	map { logMessage("DEBUG request: $_  value: $request{$_}") }
		sort keys %request if DEBUG;
	logMessage("DEBUG header length: " . length($header)) if DEBUG;

	return \%request, \$header;
}

# create a dummy http header for an existing local file
sub create_http_header {
	my $info = shift;
	my $fh = shift;

	my $local_file = $info->{base_file};
	my $file_short = $info->{file_short};

	logMessage("DEBUG create dummy info file: $file_short.info") if DEBUG;
	my ($size, $modified, $dev, $ino, $mode, $atime) =
		(stat($fh))[7,9,0,1,2,8];
	$modified = gmtime($modified);
	$modified =~ s/^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/$1, $3 $2 $5 $4/;
	my $now = gmtime;
	$now =~ s/^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/$1, $3 $2 $5 $4/;

	# dummy up an ETag
	my $etag = "$dev-$ino$mode-$atime";
	my $content_type;
	if ($info->{base}) {
		$content_type = 'text/plain; charset=ISO-8859-1';
	}
	else {
		# FIXME don't hard code RPMs
		$content_type = 'application/x-redhat-package-manager';
	}

	my %request = (
			'HTTP' => '1.1',
			'STATUS' => 200,
			'DATE' => "$now GMT",
			'SERVER' => "repo-proxy/$ver",
			'LAST-MODIFIED' => "$modified GMT",
			'ETAG' => '"' . $etag . '"',
			'ACCEPT-RANGES' => 'bytes',
			'CONTENT-LENGTH' => $size,
			'CONTENT-TYPE' => $content_type
		);
	my $header =  <<EOF;
HTTP/1.1 200 OK\r
Date: $now GMT\r
Server: repo-proxy/$ver\r
Last-Modified: $modified GMT\r
ETag: "$etag"\r
Accept-Ranges: bytes\r
Content-Length: $size\r
Content-Type: $content_type\r
\r
EOF

	map { logMessage("DEBUG request: $_  value: $request{$_}") }
		sort keys %request if DEBUG;
	logMessage("DEBUG header length: " . length($header)) if DEBUG;

	my ($info_fh, $msg) = openWriteWithLock("$local_file.info");
	$info_fh or return logMessage($msg);

	syswrite($info_fh, $header);
	close($info_fh);

	return \%request, \$header;
}

sub open_header_source {
	my $site = shift;
	my $path = shift;

	logMessage("DEBUG open tcp connection to $site") if DEBUG;

	# open a tcp socket to the repository
	socket(my $socket, PF_INET, SOCK_STREAM, getprotobyname('tcp'));

	my $ip = inet_aton($site)
		or return logMessage("Can't find site: $site");

	# connect with timeout
	my $OK = 0;
	my $msg;
	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm CONNECTION_TIMEOUT; # only wait this long
		connect($socket, sockaddr_in(80, $ip))
			or $msg = "Couldn't connect to $site:80 : $!";
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition
	return logMessage($msg) if $msg;
	return logMessage("Couldn't connect to $site:80 : Socket Timeout")
		unless $OK;

	# enable command buffering (autoflush)
	select((select($socket), $| = 1)[0]);

	# send the page request
	syswrite($socket, join("\015\012",
						"GET $path HTTP/1.1",
						"Host: $site",
						"User-Agent: repo-proxy/$ver",
#						"Connection: close",
						"", ""));

	# TODO handle chunked encoding?
	# TODO handle 100 continue?

	return $socket, get_http_header($socket, $site);
}

sub open_header_local {
	my $info = shift;
	my $local_file = $info->{base_file};
	my $file_short = $info->{file_short};

	logMessage("DEBUG open_header_local $file_short.info") if DEBUG;

	# open the info file
	my ($info_fh, $info_msg) = openReadWithLock("$local_file.info");
	if ($info_fh) {
		# now open the real file
		my ($fh, $msg) = openReadWithLock("$local_file");
		$fh or return logMessage($msg);
		my ($tags_ref, $header_ref) =
			get_http_header($info_fh, "$file_short.info");
		close($info_fh);
		return $fh, $tags_ref, $header_ref;
	}

	# no info file?  maybe still have the source file
	logMessage("Missing info file?  Create dummy: $file_short.info");
	my ($fh, $msg) = openReadWithLock("$local_file");
	$fh or return logMessage($msg);
	my ($tags_ref, $header_ref) = create_http_header($info, $fh);
	return($fh, $tags_ref, $header_ref) if $tags_ref;
	close($fh);
	return 0;
}

# adjust the buffer size dynamically to match the bytes read
# transfer with timeout
sub dynamic_block_size_transfer {
	my $client = shift;
	my $fh = shift;
	my $length = shift;
	my $local_fh = shift;

	my $OK = 0;
	my $bufsize = MAX_BUFSIZE;
	my $buffer;
	my $more_bytes;
	my $bytes_transfered = 0;

	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm BLOCK_TIMEOUT; # only wait this long

		while (defined($more_bytes = sysread($fh, $buffer, $bufsize))) {
			if ($more_bytes) {
				alarm BLOCK_TIMEOUT; # reset timeout
				syswrite($client, $buffer);
				if ($local_fh) {
					alarm BLOCK_TIMEOUT; # reset timeout
					syswrite($local_fh, $buffer);
				}
				$bytes_transfered += $more_bytes;
				last if $bytes_transfered >= $length;
			}
			if ($more_bytes < $bufsize) {
				# adjust buffer size to match what we just received
				$bufsize = $more_bytes > MIN_BUFSIZE ?
					$more_bytes : MIN_BUFSIZE;
				logMessage("DEBUG stalled: bytes_transfered " .
							"$bytes_transfered  ($more_bytes - $bufsize)")
					if DEBUGFILE;
			}
			# increase the buffer size (if possible)
			elsif ($bufsize < MAX_BUFSIZE) {
				$bufsize += INC_BUFSIZE;
				$bufsize = MAX_BUFSIZE if $bufsize > MAX_BUFSIZE;
			}
			alarm BLOCK_TIMEOUT; # reset timeout
		}
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition

	return $OK, $bytes_transfered;
}

# transfer request from source to client, saving a copy to local file
sub http_transfer_source {
	my $client = shift;
	my $socket = shift;
	my $header_ref = shift;
	my $length = shift || 0;
	my $local_file = shift;

	my $bufsize = MAX_BUFSIZE;
	my $buffer;
	my $more_bytes;
	my $bytes_transfered = 0;

	logMessage("DEBUG http_transfer_source $local_file length: $length")
		if DEBUG;

	#this is our first copy
	my ($info_fh, $info_msg) = openWriteWithLock("$local_file.info");
	if (!$info_fh) {
		close($socket);
		return logMessage($info_msg);
	}

	# send header with timeout
	my $OK = 0;
	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm BLOCK_TIMEOUT; # only wait this long
		syswrite($client, $$header_ref);
		alarm BLOCK_TIMEOUT; # reset timer
		syswrite($info_fh, $$header_ref);
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition

	if ($OK and $length) {
		# now send rest of the file
		my ($fh, $msg) = openWriteWithLock("$local_file");
		if (!$fh) {
			close($info_fh);
			close($socket);
			return logMessage($msg);
		}

		# transfer file stream
		($OK, $bytes_transfered) =
			dynamic_block_size_transfer($client, $socket, $length, $fh);
		close($fh);
	}

	close($info_fh);
	close($socket);

	logMessage("DEBUG bytes_transfered: $bytes_transfered") if DEBUG;
	return ($OK, $bytes_transfered) if $OK;

	logMessage("Transfer Timeout!");
	# remove local file, it's no good.
	logMessage("DEBUG unlink $local_file") if DEBUG;
	unlink($local_file) or logMessage("Failed to unlink $local_file: $!\n");
	unlink("$local_file.info") or
		logMessage("Failed to unlink $local_file.info: $!\n");
	return 0;
}

# pass on unknown request status
# same as http_transfer_source, only no save to local file
sub http_transfer_unknown {
	my $client = shift;
	my $socket = shift;
	my $header_ref = shift;
	my $length = shift || 0;

	my $bufsize = MAX_BUFSIZE;
	my $buffer;
	my $more_bytes;
	my $bytes_transfered = 0;

	logMessage("DEBUG http_transfer_unknown length: $length") if DEBUG;

	# send header with timeout
	my $OK = 0;
	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm BLOCK_TIMEOUT; # only wait this long
		syswrite($client, $$header_ref);
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition

	# pass on the request

	# transfer file stream (no local copy)
	($OK, $bytes_transfered) =
		dynamic_block_size_transfer($client, $socket, $length)
		if $OK and $length;
	close($socket);

	logMessage("DEBUG bytes_transfered: $bytes_transfered") if DEBUG;
	logMessage("Transfer Timeout!") unless $OK;
	return $OK, $bytes_transfered;
}

# same as http_transfer_source only it doesn't copy source while transfering
sub http_transfer_file {
	my $client = shift;
	my $fh = shift;
	my $header_ref = shift;
	my $length = shift || 0;

	my $buffer;
	my $more_bytes;

	logMessage("DEBUG http_transfer_file length: $length") if DEBUG;

	# send header with timeout
	my $OK = 0;
	eval {
		# set a signal to die if the timeout is reached
		local $SIG{ALRM} = sub { die "alarm\n" };
		alarm BLOCK_TIMEOUT; # only wait this long
		syswrite($client, $$header_ref);
		alarm 0;
		$OK++;
	};
	alarm 0; # prevent race condition

	my $bytes_transfered = 0;

	# now return the request (with timeout)
	if ($OK and $length) {
		logMessage("DEBUG start transfering") if DEBUG;
		eval {
			# set a signal to die if the timeout is reached
			local $SIG{ALRM} = sub { die "alarm\n" };
			alarm BLOCK_TIMEOUT; # only wait this long
			while ($more_bytes = sysread($fh, $buffer, MAX_BUFSIZE)) {
				alarm BLOCK_TIMEOUT; # reset timer
				logMessage("DEBUG transfer: $more_bytes") if DEBUGFILE;
				if (my $result = syswrite($client, $buffer)) {
					$bytes_transfered += $more_bytes;
					logMessage("DEBUG bytes_transfered($result): " .
								"$bytes_transfered") if DEBUGFILE;
					last if $bytes_transfered >= $length;
					alarm BLOCK_TIMEOUT; # reset timer
					next;
				}
				# failed to write? how odd!
				logMessage("write client failed: $!");
				last;
			}
			alarm 0;
			$OK++;
		};
		alarm 0; # prevent race condition
	}
	close($fh);
	logMessage("DEBUG bytes_transfered: $bytes_transfered") if DEBUG;
	return 0 unless $bytes_transfered >= $length;
	logMessage("Transfer Timeout!") unless $OK;
	return $OK, $bytes_transfered;
}

# TODO dotlock? (or do we care)
# non-blocking read lock
sub openReadWithLock {
    my $file = shift;
    if (sysopen(my $fh, $file, O_RDONLY)) {
		my $result;
		eval {
			local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
			alarm(BLOCK_TIMEOUT);
			$result = flock($fh, LOCK_SH);
			alarm 0;
		};
		alarm 0; # prevent race condition
		return $fh if $result;

		# catch the error befor closing the fh
		my $msg = "Can't lock $file: $!";
		close($fh);
		return 0, $msg;
	}
	return 0, "Failed to read file $file: $!";
}

# non-blocking write lock
sub openWriteWithLock {
    my $file = shift;
    if (sysopen(my $fh, $file, O_WRONLY|O_CREAT)) {
		my $result;
		eval {
			local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
			alarm(BLOCK_TIMEOUT);
			$result = flock($fh, LOCK_EX);
			alarm 0;
			truncate($fh, 0) if $result;
		};
		alarm 0; # prevent race condition
		return $fh if $result;

		# catch the error befor closing the fh
        my $msg = "Can't lock $file: $!";
        close($fh);
		return 0, $msg;
    }
    return 0, "Failed to write file $file: $!";
}

sub sanitize_file_path {
	local $_ = shift;
	# strip fishy characters
	s![^\w\d\s./_-]+!!g;
	# strip // (just for looks)
	s!//+!/!;
	m/^(.+)$/; # untaint
	return $1;
}

# edit (and untaint) config values
sub config_edit {
	my ($config_ref, $key, $value) = @_;
	$value =~ s/\s+$//; # strip trailing spaces
	$value =~ s/^['"]+//;$value =~ s/['"]+$//; # strip quotes
	if ($key eq 'server_port') {
		return $config_ref->{server_port} = $1 + 0
			if $value and $value =~ m/^(\d+)$/;
	}
	elsif ($key eq 'cache_dir') {
		return $config_ref->{cache_dir} = $value
			if $value = sanitize_file_path($value);
	}
	elsif ($key eq 'log_messages') {
		return $config_ref->{log_messages} = lc($1)
			if $value =~ m/^(syslog|stdout)$/i;
		return $config_ref->{log_messages} = $value
			if $value = sanitize_file_path($value);
	}
	elsif ($key eq 'skip_check') {
		return $config_ref->{skip_check} = lc($1)
			if $value =~ m/^(all|none|default)$/i;
	}
	elsif ($key eq 'pid_file') {
		return $config_ref->{pid_file} = ''
			if $value =~ m/^(none|no|false|0)$/i;
		return $config_ref->{pid_file} = $value
			if $value = sanitize_file_path($value);
	}
	return 0;
}

sub logMessage {
	my $msg = shift;

	my $now = localtime;
	my $logid = LOGNAME."[$$]";
	# strip the day of week and year (syslog format)
	$now =~ s/^\S+\s+(.+)\s+\S+$/$1 localhost $logid/;

	if (my $logfile = $config{log_messages}) {
		# log to file
		if ($logfile !~ m/^syslog$/i) {
			if (open(LOG, ">>$logfile")) {
				map { print LOG "$now: $_\n" } split /\n+/, $msg;
				close(LOG);
				return 0;
			}
			# fall through to syslog
			$msg = "Failed to open $logfile: $!\n$msg";
		}
		# log to syslog
		openlog($logid);
		map { syslog('info', $_) } split /\n+/, $msg;
		closelog();
		return 0;
	}

	# log to STDOUT
	map { print STDOUT "$_\n" } split /\n+/, $msg;
	return 0;
}

sub logAndDie {
	my $msg = shift;
	$msg .= "\nServer terminated" if $args{D};
	logMessage($msg) if ($config{log_messages});
	unlink($have_pid_file) if $have_pid_file;
	die "DIED: $msg\n";
}
