#!/usr/bin/perl

##################################
##
## vt_scan2.pl
##
## Checks scan results of all files in current directory and subdirectories with virustotal.com
## If it hasn't been scanned, upload it and check results later.
##
##################################

## Biggest file size, in bytes, that VT currently accepts through its web interface.
## doit() compares each file's size (-s) against this and skips anything larger.
$max_file_size_to_scan = 10000000;

## Largest number of files to have in flight at VT before some results must be pulled.
## (Not referenced by the code visible in this part of the file.)
$max_files_to_scan_at_once = 500000;


## If the difference between the first VT scan and the last VT scan is bigger than this,
## assume a non-infection result is permanent.
## I.e. if they haven't found a virus in the file in the first six months, they're
## not likely to find one at all.
## NOTE(review): presumably in days, matching the DATEDIFF-based "span" column selected
## further down; 125 days is roughly four months, not the six months described above --
## confirm which one is intended.
$date_span_for_no_rescan = 125;


## Oldest scan results to trust, in days.  Lower this during major outbreaks.
$days_old_to_rescan = 5;


## Pause between files -- presumably seconds; not referenced by the visible code, TODO confirm.
$sleep_between_files = 12;
## VirusTotal API key.
## NOTE(review): this secret and the database credentials below are hard-coded in the
## source; consider loading them from the environment or an untracked config file.
$vt_api_key = '472d92cc24580fed2bcb562aa27a1742612be5913bb91712f4ff5826eca96345';
## MySQL connection settings, used to build the DBI DSN when connecting.
$db_host = '172.20.1.3';
$db_name = 'vt_scan';
$db_user = 'vt_scan';
$db_pass = 'vt_scan';

## Verbosity: >= 1 prints per-file progress messages, >= 2 also prints the
## computed hashes and the post-size-check trace line.
$debug = 1;

###################################################################################
#################  DO NOT CHANGE ANYTHING BELOW THIS LINE!!!   ####################
###################################################################################

use Data::Dumper;
use DBI;
use File::Find;
use File::Basename;
use Cwd;
use Digest::SHA;
use Digest::MD5;
use LWP;

##### Unbuffer STDOUT so progress messages appear as they are printed.
$| = 1;

$current_scanning_count = 0;

##### Absolute path of the starting directory, with a trailing slash.  doit()
##### prepends it to the relative names File::Find reports.
$curdir = getcwd() . "/";
#print "$curdir\n\n";

##### create LWP user agent....

$ua = LWP::UserAgent->new;
#$ua = new LWP::RobotUA;
#$ua->agent("MSIE 7.0 - Mozilla Compatible - Scanbot/0.1.1");
$ua->agent("Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; GTB6.3; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729)");
####push @{ $ua->requests_redirectable }, 'POST';

#$ua->delay( 15/60 );

#### create database connection....
#### Report the driver's own error text; a bare "Error connecting" hides the cause.
$dbh = DBI->connect("DBI:mysql:$db_name;host=$db_host", $db_user, $db_pass)
    or die "Error connecting to database: $DBI::errstr\n";
##$insert_prpst = $dbh->prepare("INSERT INTO results (MD5, SHA1, SHA256, LocalLastScan, VTLastScan, VTFirstScan, VTResult) VALUES (?, ?, ?, NOW(), ?, ?, ?)");
#$insert_prpst = $dbh->prepare("INSERT INTO results (MD5, SHA1, SHA256, LocalLastScan, VTFirstScan, VTLastScan, VTResult, Detections, Scanners) VALUES (?, ?, ?, NOW(), ?, ?, ?, ?, ?)");
#$update_prpst = $dbh->prepare("UPDATE results set LocalLastScan = NOW(), VTLastScan = ?, VTFirstScan = ?, VTResult = ?, Detections = ?, Scanners = ? where MD5 = ? && SHA1 = ? && SHA256 = ?");

#### Look up stored results by all three hashes.  "span" is the number of days
#### between first and last VT scan, "age" the number of days since the last one.
#### Fail right here if a statement cannot be prepared, rather than later with
#### "Can't call method execute on an undefined value".
$select_prpst = $dbh->prepare("SELECT *, DATEDIFF(VTLastScan, VTFirstScan) as span, DATEDIFF(NOW(), VTLastScan) as age from results where MD5 = ? && SHA1 = ? && SHA256 = ?")
    or die "Error preparing results lookup: " . $dbh->errstr . "\n";
#$infected_prpst = $dbh->prepare("INSERT INTO infected (SysID, MD5, SHA1, SHA256, Filename, Infected, Scanned) VALUES (?, ?, ?, ?, ?, ?, ?)");
#$daterange_prpst = $dbh->prepare("SELECT DATEDIFF(?, ?) as range");
#$dateage_prpst = $dbh->prepare("SELECT DATEDIFF(NOW(), ?) as age");
$hash_insert_prpst = $dbh->prepare("INSERT INTO hashes (SysID, MD5, SHA1, SHA256, Filename) VALUES (?, ?, ?, ?, ?)")
    or die "Error preparing hash insert: " . $dbh->errstr . "\n";

##### need to fix this.  Won't scan if you give it a directory.
#####  only works to scan current directory and subdirectories.

##### The first command-line argument is the system ID stored with every hash row;
##### it defaults to "anontest" when no argument is given.
@ARGV = qw(anontest) if not @ARGV;

$sysid = $ARGV[0];

#### find all files and directories in current tree, and run sub "doit" on each.

finddepth(\&doit, qw(.));

### do it a second time, so any uploaded files get results pulled.
### need to fix rescan(), so this isn't necessary.
### NOTE(review): doit() inserts a hashes row for every file that has no stored
### results, so files still without results are inserted again on this pass --
### confirm that duplicate rows are acceptable.

finddepth(\&doit, qw(.));














##### doit -- File::Find callback, invoked once per directory entry.
#####
##### For every regular file no larger than $max_file_size_to_scan it computes
##### the file's hashes, looks up stored results for them, and -- when no
##### result is stored -- inserts the hashes into the "hashes" table under $sysid.
##### Everything else (directories, FIFOs, sockets, devices, dangling symlinks,
##### oversized or unreadable files) is reported on STDOUT and skipped.
#####
##### Takes no arguments; reads $File::Find::name.  Return value is not meaningful.
sub doit {
    print "\n\n";

    #####  remove ./ from beginning of each filename.
    $fpath = substr($File::Find::name, 2);
    #####  add full path for absolute filename.  File::Find changes directory as
    #####  it walks, so a relative name would not be usable from here.
    #####  $file_path stays a package global on purpose: other subs in this file
    #####  read it directly.
    $file_path = "$curdir" . "$fpath";

    #####  Only plain files can be hashed safely.  Testing just "not a directory,
    #####  not a FIFO" lets sockets, device nodes and dangling symlinks through,
    #####  and reading a device node may never finish.
    if (!-f $file_path) {
        print "Not a regular file: ".$file_path."\n";
        return;
    }

    print "File: ".$file_path."\n";
    if ($debug >= 1) {
        print "\n";
        print "Initial file path: ".$file_path."\n";
    }

    #####  check filesize
    if ((-s $file_path) > $max_file_size_to_scan) {
        print "File too big: ".$file_path."\n";
        return;
    }
    if ($debug >= 2) {
        print "After checking size file path: ".$file_path."\n";
    }

    #####  Hashing dies when the file cannot be read (permission denied, file
    #####  removed since the directory was listed, ...).  Catch that so a single
    #####  bad file does not abort the scan of the whole tree.
    unless (eval { hashcalc($file_path); 1 }) {
        print "Unable to hash: ".$file_path.": ".$@."\n";
        return;
    }

    $local_results = 0;
    check_local_results($file_path);

    if (defined($all_files{$file_path}{'results'})) {
        print "Already have local results: ".$file_path."\n";
        return;
    }

    if ($debug >= 1) {
        print "Inserting hashes into table: ".$file_path."\n";
    }
    $hash_insert_prpst->execute($sysid,
                                $all_files{$file_path}{'MD5'},
                                $all_files{$file_path}{'SHA1'},
                                $all_files{$file_path}{'SHA256'},
                                $file_path);
    return;
}




##### hashcalc -- compute the MD5, SHA-1 and SHA-256 hex digests of one file.
#####
##### Argument: the path of the file to hash.  When called without arguments it
#####           falls back to the global $file_path.
##### Effects:  stores the digests in $all_files{<path>}{'MD5'|'SHA1'|'SHA256'}
#####           and in the globals $localMD5, $localSHA1 and $localSHA256.
##### Dies:     when the file cannot be opened or read.
sub hashcalc
    {
    my $path = @_ ? $_[0] : $file_path;

    if ($debug >= 1)
       {
       print "Hashing: ".$path."\n";
    }

    # Three-argument open with a lexical handle: the name comes straight from
    # the filesystem, and two-argument open would treat a trailing "|" as a
    # command to run and would silently strip trailing whitespace.
    open(my $fh, '<', $path)
        or die "Can't open '$path' for hashing: $!\n";
    binmode($fh);

    $md5obj    = Digest::MD5->new;
    $sha1obj   = Digest::SHA->new(1);
    $sha256obj = Digest::SHA->new(256);

    # Read the file once and feed every chunk to all three digests, so they
    # all describe the same bytes and the file is not read three times.
    my $chunk;
    while (1)
       {
       my $got = read($fh, $chunk, 65536);
       defined($got) or die "Error reading '$path': $!\n";
       last if $got == 0;
       $md5obj->add($chunk);
       $sha1obj->add($chunk);
       $sha256obj->add($chunk);
    }
    close($fh);

    $localMD5    = $md5obj->hexdigest;
    $localSHA1   = $sha1obj->hexdigest;
    $localSHA256 = $sha256obj->hexdigest;

    $all_files{$path}{'MD5'}    = $localMD5;
    $all_files{$path}{'SHA1'}   = $localSHA1;
    $all_files{$path}{'SHA256'} = $localSHA256;

    if ($debug >= 2)
       {
       print "LocalMD5: ".$all_files{$path}{'MD5'}."\n";
       print "LocalSHA1: ".$all_files{$path}{'SHA1'}."\n";
       print "LocalSHA256: ".$all_files{$path}{'SHA256'}."\n";
    }

    return;
}



##### check_local_results -- load stored scan results for one file from the database.
#####
##### Argument: the file path whose $all_files entry holds the MD5/SHA1/SHA256
#####           to look up.  When called without arguments it falls back to the
#####           global $file_path.
##### Effects:  runs $select_prpst with the three hashes and copies the first
#####           matching row into $all_files{<path>}{'results'|'detected'|
#####           'scanned'|'span'|'age'}.  All five are set to undef when no row
#####           matches or the query fails, so 'results' being undefined means
#####           "nothing stored for this file".
##### Returns:  the 'age' value, which is what the sub has always implicitly returned.
sub check_local_results
    {
    my $path = @_ ? $_[0] : $file_path;

    my @row;
    if ($select_prpst->execute($all_files{$path}{'MD5'},
                               $all_files{$path}{'SHA1'},
                               $all_files{$path}{'SHA256'}))
       {
       @row = $select_prpst->fetchrow_array();
       # Only the first row is used; release the handle instead of leaving
       # it active until the next execute.
       $select_prpst->finish();
    }

    # The query is "SELECT *, ... as span, ... as age", so the mapping is positional.
    # Columns 6-8 are presumably VTResult, Detections and Scanners (per the
    # column list of the commented-out INSERT) -- TODO confirm against the
    # actual table definition.  span and age are the two computed columns
    # appended after the table's own columns.
    $all_files{$path}{'results'}  = $row[6];
    $all_files{$path}{'detected'} = $row[7];
    $all_files{$path}{'scanned'}  = $row[8];
    $all_files{$path}{'span'}     = $row[9];
    $all_files{$path}{'age'}      = $row[10];

    return $all_files{$path}{'age'};
}



























