#!/usr/bin/perl

##################################
##
## vt_scan2.pl
##
## Checks scan results of all files in current directory and subdirectories with virustotal.com
## If it hasn't been scanned, upload it and check results later.
##
##################################

## Biggest file size, in bytes, that VT is currently accepting through its web
## interface.  Larger files are reported as too big and are not hashed or uploaded.
$max_file_size_to_scan = 10000000;

## Largest number of files allowed to be pending at VT at once, before the script
## has to pull some results back (see the rescan sub).
$max_files_to_scan_at_once = 500000;


## If the difference (in days) between the first VT scan and the last VT scan is
## at least this big, assume the stored result is permanent and do not rescan.
## I.E. - if they haven't found a virus in the file in the first 125 days
## (roughly four months), they're likely not going to find one at all.
$date_span_for_no_rescan = 125;


## Oldest scan results to trust, in days.  Lower this during major outbreaks.
$days_old_to_rescan = 5;


## Seconds to sleep between requests, so virustotal.com is not hammered.
$sleep_between_files = 12;
## NOTE(review): the API key and database credentials are hard-coded in the
## script.  The API key is only referenced by commented-out code further down.
$vt_api_key = '472d92cc24580fed2bcb562aa27a1742612be5913bb91712f4ff5826eca96345';
$db_host = '172.20.1.3';
$db_name = 'vt_scan';
$db_user = 'vt_scan';
$db_pass = 'vt_scan';

## Debug verbosity.  The code tests for levels >= 1, >= 2 and >= 3; higher
## values print more.
$debug = 2;

###################################################################################
#################  DO NOT CHANGE ANYTHING BELOW THIS LINE!!!   ####################
###################################################################################

use Data::Dumper;
use DBI;
use File::Find;
use File::Basename;
use Cwd;
use Digest::SHA;
use Digest::MD5;
use LWP;

## Unbuffered STDOUT so progress output shows up as it happens.
$| = 1;

## Number of files uploaded to VT whose results have not been pulled back yet.
$current_scanning_count = 0;

## Absolute path of the directory the scan starts from, with a trailing slash.
$curdir = getcwd() . "/";

##### create LWP user agent....

$ua = LWP::UserAgent->new;
$ua->agent("Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; GTB6.3; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729)");

#### create database connection....
($dbh = DBI->connect("DBI:mysql:$db_name;host=$db_host", "$db_user","$db_pass"))
    or die "Error connecting to database: $DBI::errstr\n";

#### prepared statements shared by the subs below.
## The results table is read positionally by check_local_results, so the column
## order used here (MD5, SHA1, SHA256, LocalLastScan, VTFirstScan, VTLastScan,
## VTResult, Detections, Scanners) is assumed to match the table definition.
$insert_prpst = $dbh->prepare("INSERT INTO results (MD5, SHA1, SHA256, LocalLastScan, VTFirstScan, VTLastScan, VTResult, Detections, Scanners) VALUES (?, ?, ?, NOW(), ?, ?, ?, ?, ?)");
$update_prpst = $dbh->prepare("UPDATE results set LocalLastScan = NOW(), VTLastScan = ?, VTFirstScan = ?, VTResult = ?, Detections = ?, Scanners = ? where MD5 = ? AND SHA1 = ? AND SHA256 = ?");
$select_prpst = $dbh->prepare("SELECT *, DATEDIFF(VTLastScan, VTFirstScan) as span, DATEDIFF(NOW(), VTLastScan) as age from results where MD5 = ? AND SHA1 = ? AND SHA256 = ?");
$infected_prpst = $dbh->prepare("INSERT INTO infected (SysID, MD5, SHA1, SHA256, Filename, Infected, Scanned) VALUES (?, ?, ?, ?, ?, ?, ?)");
## RANGE is a reserved word in MySQL, so it cannot be used as an unquoted alias.
## The result is only ever read by position, so the alias name is free to change.
$daterange_prpst = $dbh->prepare("SELECT DATEDIFF(?, ?) as date_range");
$dateage_prpst = $dbh->prepare("SELECT DATEDIFF(NOW(), ?) as age");


##### need to fix this.  Won't scan if you give it a directory.
#####  only works to scan current directory and subdirectories.

## The first command line argument is the system ID stored with infected files.
@ARGV=qw(anontest) if not @ARGV;

$sysid = $ARGV[0];

#### find all files and directories in current tree, and run sub "doit" on each.

finddepth(\&doit, qw(.));

## NOTE: files that are still pending at VT are not polled again here; the
## final call to rescan is disabled.
##&rescan;


#### summary
## Clean files are recorded as keys of %clean_files (see pull_results), so they
## have to be counted from the hash; the array of the same name is never filled.
$number_clean = scalar(keys %clean_files);
$number_infected = scalar(keys %infected_files);

print "Clean files: ".$number_clean."\n";
print "Previously Unscanned files: ".scalar(keys %scanning_files)."\n";
print "Infected files: ".$number_infected."\n";
if ($number_infected > 0)
	{
	print "Infected files list:\n\n";
	foreach my $infected (sort keys %infected_files)
		{
		print $all_files{$infected}{'results'}." scanners detected infection in:  ".$infected."\n";
	}
	print "\n\n";
} else
	{
	print "No infected files!!!  :)\n\n";
}












#####  File::Find callback: called once for every entry below the start directory.
#####
#####  For each readable regular file that is not too big it
#####    1. hashes the file (hashcalc),
#####    2. looks the hashes up in the local database (check_local_results),
#####    3. if there is no usable local result, asks VT for the hash (send_hash),
#####    4. if VT has no usable result either, uploads the file (upload_file /
#####       reupload_file), pulling pending results first (rescan) when too many
#####       files are already pending.
#####
#####  Reads $File::Find::name and the configuration globals; sets the globals
#####  $fpath, $file_path and $local_results, which the other subs rely on.
#####  The return value is not meaningful (File::Find ignores it).
sub doit {
    print "\n\n";
#####  remove ./ from beginning of each filename.
    $fpath = substr($File::Find::name, 2);
#####  add full path for absolute filename
    $file_path = $curdir.$fpath;

#####  Only regular files can be hashed and uploaded.  Directories, pipes,
#####  sockets, devices and dangling symlinks are skipped; trying to hash them
#####  would either fail or never finish.
    return unless -f $file_path;
    if (!-r $file_path)
        {
        print "Can't read '$file_path'; skipping...\n";
        return;
    }

    print "File: ".$file_path."\n";
    if ($debug >= 1)
        {
        print "\n";
        print "Initial file path: ".$file_path."\n";
    }

#####  check filesize
    if ((-s $file_path) > $max_file_size_to_scan)
        {
        print "File too big...\n";
        return;
    }
    if ($debug >= 2)
        {
        print "After checking size file path: ".$file_path."\n";
    }

    &hashcalc($file_path);
    if (!defined($all_files{$file_path}{'SHA256'}))
        {
        print "No hashes available for '$file_path'; skipping...\n";
        return;
    }

    $local_results = 0;
    &check_local_results($file_path);

#####  A fresh enough local result needs no network traffic and so no sleep.
    if (!_vt_result_needs_refresh($file_path))
        {
        $local_results = 1;
        return;
    }

    if ($debug >= 1)
        {
        if (!defined($all_files{$file_path}{'results'}))
            {
            print "No local results.  Sending hash...\n";
        } else
            {
            print "Local results found, but ".$all_files{$file_path}{'age'}." days old.  Sending hash...\n";
        }
        print "Calling send_hash function for: ".$file_path."\n";
    }
    &send_hash($file_path);

#####  send_hash may have stored a result from VT; decide again with that data.
    if (!_vt_result_needs_refresh($file_path))
        {
        if ($debug >= 1)
            {
            print "Using local results...\n";
        }
    } elsif ($current_scanning_count <= $max_files_to_scan_at_once)
        {
        my $never_scanned = !defined($all_files{$file_path}{'results'});
        if ($debug >= 1)
            {
            if ($never_scanned)
                {
                print "No VT results found.  Uploading...\n";
            } else
                {
                print "VT results found, but:\n";
                print $all_files{$file_path}{'age'}." days old.\n";
                print $all_files{$file_path}{'span'}." day age span\n";
                print "  Re-uploading...\n";
            }
            print "Only scanning ".$current_scanning_count." files 1.\n";
        }
        if ($never_scanned)
            {
            &upload_file($file_path);
        } else
            {
            &reupload_file($file_path);
        }
    } else
        {
#####  Too many files pending at VT: pull results to free slots, then retry.
        $unscanned_files{$file_path} = $all_files{$file_path};
        &rescan();
        if ($current_scanning_count <= $max_files_to_scan_at_once)
            {
            if ($debug >= 1)
                {
                print "Only scanning ".$current_scanning_count." files 2.\n";
            }
            &upload_file($file_path);
        } else
            {
            $files_to_scan{$file_path} = 1;
        }
    }

#####   don't beat the crap out of virustotal.com
    sleep $sleep_between_files;
}

#####  Helper for doit.  Returns 1 when the result stored in %all_files for the
#####  given path is missing, or is older than $days_old_to_rescan days while its
#####  first-to-last scan span is still below $date_span_for_no_rescan days;
#####  returns 0 otherwise.
sub _vt_result_needs_refresh {
    my ($path) = @_;
    return 1 if !defined($all_files{$path}{'results'});
    if ($all_files{$path}{'age'} > $days_old_to_rescan &&
        $all_files{$path}{'span'} < $date_span_for_no_rescan)
        {
        return 1;
    }
    return 0;
}




#####  Calculate the MD5, SHA1 and SHA256 hex digests of a file and store them
#####  in %all_files{<path>}{'MD5'|'SHA1'|'SHA256'}.
#####
#####  Args:    $_[0] - path of the file to hash.  When called without an
#####                   argument the global $file_path is used.
#####  Returns: 1 when all three digests were stored; 0 when the file could not
#####           be opened or read, in which case any digests previously stored
#####           for that path are removed.
#####
#####  The file is opened with three-argument open, so characters in the file
#####  name (a trailing "|", leading/trailing blanks, ...) are never interpreted
#####  as an open mode or a command.  It is read once, in binary mode, and every
#####  chunk is fed to all three digests.
sub hashcalc
    {
    my $path = defined($_[0]) ? $_[0] : $file_path;
    if ($debug >= 1)
       {
       print "Hashing: ".$path."\n";
    }

    my $fh;
    if (!open($fh, '<', $path))
       {
       print "Can't open '$path' for hashing: $!\n";
       for my $key (qw(MD5 SHA1 SHA256))
          {
          delete $all_files{$path}{$key};
       }
       return 0;
    }
    binmode($fh);

    my $md5obj    = Digest::MD5->new;
    my $sha1obj   = Digest::SHA->new(1);
    my $sha256obj = Digest::SHA->new(256);

    my $chunk;
    while (1)
       {
       my $got = read($fh, $chunk, 65536);
       if (!defined($got))
          {
          print "Can't read '$path' for hashing: $!\n";
          close($fh);
          for my $key (qw(MD5 SHA1 SHA256))
             {
             delete $all_files{$path}{$key};
          }
          return 0;
       }
       last if $got == 0;
       for my $digest ($md5obj, $sha1obj, $sha256obj)
          {
          $digest->add($chunk);
       }
    }
    close($fh);

    # The $local* globals are kept for any code that still reads them.
    $localMD5    = $md5obj->hexdigest;
    $localSHA1   = $sha1obj->hexdigest;
    $localSHA256 = $sha256obj->hexdigest;
    $all_files{$path}{'MD5'}    = $localMD5;
    $all_files{$path}{'SHA1'}   = $localSHA1;
    $all_files{$path}{'SHA256'} = $localSHA256;
    if ($debug >= 2)
       {
       print "LocalMD5: ".$all_files{$path}{'MD5'}."\n";
       print "LocalSHA1: ".$all_files{$path}{'SHA1'}."\n";
       print "LocalSHA256: ".$all_files{$path}{'SHA256'}."\n";
    }
    return 1;
}



#####  Look a file's hashes up in the local results table and copy the stored
#####  verdict into %all_files.
#####
#####  Args:    $_[0] - path of the file (its MD5/SHA1/SHA256 must already be in
#####                   %all_files).  Without an argument the global $file_path
#####                   is used.
#####  Effects: sets %all_files{<path>}{'results','detected','scanned','span',
#####           'age'} (all undefined when no row matches), prints them, and -
#####           when the stored detection count is above zero - marks the file
#####           in %infected_files and records it in the infected table.
#####
#####  NOTE(review): the row is read by position (columns 6..10), which assumes
#####  the results table has exactly the nine columns named in the INSERT
#####  statement, in that order, followed by the computed span and age - confirm
#####  against the table definition.
sub check_local_results
    {
    my $path = defined($_[0]) ? $_[0] : $file_path;

    $select_prpst->execute($all_files{$path}{'MD5'},
                           $all_files{$path}{'SHA1'},
                           $all_files{$path}{'SHA256'});

    my @row = $select_prpst->fetchrow_array();
    # Only the first matching row is used; release the statement handle.
    $select_prpst->finish();

    $all_files{$path}{'results'}  = $row[6];
    $all_files{$path}{'detected'} = $row[7];
    $all_files{$path}{'scanned'}  = $row[8];
    $all_files{$path}{'span'}     = $row[9];
    $all_files{$path}{'age'}      = $row[10];

    # Rows without separate counts can still carry them in the "n/m" result text.
    if (!defined($all_files{$path}{'detected'}) &&
        defined($all_files{$path}{'results'}) &&
        $all_files{$path}{'results'} =~ m{^(\d+)/(\d+)})
       {
       $all_files{$path}{'detected'} = $1;
       $all_files{$path}{'scanned'}  = $2;
    }

    print "Local results: ".$all_files{$path}{'results'}."\n";
    print "Age: ".$all_files{$path}{'age'}." days.\n";
    print "Span: ".$all_files{$path}{'span'}." days.\n";

    # Decide on the stored detection count.  (This used to test $1, which was a
    # leftover from whatever regex happened to match last.)
    if (defined($all_files{$path}{'detected'}) && $all_files{$path}{'detected'} > 0)
       {
       $infected_files{$path} = 1;
       $infected_prpst->execute($sysid, $all_files{$path}{'MD5'},
                                        $all_files{$path}{'SHA1'},
                                        $all_files{$path}{'SHA256'},
                                        $path,
                                        $all_files{$path}{'detected'},
                                        $all_files{$path}{'scanned'});
    }
}





#####  Ask virustotal.com whether it already knows a file, by submitting the
#####  file's SHA256 hash to the search form and looking at the redirect target.
#####
#####  Args:    $_[0] - path of the file (its SHA256 must already be in
#####                   %all_files).  Without an argument the global $file_path
#####                   is used.
#####  Effects: - redirect contains "notfound=1": the file is marked in
#####             %unscanned_files;
#####           - redirect contains "report.html": the URL is stored as the
#####             file's 'resultURL' and pull_results is called for it;
#####           - anything else: wait 30 seconds and try again, up to
#####             $max_attempts times.  After the last failed attempt the file
#####             is marked in %unscanned_files, so a persistent failure can no
#####             longer hang the whole scan.
#####  The return value is not meaningful.
sub send_hash
    {
    $send_hash_file_path = defined($_[0]) ? $_[0] : $file_path;
    my $max_attempts = 10;

    if ($debug >= 1)
       {
       print "Sending hash for ".$send_hash_file_path."\n";
    }
#####  submit SHA256 hash to virustotal....
    my $req = HTTP::Request->new(POST => 'http://www.virustotal.com/search.html');
    $req->content_type('application/x-www-form-urlencoded');
    $req->content('chain='.$all_files{$send_hash_file_path}{'SHA256'});
    if ($debug >= 1)
       {
       print "Hash for upload: ".$all_files{$send_hash_file_path}{'SHA256'}."\n";
    }

    for my $attempt (1 .. $max_attempts)
       {
       my $res = $ua->request($req);
       # The answer is carried in the redirect target, not in the page body.
       $response = $res->header('Location');
       $response = '' unless defined($response);
       print "Location: ".$response."\n";

       if ($response =~ /(notfound=1)/)
          {
          print "Not yet scanned...\n";
          $unscanned_files{$send_hash_file_path} = 1;
          return;
       }
       if ($response =~ /report.html/)
          {
          $results_URL = $response;
          if ($debug >= 1)
             {
             print "Results are at: ".$results_URL."\n";
             print "Storing URL....\n";
          }
          $all_files{$send_hash_file_path}{'resultURL'} = $results_URL;
          sleep $sleep_between_files;
          &pull_results($send_hash_file_path);
          return;
       }

       if ($response =~ /invalid/)
          {
          print "Received invalid response...\n ...sleeping 30 seconds\n";
       } else
          {
          print "Random borkage during hash upload.......\n";
          print $res->content."\n\n\n";
       }
       sleep 30 if $attempt < $max_attempts;
    }

    print "Giving up on hash lookup for ".$send_hash_file_path." after ".$max_attempts." attempts.\n";
    $unscanned_files{$send_hash_file_path} = 1;
    return;
}






#####  Fetch a file's VT report page, scrape the verdict out of it and store it
#####  in %all_files and in the database.
#####
#####  Args:    $_[0] - path of the file; its 'resultURL' must be in %all_files.
#####                   Without an argument the global $file_path is used.
#####           $_[1] - 1 when called from rescan for a pending upload; the
#####                   file's entry in %scanning_files is then cleared once
#####                   the report is finished.
#####  Returns: 1 when a finished report was parsed and stored, 0 otherwise
#####           (report not finished yet, or the counts could not be parsed).
#####  Effects: always updates the file's 'span' and 'age'; on a finished report
#####           also 'results', 'detected' and 'scanned', %infected_files or
#####           %clean_files, the infected table (when detections > 0) and the
#####           results table (INSERT for a first result, UPDATE otherwise).
#####
#####  Every value is taken from its own regex match in list context, so a
#####  pattern that does not match yields undef instead of silently reusing the
#####  capture of an earlier match.
sub pull_results
    {
    my ($path_arg, $rescan_flag) = @_;
    $results_file_path = defined($path_arg) ? $path_arg : $file_path;
    my $this_is_a_rescan = (defined($rescan_flag) && $rescan_flag == 1) ? 1 : 0;

    if ($debug >= 2)
       {
       print "Filename ".$results_file_path."\n";
       print "Result URL: ".$all_files{$results_file_path}{'resultURL'}."\n";
    }
    my $req = HTTP::Request->new(GET => $all_files{$results_file_path}{'resultURL'});
    my $res = $ua->request($req);
    $response = $res->content;
    $response = '' unless defined($response);

    # Dates are 19 characters made of digits, ':', '-' and white space.
    my ($last_scan_date) = $response =~ /Submission date.{50}([0-9:\s-]{19})/;
    my ($current_status) = $response =~ /Current sta.{55}([a-z]{5,15})/;
    my ($detected)       = $response =~ /orcentaje.{21}(\d{1,2})/;
    my ($total_scanners) = $response =~ /status-total.{3}(\d{1,2})/;
    my ($first_seen)     = $response =~ /First seen.{9}([0-9:\s-]{19})/;

    # Let the database do the date arithmetic (undefined dates give NULL).
    $daterange_prpst->execute($last_scan_date, $first_seen);
    my @spancalc_row = $daterange_prpst->fetchrow_array();
    $all_files{$results_file_path}{'span'} = $spancalc_row[0];
    $dateage_prpst->execute($last_scan_date);
    my @age_row = $dateage_prpst->fetchrow_array();
    $all_files{$results_file_path}{'age'} = $age_row[0];

    if ($debug >= 1)
       {
       print "First seen: ".$first_seen."\n";
       print "Last scanned: ".$last_scan_date."\n";
       print "Status: ".$current_status."\n";
       print "Result: ".$detected."/".$total_scanners."\n";
       print "VT Scan age: ".$all_files{$results_file_path}{'age'}."\n";
       print "VT Scan span: ".$all_files{$results_file_path}{'span'}."\n";
    }

    # String comparison: "==" would compare both sides as numbers (0 == 0) and
    # accept any status word.
    if (!defined($current_status) || $current_status ne 'finished')
       {
       return 0;
    }
    if (!defined($detected) || !defined($total_scanners))
       {
       print "Could not parse the scan result for ".$results_file_path."\n";
       return 0;
    }

    print "Finding hashes...\n";
    my ($VTMD5)    = $response =~ /MD5.nbsp..nbsp..nbsp...\/span..([0123456789abcdef]{32})/;
    my ($VTSHA1)   = $response =~ /SHA1.nbsp..nbsp...\/span..([0123456789abcdef]{40})/;
    my ($VTSHA256) = $response =~ /SHA256..\/span..([0123456789abcdef]{64})/;
    if ($debug >= 2)
       {
       print "VT MD5: ".$VTMD5."\n";
       print "VT SHA1: ".$VTSHA1."\n";
       print "VT SHA256: ".$VTSHA256."\n";
    }

    print "Status: ".$current_status."\n";
    print "Virus detection: $detected / $total_scanners\n";
    print "Received: ".$last_scan_date."\n";

    # A result that is already known means the database row exists -> UPDATE.
    my $reupload = defined($all_files{$results_file_path}{'results'}) ? 1 : 0;

    $all_files{$results_file_path}{'results'}  = $detected."/".$total_scanners;
    $all_files{$results_file_path}{'detected'} = $detected;
    $all_files{$results_file_path}{'scanned'}  = $total_scanners;

    if ($detected > 0)
       {
       $infected_files{$results_file_path} = 1;
       # Record the file this report belongs to; during a rescan that is not
       # the file doit is currently working on.
       $infected_prpst->execute($sysid, $all_files{$results_file_path}{'MD5'},
                                $all_files{$results_file_path}{'SHA1'},
                                $all_files{$results_file_path}{'SHA256'},
                                $results_file_path,
                                $detected, $total_scanners);
    } else
       {
       $clean_files{$results_file_path} = 1;
    }

    if ($reupload == 0)
       {
       print "THIS IS A FIRST SCAN!!\n";
       $insert_prpst->execute($all_files{$results_file_path}{'MD5'},
                              $all_files{$results_file_path}{'SHA1'},
                              $all_files{$results_file_path}{'SHA256'},
                              $first_seen, $last_scan_date,
                              $all_files{$results_file_path}{'results'},
                              $all_files{$results_file_path}{'detected'},
                              $all_files{$results_file_path}{'scanned'});
    } else
       {
       print "THIS IS AN UPDATE!!\n";
       $update_prpst->execute($last_scan_date, $first_seen,
                              $all_files{$results_file_path}{'results'},
                              $all_files{$results_file_path}{'detected'},
                              $all_files{$results_file_path}{'scanned'},
                              $all_files{$results_file_path}{'MD5'},
                              $all_files{$results_file_path}{'SHA1'},
                              $all_files{$results_file_path}{'SHA256'});
    }

    if ($this_is_a_rescan)
       {
       # The pending upload is done; rescan will not poll it again.
       $scanning_files{$results_file_path} = 0;
    }
    return 1;
}






#####  Upload a file to virustotal.com for scanning and remember where its
#####  report will appear.
#####
#####  Args:    $_[0] - path of the file to upload.  Without an argument the
#####                   global $file_path is used.
#####  Returns: 1 when the upload was accepted (the response carried a Location
#####           header), 0 otherwise.
#####  Effects: on success stores the Location header as the file's 'resultURL',
#####           marks the file as pending in %scanning_files and increments
#####           $current_scanning_count.  Files above $max_file_size_to_scan are
#####           collected in @large_files instead.  A failed upload is reported
#####           and is NOT counted as pending, since there would be no report
#####           URL to poll for it.
sub upload_file
    {
    $upload_file_path = defined($_[0]) ? $_[0] : $file_path;
    my $filesize = -s $upload_file_path;
    if ($filesize > $max_file_size_to_scan)
       {
       push @large_files, $upload_file_path;
       print "File too big... $upload_file_path\n";
       return 0;
    }

    if ($debug >= 1)
       {
       print "Uploading $upload_file_path...\n";
    }
    my $browser = LWP::UserAgent->new;
    # Building the multipart request dies when the file cannot be opened, so
    # the call is wrapped to keep one bad file from ending the whole scan.
    my $response = eval
       {
       $browser->post('http://www.virustotal.com/file-upload/file_upload',
                      [file => [$upload_file_path]],
                      'Content_Type' => 'multipart/form-data');
    };
    if (!defined($response))
       {
       print "Upload of $upload_file_path failed: $@\n";
       return 0;
    }
    if ($debug >= 3)
       {
       print "Response content: ".$response->content;
    }

    # The report URL is returned as the redirect target of the upload.
    $results_URL = $response->header('Location');
    if (!defined($results_URL))
       {
       print "Upload of $upload_file_path returned no result URL (".$response->status_line.")\n";
       return 0;
    }
    if ($debug >= 1)
       {
       print "Result URL: ".$results_URL."\n";
    }
    $all_files{$upload_file_path}{'resultURL'} = $results_URL;
    $scanning_files{$upload_file_path} = 1;
    $current_scanning_count++;
    return 1;
}





#####  Upload a file that VT already knows and ask VT to scan it again.
#####
#####  Args:    $_[0] - path of the file.  Without an argument the global
#####                   $file_path is used.
#####  Effects: calls upload_file, then requests the file's result URL with the
#####           "force=1" parameter added.  Sleeps $sleep_between_files seconds
#####           before and after that request.
#####
#####  The parameter is appended with a plain "&" (or "?" when the URL has no
#####  query string yet); the HTML entity "&amp;" is not valid inside a URL.
#####  The site prefix is only added when the stored URL is not already absolute.
sub reupload_file
    {
    my $path = defined($_[0]) ? $_[0] : $file_path;
    &upload_file($path);

    my $result_url = $all_files{$path}{'resultURL'};
    if (!defined($result_url))
       {
       print "No result URL for ".$path."; cannot force a rescan.\n";
       return;
    }
    my $site      = ($result_url =~ m{^https?://}i) ? '' : "http://www.virustotal.com";
    my $separator = ($result_url =~ /\?/) ? '&' : '?';
    $force_scan_URL = $site.$result_url.$separator."force=1";

    sleep $sleep_between_files;
    my $browser = LWP::UserAgent->new;
    print "Forcing rescan...\n";
    print "Rescan URL: ".$force_scan_URL."\n";
    my $force_scan = $browser->get($force_scan_URL);
    sleep $sleep_between_files;
    return;
}








#####  Poll VT for every file that is still marked as pending (value 1) in
#####  %scanning_files.
#####
#####  Takes no arguments.  For each pending file pull_results is called with
#####  the rescan flag set; when it returns 1 the pending counter
#####  $current_scanning_count is decremented.  The script sleeps
#####  $sleep_between_files seconds after each request; entries that are
#####  already finished cause no request and therefore no sleep.
#####  The return value is not meaningful.
sub rescan
    {
    foreach my $scanning (sort keys %scanning_files)
       {
       next unless $scanning_files{$scanning} == 1;

       if ($debug >= 1)
          {
          print "Retrieving results for: ".$scanning."\n";
       }
       my $finished = &pull_results($scanning, 1);
       if (defined($finished) && $finished == 1)
          {
          $current_scanning_count--;
       }
#####   don't beat the crap out of virustotal.com
       sleep $sleep_between_files;
    }
    return;
}
