#!/usr/bin/env php
<?php # (jEdit options) :folding=explicit:collapseFolds=1:
/*****************************************************************************
INPUTS / SWITCHES (via $_SERVER['argv']):
    inDir           a directory of files ending in ".pdb"
    -q              quiet mode (no progress messages)
    -filter         operate in Unix filter mode (stdin to stdout)
    -nostats        don't output info from PDB headers
    -noclash        don't output/calc clashscore, etc.
    -nocbeta        don't output/calc C-beta deviations
    -norota         don't output/calc rotamer outliers
    -norama         don't output/calc Ramachandran outliers
    -nogeom         don't output/calc bond geometry outliers
    -dopucker       do RNA base phosphate perpendicular distance calc
    -dosuite        do RNA suite conformation analysis

OUTPUTS / RESULTS:
    Runs clash, Rama, rota, Cb dev analysis on all input files.

*****************************************************************************/
// EVERY *top-level* page must start this way:
// 1. Define its relationship to the root of the MolProbity installation.
// Pages in subdirectories of lib/ or public_html/ will need more "/.." 's.
    if(!defined('MP_BASE_DIR')) define('MP_BASE_DIR', realpath(dirname(__FILE__).'/..'));
// 2. Include core functionality - defines constants, etc.
    require_once(MP_BASE_DIR.'/lib/core.php');
    require_once(MP_BASE_DIR.'/lib/model.php');
    require_once(MP_BASE_DIR.'/lib/analyze.php');
    require_once(MP_BASE_DIR.'/lib/eff_resol.php');
// 3. Initialize the environment only. No session is restored or created here:
// the MAIN section below calls mpStartSession(true) itself, once per input
// file, so at this point mpInitEnvirons() is called instead.
    mpInitEnvirons();       // use std PATH, etc.
    //mpStartSession(true);   // create session dir
// 4. Set up reasonable values to emulate CLI behavior if we're CGI
    set_time_limit(0); // 0 = no execution time limit; don't want to bail after 30 sec!
// 5. Unlimited memory for processing large files (-1 = no limit):
    ini_set('memory_limit', -1);

#{{{ summaryAnalysis - run all analysis for one file
############################################################################
/**
* Runs every enabled analysis on one model and returns the results as a
* single colon-delimited text line terminated by a newline.
*
* Which fields are produced is controlled by the global $opt* switches set in
* the MAIN section; the field order matches the header line MAIN prints in
* verbose mode. The returned string starts with ":" followed by the session's
* reduce_blength setting (the "x-H_type" column), so the caller is expected
* to echo the file name first.
*
* Raw analysis output is written under the session's raw-data directory,
* which is created if it does not exist yet.
*
* Uses one session per file processed to avoid overflowing space requirements.
*
* @param  string $modelID  key of the model in $_SESSION['models']
* @return string           ":field:field:...\n"
*/
function summaryAnalysis($modelID)
{
    global $optStats, $optClash, $optCbeta, $optRota, $optRama, $optGeom, $optPuck, $optSuite;
    $out = "";

    $model =& $_SESSION['models'][$modelID];
    $reduce_blength = $_SESSION['reduce_blength'];
    //$bcutval = 40; TO-DO - make these user controllable
    //$ocutval = 10;

    $out .= ":$reduce_blength";

    // The header statistics are loaded unconditionally: -nostats only
    // suppresses their *output*, while the clash statistics and the
    // effective-resolution percentile below still need the resolution.
    $pdbstats = isset($model['stats']) ? $model['stats'] : array();
    $resolution = isset($pdbstats['resolution']) ? $pdbstats['resolution'] : null;
    if($optStats)
    {
        $out .= ":$pdbstats[chains]:$pdbstats[residues]:$pdbstats[nucacids]:$pdbstats[resolution]:$pdbstats[rvalue]:$pdbstats[rfree]";
    }

    // Run analysis; load data
    $pdbfile = $_SESSION['dataDir'].'/'.MP_DIR_MODELS."/$model[pdb]";
    $rawDir  = $_SESSION['dataDir'].'/'.MP_DIR_RAWDATA;
    if(!file_exists($rawDir)) mkdir($rawDir, 0777);

    if($optClash)
    {
        $clashFile = "$rawDir/$model[prefix]clash.data";
        runClashlist($pdbfile, $clashFile, $reduce_blength);
        $clash = loadClashlist($clashFile);
        $clashstats = runClashStats($resolution, $clash['scoreAll'], $clash['scoreBlt40']);
        $out .= ":" . $clash['scoreAll'] . ":" . $clash['scoreBlt40'];
        $out .= ":$clashstats[minresol]:$clashstats[maxresol]:$clashstats[n_samples]:$clashstats[pct_rank]:$clashstats[pct_rank40]";
    }
    if($optCbeta)
    {
        $cbdevFile = "$rawDir/$model[prefix]cbdev.data";
        runCbetaDev($pdbfile, $cbdevFile);
        $cbdev = loadCbetaDev($cbdevFile);
        $badCbeta = findCbetaOutliers($cbdev);
        // Only referenced by the disabled max/median/mean columns below.
        $cbStats = calcCbetaStats($cbdev);
        $out .= ":" . count($badCbeta) . ":" . count($cbdev);
        //$out .= ":$cbStats[max]:$cbStats[median]:".round($cbStats['mean'], 3);
    }
    if($optRota)
    {
        $rotaFile = "$rawDir/$model[prefix]rota.data";
        runRotamer($pdbfile, $rotaFile);
        $rota = loadRotamer($rotaFile);
        $badRota = findRotaOutliers($rota);
        $out .= ":" . count($badRota) . ":" . count($rota);
    }
    if($optRama)
    {
        $ramaFile = "$rawDir/$model[prefix]rama.data";
        runRamachandran($pdbfile, $ramaFile);
        $rama = loadRamachandran($ramaFile);
        // Start every reported category at zero so a category that never
        // occurs is still printed as 0 and no undefined key is ever read.
        $ramaScore = array('OUTLIER' => 0, 'Allowed' => 0, 'Favored' => 0);
        foreach($rama as $r)
        {
            if(!isset($ramaScore[ $r['eval'] ])) $ramaScore[ $r['eval'] ] = 0;
            $ramaScore[ $r['eval'] ] += 1;
        }
        $out .= ":" . $ramaScore['OUTLIER'] . ":" . $ramaScore['Allowed'] . ":" . $ramaScore['Favored'] . ":" . count($rama);
    }
    if($optGeom)
    {
        $protGeomFile = "$rawDir/$model[prefix]geom.data";
        $rnaGeomFile  = "$rawDir/$model[prefix]rnageom.data";
        runValidationReport($pdbfile, $protGeomFile, "protein");
        runValidationReport($pdbfile, $rnaGeomFile, "rna");
        $bonds = array_merge(loadValidationBondReport($protGeomFile, "protein"), loadValidationBondReport($rnaGeomFile, "rna"));
        $angles = array_merge(loadValidationAngleReport($protGeomFile, "protein"), loadValidationAngleReport($rnaGeomFile, "rna"));
        // Four fields each: outliers, total, % outliers, % residues with an outlier.
        $out .= summaryGeomFields($bonds, 'bondCount');
        $out .= summaryGeomFields($angles, 'angCount');
    }
    if($optClash && $optRota && $optRama)
    {
        if((count($rota) != 0) && (count($rama) != 0))
        {
            $mer = getEffectiveResolution($clash, $rota, $rama);    // MolProbity Effective Resolution
            // Percentile is ranked against the Actual Xtalographic Resolution.
            $mer_pct = getEffectiveResolutionPercentile($mer, $resolution);
            $out .= sprintf(':%.3f', $mer);
            $out .= sprintf(':%s', $mer_pct['pct_rank']);
        }
        else
        {
            // Keep the column count stable: two empty fields.
            $out .= '::';
        }
    }
    if($optPuck)
    {
        $outfile = "$rawDir/$model[prefix]pperp.data";
        runBasePhosPerp($pdbfile, $outfile);
        $pperp = loadBasePhosPerp($outfile);
        $badPperp = findBasePhosPerpOutliers($pperp);
        $out .= ":" . count($badPperp) . ":" . count($pperp);
    }
    if($optSuite)
    {
        $outfile = "$rawDir/$model[prefix]suitename.txt";
        runSuitenameReport($pdbfile, $outfile);
        $suites = loadSuitenameReport($outfile);
        $badSuites = findSuitenameOutliers($suites);
        $out .= ":" . count($badSuites) . ":" . count($suites);
    }

    $out .= "\n"; // end of this line
    return $out;
}

/**
* Private helper for summaryAnalysis(): tallies one per-residue geometry
* report (bonds or angles) into four output fields.
*
* Each entry of $items is read for 'isOutlier', 'outCount' and the per-residue
* measurement count stored under $countKey.
*
* @param  array  $items     per-residue report entries
* @param  string $countKey  'bondCount' or 'angCount'
* @return string            ":outliers:total:pct_outliers:pct_residues_with_outlier",
*                           or ":-1:-1:-1:-1" when $items is empty. The outlier
*                           percentage alone is -1 when the total count is zero,
*                           instead of dividing by zero.
*/
function summaryGeomFields($items, $countKey)
{
    $residues = 0;          // total residues
    $outlierResidues = 0;   // residues with at least one outlier
    $measures = 0;          // total bonds / angles
    $outlierMeasures = 0;   // outlier bonds / angles
    foreach($items as $item)
    {
        if($item['isOutlier'])
        {
            $outlierMeasures += $item['outCount'];
            $outlierResidues += 1;
        }
        $measures += $item[$countKey];
        $residues += 1;
    }

    if($residues == 0) return ":-1:-1:-1:-1";

    $pctMeasures = ($measures > 0)
        ? sprintf("%.2f", 100.0 * $outlierMeasures / $measures)
        : "-1";
    $pctResidues = sprintf("%.2f", 100.0 * $outlierResidues / $residues);
    return ":$outlierMeasures:$measures:$pctMeasures:$pctResidues";
}
#}}}########################################################################

#{{{ a_function_definition - summary_statement_goes_here
############################################################################
/**
* Documentation for this function.
*/
//function someFunctionName() {}
#}}}########################################################################

# MAIN - the beginning of execution for this page
############################################################################
// Option defaults. The analyses named in the -no* switches start enabled;
// the two RNA analyses (-dopucker / -dosuite) start disabled.
$optMode    = 'batch';
$optVerbose = true;
$optStats   = true;
$optClash   = true;
$optCbeta   = true;
$optRota    = true;
$optRama    = true;
$optGeom    = true;
$optPuck    = false;
$optSuite   = false;

// Walk the command line, skipping element 0 (the name of this script).
// The first argument that is not a known switch becomes the input directory;
// a second such argument aborts the script.
if(is_array($_SERVER['argv']))
{
    foreach(array_slice($_SERVER['argv'], 1) as $arg)
    {
        switch($arg)
        {
            case '-q':          $optVerbose = false;    break;
            case '-filter':     $optMode = 'filter';    break;
            case '-nostats':    $optStats = false;      break;
            case '-noclash':    $optClash = false;      break;
            case '-nocbeta':    $optCbeta = false;      break;
            case '-norota':     $optRota = false;       break;
            case '-norama':     $optRama = false;       break;
            case '-nogeom':     $optGeom = false;       break;
            case '-dopucker':   $optPuck = true;        break;
            case '-dosuite':    $optSuite = true;       break;
            default:
                if(isset($inDir)) die("Too many or unrecognized arguments: '$arg'\n");
                $inDir = $arg;
                break;
        }
    }
}

// Filter mode: analyze a single structure read from stdin, write its summary
// line(s) to stdout, then exit without ever reaching the batch-mode code.
if($optMode == 'filter')
{
    // Stdin is first copied to a temp file, which is then handed to
    // addModelOrEnsemble() as a path.
    $inpath = mpTempfile("tmp_pdb_");
    #copy("php://stdin", $inpath); -- for some reason, this truncates the file
    mpCopy("php://stdin", $inpath);
    mpStartSession(true); // create a new session
    // Need to ignore segIDs for stupid Top500 with seg new_ for all H
    $id = addModelOrEnsemble(
            $inpath,
            basename($inpath),
            false,
            true,
            true,
            false);
    if(isset($_SESSION['ensembles'][$id]))
    {
        // An ensemble yields one output line per member model.
        foreach($_SESSION['ensembles'][$id]['models'] as $modelID)
        {
            echo basename($_SESSION['models'][$modelID]['pdb']);
            echo summaryAnalysis($modelID);
        }
    }
    else
    {
        // There is no input file name in filter mode ($inFile only exists in
        // the batch loop), so label the line with the temp file's name --
        // the same name that was passed to addModelOrEnsemble() above.
        echo basename($inpath);
        echo summaryAnalysis($id);
    }
    // Clean up and go home
    mpDestroySession();
    unlink($inpath);
    die(); // remaining code is for "batch" mode
}

// Batch mode requires an existing input directory.
if(!isset($inDir))
    die("No input directory specified.\n");
if(!is_dir($inDir))
    die("Input directory '$inDir' does not exist.\n");

if($optVerbose)
{
    // Two comment lines: the resolved input directory, then the names of the
    // columns that the enabled analyses will produce, in output order.
    $header  = "#INPUT  : ".realpath($inDir)."\n";
    $header .= "#pdbFileName:x-H_type";
    if($optStats)   $header .= ":chains:residues:nucacids:resolution:rvalue:rfree";
    if($optClash)   $header .= ":clashscore:clashscoreB<40:minresol:maxresol:n_samples:pct_rank:pct_rank40";
    if($optCbeta)   $header .= ":cbeta>0.25:numCbeta"; // max/median/mean Cbeta columns were removed by jjh
    if($optRota)    $header .= ":rota<1%:numRota";
    if($optRama)    $header .= ":ramaOutlier:ramaAllowed:ramaFavored:numRama";
    if($optGeom)    $header .= ":numbadbonds:numbonds:pct_badbonds:pct_resbadbonds:numbadangles:numangles:pct_badangles:pct_resbadangles";
    if($optClash && $optRota && $optRama)
                    $header .= ":MolProbityScore:Mol_pct_rank";
    if($optPuck)    $header .= ":numPperpOutliers:numPperp";
    if($optSuite)   $header .= ":numSuiteOutliers:numSuites";
    $header .= "\n";
    echo $header;
}

// Process every "*.pdb" entry of the directory, each in its own session.
foreach(listDir($inDir) as $inFile)
{
    if(!endsWith($inFile, ".pdb")) continue;

    mpStartSession(true); // create a new session
    $inpath = $inDir.'/'.$inFile;
    // Need to ignore segIDs for stupid Top500 with seg new_ for all H
    $id = addModelOrEnsemble($inpath, basename($inpath), false, true, true, false);

    if(!isset($_SESSION['ensembles'][$id]))
    {
        // Single model: one line, labelled with the input file name.
        echo basename($inFile);
        echo summaryAnalysis($id);
    }
    else
    {
        // Ensemble: one line per member model, labelled with that model's file.
        foreach($_SESSION['ensembles'][$id]['models'] as $modelID)
        {
            echo basename($_SESSION['models'][$modelID]['pdb']);
            echo summaryAnalysis($modelID);
        }
    }

    // Clean up and go home
    mpDestroySession();
}

############################################################################
// Clean up and go home
//mpDestroySession(); // only call this if we created one
?>
