Tripal 0.3b
tripal_feature/syncFeatures.php
Go to the documentation of this file.
00001 <?php
00002 
00003 
# This script can be run as a stand-alone script to sync features from
# chado to drupal.  The Drupal bootstrap file is included through a relative
# path, so the script is expected to be started from the Drupal root.
//
// Parameter f specifies the feature_id to sync
//   -f <feature_id>  will sync that single feature
//   -f 0             will sync all features

$arguments = getopt("f:");

if(isset($arguments['f'])){
   if(!is_numeric($arguments['f'])){
      // a value such as "abc" (or a repeated -f, which getopt returns as an
      // array) used to compare as "not greater than 0" and silently started
      // a sync of every feature.  Refuse it instead.
      print "The -f parameter must be a numeric feature_id (use 0 to sync all features)\n";
   }
   else{
      // Drupal expects these server variables to be present while it
      // bootstraps, but they are not set when running from the command line
      $drupal_base_url = parse_url('http://www.example.com');
      $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
      $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
      $_SERVER['REMOTE_ADDR'] = NULL;
      $_SERVER['REQUEST_METHOD'] = NULL;

      require_once 'includes/bootstrap.inc';
      drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

      $feature_id = intval($arguments['f']);

      if($feature_id > 0){
         tripal_feature_sync_feature($feature_id);
      }
      else{
         print "syncing all features...\n";
         tripal_feature_sync_features();
      }
   }
}
/**
 * Resets the URL alias of every feature that has been synced with Drupal.
 *
 * Reads every row of the chado_feature table, looks up the matching feature
 * record in chado and passes both to tripal_feature_set_feature_url().
 *
 * @param $job_id
 *   Optional.  Accepted for callers that pass a job id; it is not used here.
 */
function tripal_feature_set_urls($job_id = NULL){
   // first get the list of features that have been synced
   $nodes = db_query("SELECT * FROM {chado_feature}");
   while($node = db_fetch_object($nodes)){
      // now get the feature details
      $feature_arr = tripal_core_chado_select('feature',
         array('feature_id','name','uniquename'),
         array('feature_id' => $node->feature_id));

      // a chado_feature row can outlive its feature in chado.  Without a
      // feature record there is nothing to build an alias from, so skip the
      // row rather than writing an alias made of the prefix alone
      if(empty($feature_arr[0])){
         print "Skipping node/$node->nid: feature $node->feature_id was not found in chado\n";
         continue;
      }

      tripal_feature_set_feature_url($node,$feature_arr[0]);
   }
}
/**
 * Sets the URL alias of a feature node.
 *
 * The alias is chosen by the 'chado_feature_url' variable: the feature name,
 * the feature unique name or, by default, the accession prefix (variable
 * 'chado_feature_accession_prefix') followed by the feature_id.  Any alias
 * the node already has is removed first.
 *
 * @param $node
 *   An object with the nid of the feature node.
 * @param $feature
 *   An object with the feature_id, name and uniquename of the feature.
 */
function tripal_feature_set_feature_url($node,$feature){

   // without both a node id and a feature record there is nothing to alias
   if(!is_object($node) or empty($node->nid) or !is_object($feature)){
      return;
   }
   $src = "node/$node->nid";

   // determine which URL alias to use
   $alias_type = variable_get('chado_feature_url','internal ID');
   $aprefix = variable_get('chado_feature_accession_prefix','ID');
   switch ($alias_type) {
      case 'feature name':
         $url_alias = $feature->name;
         break;
      case 'feature unique name':
         $url_alias = $feature->uniquename;
         break;
      default:
         $url_alias = $aprefix . $feature->feature_id;
   }
   print "Setting $alias_type as URL alias for $feature->name: $src => $url_alias\n";

   // remove any previous alias.  This goes through path_set_alias() instead
   // of a direct DELETE on {url_alias} so that Drupal's static alias cache is
   // reset as well; with a stale cache the call below can mistake the old
   // alias for an existing one and skip the insert
   path_set_alias($src);
   // add the new alias
   path_set_alias($src,$url_alias);
}
/**
 * Syncs the chado features of the allowed types with Drupal.
 *
 * Collects the ids of all chado features whose type is listed in the
 * 'chado_feature_types' variable and whose organism has been synced, then
 * runs syncFeatures.php in a separate PHP process for every feature that has
 * no entry in the chado_feature table yet.
 *
 * @param $max_sync
 *   Optional.  Stop after this many features have been examined (features
 *   that are already synced count as well).  0 means no limit.
 * @param $job_id
 *   Optional.  When set, the progress of this job is updated via
 *   tripal_job_set_progress() roughly every 1% of the features.
 *
 * @return
 *   Always an empty string.
 *
 * @ingroup tripal_feature
 */
function tripal_feature_sync_features ($max_sync = 0, $job_id = NULL){

   // get the list of available sequence ontology terms for which
   // we will build drupal pages from features in chado.  If a feature
   // is not one of the specified types we won't build a node for it.
   $allowed_types = variable_get('chado_feature_types','EST contig');
   // split on any run of whitespace and drop empty entries, so leading or
   // trailing whitespace in the setting cannot produce an empty type name
   $so_terms = preg_split("/[\s\n\r]+/",$allowed_types,-1,PREG_SPLIT_NO_EMPTY);
   $allowed_types = implode(' ',$so_terms);

   print "Looking for features of type: $allowed_types\n";

   // get the list of organisms that are synced and only include features from
   // those organisms
   $org_ids = array();
   foreach(organism_get_synced() as $org){
      // the list may hold empty entries for organisms missing from chado
      if($org and $org->organism_id){
         $org_ids[] = intval($org->organism_id);
      }
   }

   // an empty list would result in an invalid WHERE clause
   if(!$so_terms or !$org_ids){
      print "No feature types or no synced organisms found, nothing to sync\n";
      return '';
   }

   // use this SQL statement to get the features that we're going to upload.
   // The type names and organism ids are passed as query arguments so that
   // they are escaped by the database layer
   $sql = "SELECT feature_id ".
          "FROM {FEATURE} F ".
          "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
          "  INNER JOIN CV on CV.cv_id = CVT.cv_id ".
          "WHERE CVT.name IN (" . db_placeholders($so_terms,'varchar') . ") ".
          "  AND F.organism_id IN (" . db_placeholders($org_ids,'int') . ") ".
          "  AND CV.name = 'sequence' ".
          "ORDER BY feature_id";

   // get the list of features
   $previous_db = tripal_db_set_active('chado');  // use chado database
   $results = db_query($sql,array_merge($so_terms,$org_ids));
   tripal_db_set_active($previous_db);  // now use drupal database
   if(!$results){
      return '';
   }

   // load into ids array
   $ids = array();
   while($id = db_fetch_object($results)){
      $ids[] = $id->feature_id;
   }
   $count = count($ids);

   // make sure our vocabularies are set before proceeding
   tripal_feature_set_vocabulary();

   // pre-create the SQL statement that will be used to check
   // if a feature has already been synced.  We skip features
   // that have been synced
   $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

   // the sync script is the same for every feature
   $script = drupal_get_path('module', 'tripal_feature') . "/syncFeatures.php";

   // Iterate through features that need to be synced.  The interval is at
   // least 1 so that the modulo below never divides by zero when there are
   // fewer than 100 features
   $interval = max(1,intval($count * 0.01));
   $i = 0;
   foreach($ids as $feature_id){
      // update the job status every 1% features
      if($job_id and $i % $interval == 0){
         tripal_job_set_progress($job_id,intval(($i/$count)*100));
      }
      // if we have a maximum number to sync then stop when we get there
      // if not then just continue on
      if($max_sync and $i == $max_sync){
         return '';
      }
      if(!db_fetch_object(db_query($sql,$feature_id))){

         # parsing all the features can cause memory overruns
         # we are not sure why PHP does not clean up the memory as it goes
         # to avoid this problem we will call this script through an
         # independent system call
         print "$i of $count Syncing feature id: $feature_id\n";
         $cmd = "php " . escapeshellarg($script) . " -f " . intval($feature_id);
         system($cmd);

      }
      $i++;
   }

   return '';
}
00166 
/**
 * Syncs a single chado feature with Drupal.
 *
 * Loads the feature and its synonyms from chado, removes stale node and
 * chado_feature records, creates a chado_feature node when none exists yet
 * and finally sets the taxonomy and the URL alias of the node.
 *
 * @param $feature_id
 *   The chado feature_id of the feature to sync.
 *
 * @return
 *   An empty string, or the array of form errors when the new node does not
 *   pass validation.
 *
 * @ingroup tripal_feature
 */
function tripal_feature_sync_feature ($feature_id){

   global $user;
   $create_node = 1;   // set to 0 if the node exists and we just sync and not create

   // if we don't have a feature_id then return
   if(!$feature_id){
      drupal_set_message(t("Please provide a feature_id to sync"));
      return '';
   }

   // get information about this feature
   $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
           "    O.species,CVT.name as cvname,F.residues,F.organism_id ".
           "FROM {FEATURE} F ".
           "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
           "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
           "WHERE F.feature_id = %d";
   $previous_db = tripal_db_set_active('chado');  // use chado database
   $feature = db_fetch_object(db_query($fsql,$feature_id));
   tripal_db_set_active($previous_db);  // now use drupal database

   // without a feature record everything below would run on empty values
   // (including the node cleanup, which would then match on an empty title)
   if(!$feature){
      drupal_set_message(t("@id: The feature could not be found in chado",
         array('@id' => $feature_id)));
      return '';
   }

   // get the synonyms for this feature
   $synsql = "SELECT S.name ".
             "FROM {feature_synonym} FS ".
             "  INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
             "WHERE FS.feature_id = %d";
   $previous_db = tripal_db_set_active('chado');  // use chado database
   $synonyms = db_query($synsql,$feature_id);
   tripal_db_set_active($previous_db);  // now use drupal database

   // now add these synonyms to the feature object as a single string
   $synstring = '';
   while($synonym = db_fetch_object($synonyms)){
      $synstring .= "$synonym->name\n";
   }
   $feature->synonyms = $synstring;

   // check to make sure that we don't have any nodes with this feature name as a title
   // but without a corresponding entry in the chado_feature table if so then we want to
   // clean up that node.  (If a node is found we don't know if it belongs to our feature or
   // not since features can have the same name/title.)  Only feature nodes are
   // considered, so that a node of another content type that happens to have the
   // same title is never deleted.
   $tsql =  "SELECT * FROM {node} N ".
            "WHERE title = '%s' AND type = 'chado_feature'";
   $cnsql = "SELECT * FROM {chado_feature} ".
            "WHERE nid = %d";
   $nodes = db_query($tsql,$feature->name);
   // cycle through all nodes that may have this title
   while($node = db_fetch_object($nodes)){
      $feature_nid = db_fetch_object(db_query($cnsql,$node->nid));
      if(!$feature_nid){
         drupal_set_message(t("@id: A node is present but the chado_feature entry is missing... correcting",
            array('@id' => $feature_id)));
         node_delete($node->nid);
      }
   }

   // check if this feature already exists in the chado_feature table.
   // if we have a chado feature, we want to check to see if we have a node
   $cfsql = "SELECT * FROM {chado_feature} ".
            "WHERE feature_id = %d";
   $nsql =  "SELECT * FROM {node} ".
            "WHERE nid = %d";
   $chado_feature = db_fetch_object(db_query($cfsql,$feature->feature_id));
   if($chado_feature){
      drupal_set_message(t("@id: A chado_feature entry exists",
         array('@id' => $feature_id)));
      $node = db_fetch_object(db_query($nsql,$chado_feature->nid));
      if(!$node){
         // if we have a chado_feature but not a node then we have a problem and
         // need to cleanup
         drupal_set_message(t("@id: The node is missing, but has a chado_feature entry... correcting",
            array('@id' => $feature_id)));
         $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
         db_query($df_sql,$feature_id);
      } else {
         drupal_set_message(t("@id: A corresponding node exists",
            array('@id' => $feature_id)));
         $create_node = 0;
      }
   }

   // if we've encountered an error then just return.
   if($error_msg = db_error()){
      //print "$error_msg\n";
      return '';
   }

   // if a drupal node does not exist for this feature then we want to
   // create one.
   if($create_node){
      drupal_set_message(t("@id: Creating node @name",
         array('@id' => $feature_id, '@name' => $feature->name)));
      $new_node = new stdClass();
      $new_node->type = 'chado_feature';
      $new_node->uid = $user->uid;
      // genus and species come from the chado query above, so the organism
      // does not have to be looked up a second time
      $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $feature->genus $feature->species";
      $new_node->fname = "$feature->name";
      $new_node->uniquename = "$feature->uniquename";
      $new_node->feature_id = $feature->feature_id;
      $new_node->residues = $feature->residues;
      $new_node->organism_id = $feature->organism_id;
      $new_node->feature_type = $feature->cvname;
      $new_node->synonyms = $feature->synonyms;

      // validate the node and if okay then submit
      node_validate($new_node);
      if ($errors = form_get_errors()) {
         foreach($errors as $key => $msg){
            drupal_set_message($msg);
         }
         return $errors;
      } else {
         $node = node_submit($new_node);
         node_save($node);
      }

   }
   else {
      // the chado_feature row carries the nid that is needed below
      $node = $chado_feature;
   }


   // set the taxonomy for this node
   drupal_set_message(t("@id (@nid): setting taxonomy",
      array('@id' => $feature_id, '@nid' => $node->nid)));
   tripal_feature_set_taxonomy($node,$feature_id);

   // set the URL alias for this node
   tripal_feature_set_feature_url($node,$feature);


   return '';
}
00317 
00318 
00319 
/**
 *  Returns a list of organisms that are currently synced with Drupal
 *
 *  Every row of the chado_organism table is looked up in the Organism table
 *  of chado.  Organisms that can no longer be found in chado are left out.
 *
 * @return
 *   An array of organism records (objects) as fetched from chado.  The array
 *   is empty when no organism is synced.
 *
 * @ingroup tripal_feature
 */
function organism_get_synced() {

   // use this SQL for getting synced organisms
   $dsql =  "SELECT * FROM {chado_organism}";
   $orgs = db_query($dsql);

   // use this SQL statement for getting the organisms
   $csql =  "SELECT * FROM {Organism} ".
            "WHERE organism_id = %d";

   $org_list = array();

   // iterate through the organisms and build an array of those that are synced
   while($org = db_fetch_object($orgs)){
      $previous_db = tripal_db_set_active('chado');  // use chado database
      $info = db_fetch_object(db_query($csql,$org->organism_id));
      tripal_db_set_active($previous_db);  // now use drupal database

      // the lookup yields FALSE when the organism is missing from chado;
      // do not hand such an entry to callers that expect an object
      if($info){
         $org_list[] = $info;
      }
   }
   return $org_list;
}
00346 
00347 
00348 
00349 
00350 
00351 ?>
 All Classes Files Functions Variables