|
Tripal 0.3b
|
<?php

/**
 * @file
 * Syncs features from chado to Drupal nodes.
 *
 * This script can be run as a stand-alone script to sync all the features
 * from chado to drupal:
 *
 *   php syncFeatures.php -f <feature_id>
 *
 * Parameter f specifies the feature_id to sync; -f 0 will sync all features.
 *
 * NOTE(review): 'includes/bootstrap.inc' is required with a relative path, so
 * the script presumably has to be started from the Drupal root -- confirm.
 */

$arguments = getopt("f:");

if (isset($arguments['f'])) {
  // Fake the request environment that the Drupal bootstrap reads.
  $drupal_base_url = parse_url('http://www.example.com');
  $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  $_SERVER['REMOTE_ADDR'] = NULL;
  $_SERVER['REQUEST_METHOD'] = NULL;

  require_once 'includes/bootstrap.inc';
  drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

  $feature_id = $arguments['f'];

  if ($feature_id > 0) {
    tripal_feature_sync_feature($feature_id);
  }
  else {
    print "syncing all features...\n";
    tripal_feature_sync_features();
  }
}

/**
 * Resets the URL alias of every feature that has already been synced.
 *
 * Walks the chado_feature table, loads the matching feature record through
 * tripal_core_chado_select() and hands both to
 * tripal_feature_set_feature_url().
 *
 * @param $job_id
 *   Optional job identifier. It is accepted for interface compatibility but
 *   is not used by this function.
 */
function tripal_feature_set_urls($job_id = NULL) {
  // first get the list of features that have been synced
  $sql = "SELECT * FROM {chado_feature}";
  $nodes = db_query($sql);
  while ($node = db_fetch_object($nodes)) {
    // now get the feature details
    $feature_arr = tripal_core_chado_select('feature',
      array('feature_id', 'name', 'uniquename'),
      array('feature_id' => $node->feature_id));

    // A chado_feature row whose feature could not be loaded has nothing to
    // build an alias from, so skip it instead of dereferencing a missing row.
    if (empty($feature_arr[0])) {
      continue;
    }

    tripal_feature_set_feature_url($node, $feature_arr[0]);
  }
}

/**
 * Sets the URL alias of a single feature node.
 *
 * The alias is chosen by the 'chado_feature_url' variable: the feature name,
 * the feature unique name, or (default) the accession prefix from
 * 'chado_feature_accession_prefix' followed by the feature_id. Any existing
 * alias for the node path is removed first.
 *
 * @param $node
 *   Object with a nid property (a node or a chado_feature row).
 * @param $feature
 *   Object with feature_id, name and uniquename properties.
 */
function tripal_feature_set_feature_url($node, $feature) {

  // determine which URL alias to use
  $alias_type = variable_get('chado_feature_url', 'internal ID');
  $aprefix = variable_get('chado_feature_accession_prefix', 'ID');
  switch ($alias_type) {
    case 'feature name':
      $url_alias = $feature->name;
      break;
    case 'feature unique name':
      $url_alias = $feature->uniquename;
      break;
    default:
      $url_alias = "$aprefix$feature->feature_id";
  }
  print "Setting $alias_type as URL alias for $feature->name: node/$node->nid => $url_alias\n";
  // remove any previous alias
  db_query("DELETE FROM {url_alias} WHERE src = '%s'", "node/$node->nid");
  // add the new alias
  path_set_alias("node/$node->nid", $url_alias);
}

/**
 * Syncs all chado features of the configured types to Drupal nodes.
 *
 * Only features whose type is listed in the 'chado_feature_types' variable
 * and whose organism has been synced are considered. Features that already
 * have a chado_feature entry are skipped; every other feature is synced by
 * running this script in a separate PHP process with -f <feature_id>.
 *
 * @param $max_sync
 *   Stop once this many features have been visited. 0 means no limit.
 * @param $job_id
 *   Optional job identifier; when set, progress is reported through
 *   tripal_job_set_progress().
 *
 * @return
 *   Always an empty string.
 *
 * @ingroup tripal_feature
 */
function tripal_feature_sync_features($max_sync = 0, $job_id = NULL) {
  // get the list of available sequence ontology terms for which
  // we will build drupal pages from features in chado. If a feature
  // is not one of the specified types we won't build a node for it.
  $allowed_types = variable_get('chado_feature_types', 'EST contig');
  $allowed_types = preg_replace("/[\s\n\r]+/", " ", $allowed_types);

  print "Looking for features of type: $allowed_types\n";

  // Build the type filter with placeholders so that db_query() escapes the
  // configured names. Empty terms (leading/trailing whitespace) are ignored.
  $args = array();
  $cvt_clauses = array();
  foreach (explode(' ', trim($allowed_types)) as $term) {
    if ($term === '') {
      continue;
    }
    $cvt_clauses[] = "CVT.name = '%s'";
    $args[] = $term;
  }

  // get the list of organisms that are synced and only include features from
  // those organisms. organism_get_synced() may contain empty entries for
  // organisms that could not be loaded, so those are skipped.
  $org_clauses = array();
  foreach (organism_get_synced() as $org) {
    if ($org && $org->organism_id) {
      $org_clauses[] = "F.organism_id = %d";
      $args[] = $org->organism_id;
    }
  }

  // load the ids of the features that we're going to upload
  $ids = array();
  if ($cvt_clauses && $org_clauses) {
    $sql = "SELECT feature_id ".
           "FROM {FEATURE} F ".
           "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
           "  INNER JOIN CV on CV.cv_id = CVT.cv_id ".
           "WHERE (" . implode(' OR ', $cvt_clauses) . ") ".
           "  AND (" . implode(' OR ', $org_clauses) . ") ".
           "  AND CV.name = 'sequence' ".
           "ORDER BY feature_id";

    $previous_db = tripal_db_set_active('chado');  // use chado database
    $results = db_query($sql, $args);
    tripal_db_set_active($previous_db);  // now use drupal database

    while ($id = db_fetch_object($results)) {
      $ids[] = $id->feature_id;
    }
  }
  else {
    // An empty filter would produce "()" in the WHERE clause, which is a
    // SQL syntax error, and could not match any feature anyway.
    print "No feature types or synced organisms found; nothing to sync.\n";
  }
  $count = count($ids);

  // make sure our vocabularies are set before proceeding
  tripal_feature_set_vocabulary();

  // pre-create the SQL statement that will be used to check
  // if a feature has already been synced. We skip features
  // that have been synced
  $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

  // Update the job status every 1% of the features. With fewer than 100
  // features 1% rounds down to 0, so clamp to 1 to avoid a modulo by zero.
  $interval = max(1, intval($count * 0.01));

  // Iterate through features that need to be synced
  $i = 0;
  foreach ($ids as $feature_id) {
    if ($job_id and $i % $interval == 0) {
      tripal_job_set_progress($job_id, intval(($i / $count) * 100));
    }
    // if we have a maximum number to sync then stop when we get there
    // if not then just continue on
    if ($max_sync and $i == $max_sync) {
      return '';
    }
    if (!db_fetch_object(db_query($sql, $feature_id))) {
      // parsing all the features can cause memory overruns
      // we are not sure why PHP does not clean up the memory as it goes
      // to avoid this problem we will call this script through an
      // independent system call
      print "$i of $count Syncing feature id: $feature_id\n";
      $script = drupal_get_path('module', 'tripal_feature') . "/syncFeatures.php";
      $cmd = "php " . escapeshellarg($script) . " -f " . escapeshellarg($feature_id);
      system($cmd);
    }
    $i++;
  }

  return '';
}

/**
 * Syncs a single chado feature to a Drupal node.
 *
 * Loads the feature and its synonyms from chado, removes inconsistent
 * node / chado_feature leftovers, creates a 'chado_feature' node when none
 * exists yet, and then sets the taxonomy and the URL alias of the node.
 *
 * @param $feature_id
 *   The chado feature_id to sync.
 *
 * @return
 *   An empty string, or the array returned by form_get_errors() when the
 *   new node fails validation.
 *
 * @ingroup tripal_feature
 */
function tripal_feature_sync_feature($feature_id) {
  global $user;
  $create_node = 1;   // set to 0 if the node exists and we just sync and not create

  // if we don't have a feature_id then return
  if (!$feature_id) {
    drupal_set_message(t("Please provide a feature_id to sync"));
    return '';
  }

  // get information about this feature
  $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
          "  O.species,CVT.name as cvname,F.residues,F.organism_id ".
          "FROM {FEATURE} F ".
          "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
          "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
          "WHERE F.feature_id = %d";
  $previous_db = tripal_db_set_active('chado');  // use chado database
  $feature = db_fetch_object(db_query($fsql, $feature_id));
  tripal_db_set_active($previous_db);  // now use drupal database

  // Without a feature record there is nothing to sync. Continuing would
  // match nodes against an empty title below and create a node with no data.
  if (!$feature) {
    drupal_set_message(t("@feature_id: The feature could not be found in chado", array('@feature_id' => $feature_id)));
    return '';
  }

  // get the synonyms for this feature
  $synsql = "SELECT S.name ".
            "FROM {feature_synonym} FS ".
            "  INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
            "WHERE FS.feature_id = %d";
  $previous_db = tripal_db_set_active('chado');  // use chado database
  $synonyms = db_query($synsql, $feature_id);
  tripal_db_set_active($previous_db);  // now use drupal database

  // now add these synonyms to the feature object as a single string
  $synstring = '';
  while ($synonym = db_fetch_object($synonyms)) {
    $synstring .= "$synonym->name\n";
  }
  $feature->synonyms = $synstring;

  // check to make sure that we don't have any nodes with this feature name as a title
  // but without a corresponding entry in the chado_feature table if so then we want to
  // clean up that node.  (If a node is found we don't know if it belongs to our feature or
  // not since features can have the same name/title.)
  // NOTE(review): nodes created further down get a longer title (name,
  // uniquename, type, organism), so this lookup by bare feature name may not
  // match them -- confirm which title ends up stored.
  $tsql = "SELECT * FROM {node} N ".
          "WHERE title = '%s'";
  $cnsql = "SELECT * FROM {chado_feature} ".
           "WHERE nid = %d";
  $nodes = db_query($tsql, $feature->name);
  // cycle through all nodes that may have this title
  while ($node = db_fetch_object($nodes)) {
    $feature_nid = db_fetch_object(db_query($cnsql, $node->nid));
    if (!$feature_nid) {
      drupal_set_message(t("$feature_id: A node is present but the chado_feature entry is missing... correcting"));
      node_delete($node->nid);
    }
  }

  // check if this feature already exists in the chado_feature table.
  // if we have a chado feature, we want to check to see if we have a node
  $cfsql = "SELECT * FROM {chado_feature} ".
           "WHERE feature_id = %d";
  $nsql = "SELECT * FROM {node} ".
          "WHERE nid = %d";
  $chado_feature = db_fetch_object(db_query($cfsql, $feature->feature_id));
  if ($chado_feature) {
    drupal_set_message(t("$feature_id: A chado_feature entry exists"));
    $node = db_fetch_object(db_query($nsql, $chado_feature->nid));
    if (!$node) {
      // if we have a chado_feature but not a node then we have a problem and
      // need to cleanup
      drupal_set_message(t("$feature_id: The node is missing, but has a chado_feature entry... correcting"));
      $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
      db_query($df_sql, $feature_id);
    }
    else {
      drupal_set_message(t("$feature_id: A corresponding node exists"));
      $create_node = 0;
    }
  }

  // if we've encountered an error then just return.
  if (db_error()) {
    return '';
  }

  // if a drupal node does not exist for this feature then we want to
  // create one.
  if ($create_node) {
    drupal_set_message(t("$feature_id: Creating node $feature->name"));
    $new_node = new stdClass();
    $new_node->type = 'chado_feature';
    $new_node->uid = $user->uid;
    // genus and species were already loaded from chado by the feature query
    // above (join on Organism), so no second organism lookup is needed.
    $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $feature->genus $feature->species";
    $new_node->fname = "$feature->name";
    $new_node->uniquename = "$feature->uniquename";
    $new_node->feature_id = $feature->feature_id;
    $new_node->residues = $feature->residues;
    $new_node->organism_id = $feature->organism_id;
    $new_node->feature_type = $feature->cvname;
    $new_node->synonyms = $feature->synonyms;

    // validate the node and if okay then submit
    node_validate($new_node);
    if ($errors = form_get_errors()) {
      foreach ($errors as $key => $msg) {
        drupal_set_message($msg);
      }
      return $errors;
    }
    else {
      $node = node_submit($new_node);
      node_save($node);
    }
  }
  else {
    // the chado_feature row carries the nid that the calls below need
    $node = $chado_feature;
  }

  // set the taxonomy for this node
  drupal_set_message(t("$feature_id ($node->nid): setting taxonomy"));
  tripal_feature_set_taxonomy($node, $feature_id);

  // reindex the node (currently disabled)
  // drupal_set_message(t("$feature_id( $node->nid): indexing"));
  // tripal_feature_index_feature ($feature_id,$node->nid);

  // set the URL alias for this node
  tripal_feature_set_feature_url($node, $feature);

  return '';
}

/**
 * Returns a list of organisms that are currently synced with Drupal
 *
 * @return
 *   An array with one entry per chado_organism row: the result of fetching
 *   the matching Organism record from chado. An entry is empty when that
 *   fetch returns no record.
 *
 * @ingroup tripal_feature
 */
function organism_get_synced() {

  // use this SQL for getting synced organisms
  $dsql = "SELECT * FROM {chado_organism}";
  $orgs = db_query($dsql);

  // use this SQL statement for getting the organisms
  $csql = "SELECT * FROM {Organism} ".
          "WHERE organism_id = %d";

  $org_list = array();

  // iterate through the organisms and build an array of those that are synced
  while ($org = db_fetch_object($orgs)) {
    $previous_db = tripal_db_set_active('chado');  // use chado database
    $info = db_fetch_object(db_query($csql, $org->organism_id));
    tripal_db_set_active($previous_db);  // now use drupal database
    $org_list[] = $info;
  }
  return $org_list;
}