Module xg
[hide private]
[frames] | no frames]

Source Code for Module xg

   1  #**************************************************************************** 
   2  #  Copyright (C) 2005-2010 Brian Granger <ellisonbg@gmail.com> and 
   3  #                          Barry Wark <bwark@u.washington.edu> and 
   4  #                          Beat Rupp <beatrupp@gmail.com> 
   5  #  Distributed under the terms of the BSD License.   
   6  #**************************************************************************** 
   7   
   8  # PyXG-0.3.0 
   9   
  10  """PyXG provides a Python interface to Apple's Xgrid. 
  11   
  12  Xgrid is Apple's software for building and managing clusters of  
  13  Macintosh computers for use in high performance computation.   
  14  See U{http://www.apple.com/server/macosx/technology/xgrid.html} for more details. 
  15   
  16  This module wraps the xgrid command line in Mac OS X. It will not work with 
  17  the Technology Previews of Xgrid. The command line is wrapped in this module  
  18  as the goal is to provide an interface to Xgrid that can be used from an 
  19  interactive Python prompt. The Cocoa API for Xgrid (XgridFoundation) is 
  20  based on an event-loop paradigm and is less well suited for interactive work. 
  21  If you want to use Xgrid and Python from within a Cocoa application, you should 
  22  use XgridFoundation and PyObjC. 
  23   
  24  Features 
  25  ======== 
  26   
  27      1.  Use Xgrid from within Python scripts as well as in interactive Python 
  28          sessions. 
  29      2.  Create, submit and manage simple (one task) and batch (many task) Xgrid 
  30          jobs using Python's elegant syntax. 
  31      3.  Work with multiple Xgrid controllers simultaneously. 
  32      4.  List available grids for each controller and query their status. 
  33   
  34  Quick Start 
  35  =========== 
  36   
  37  Import xg, create a Connection and Controller object: 
  38   
  39  >>> from xg import * 
  40  >>> conn = Connection(hostname='xgrid.work.com',password='secret') 
  41  >>> cont = Controller(conn) 
  42   
  43  List the grids managed by the controller: 
  44   
  45  >>> cont.gridIDs() 
  46  (0, 3) 
  47  >>> cont.grids() 
  48  [<Grid with gridID = 0>, <Grid with gridID = 3>] 
  49   
  50  Work with the default grid, listing active jobs: 
  51   
  52  >>> g = cont.grid(0) 
  53  >>> g.jobIDs() 
  54  (229, 230, 231, 232) 
  55  >>> g.printJobs() 
  56  ################################################## 
  57  id   Date Started             Status     CPU Power  
  58  ################################################## 
  59  229  2005-12-22 11:18:47 -0800 Finished   0          
  60  230  2005-12-22 11:18:50 -0800 Finished   0          
  61  231  2005-12-22 11:18:52 -0800 Finished   0          
  62  232  2005-12-22 11:18:55 -0800 Finished   0          
  63   
  64  Get a job from the default grid and work with it: 
  65   
  66  >>> j = g.job(229)       
  67  >>> j.printInfo() 
  68  { 
  69      name:  /usr/bin/cal 
  70      jobStatus:  Finished 
  71      taskCount:  1 
  72      undoneTaskCount:  0 
  73      percentDone:  100 
  74  } 
  75  >>> j.printInfo(verbose=False) 
  76  229  2005-12-22 11:18:47 -0800 Finished   0    
  77  >>> j.printSpecification() 
  78  { 
  79      applicationIdentifier :  com.apple.xgrid.cli 
  80      taskSpecifications :  {0 = {arguments = (); command = "/usr/bin/cal"; }; } 
  81      name :  /usr/bin/cal 
  82      inputFiles :  {} 
  83      submissionIdentifier :  abc 
  84  } 
  85   
  86  Get job results: 
  87   
  88  >>> j.results(stdout="job229.out",stderr="job229.err") 
  89  Job stdout saved in file: job229.out  
  90   
  91  Use a Grid object to submit a single task job: 
  92   
  93  >>> j = g.submit(cmd='/usr/bin/cal') 
  94  Job submitted with id:  234 
  95  >>> j.printInfo(verbose=False) 
  96  234  2005-12-22 13:09:52 -0800 Finished   0       
  97  """ 
  98   
  99  ##################################################################### 
 100  # Imports                                                           # 
 101  #####################################################################  
 102   
 103  import commands 
 104  from functools import wraps 
 105  import itertools 
 106  import os.path 
 107  import platform 
 108  import re 
 109  import tempfile 
 110  import time 
 111   
 112  try: 
 113      import Foundation 
 114      import objc 
 115  except ImportError, e: 
 116      print "This module requires PyObjC." 
 117      raise e 
#####################################################################
# Exceptions                                                        #
#####################################################################

class XgridError(Exception):
    """Xgrid exception class.

    @param err: Description of what went wrong. It is stored unchanged on
        the instance as C{err}.
    """

    def __init__(self, err):
        self.err = err

    def __repr__(self):
        # Wrap err in a one-element tuple: a bare "(self.err)" is not a tuple,
        # so a tuple-valued err would be unpacked as the argument list of the
        # % operator and raise TypeError instead of being formatted.
        return "Xgrid Error: %s" % (self.err,)
129
class InvalidIdentifier(XgridError):
    """Xgrid exception for invalid job or grid identifiers.

    @param id: The offending identifier. It is stored unchanged on the
        instance as C{id}.
    """

    def __init__(self, id):
        self.id = id

    def __repr__(self):
        return "Invalid Xgrid Identifier: %s" % (str(self.id),)
136
class InvalidGridIdentifier(InvalidIdentifier):
    """Invalid grid identifier exception.

    Only the text produced by repr() differs from InvalidIdentifier.
    """

    def __repr__(self):
        return "Invalid Grid Identifier: %s" % (str(self.id),)
141
class InvalidJobIdentifier(InvalidIdentifier):
    """Invalid job identifier exception.

    Only the text produced by repr() differs from InvalidIdentifier.
    """

    def __repr__(self):
        return "Invalid Job Identifier: %s" % (str(self.id),)
146
class InvalidAction(XgridError):
    """Invalid action exception.

    @param action: The rejected action. It is stored unchanged on the
        instance as C{action}.
    """

    def __init__(self, action):
        self.action = action

    def __repr__(self):
        return "Invalid Xgrid Action: %s" % (str(self.action),)
153
class InvalidIdentifierType(Exception):
    """Invalid job or grid identifier type.

    Note that this class derives directly from Exception, not from
    XgridError.

    @param bad_var: The value whose type was rejected. It is stored
        unchanged on the instance as C{bad_var}.
    """

    def __init__(self, bad_var):
        self.bad_var = bad_var

    def __repr__(self):
        return "Invalid Xgrid Identifier Type: %s" % (str(self.bad_var),)

# Setting this flag causes printing of every Xgrid command that is executed
PYXGRID_DEBUG = False
VERSION = '0.3.0'

#####################################################################
# See if there is an Xgrid cluster defined by environment vars      #
#####################################################################

defaultXgridHostname = os.environ.get('XGRID_CONTROLLER_HOSTNAME')
# An unset or empty password variable is normalised to the empty string.
defaultXgridPassword = os.environ.get('XGRID_CONTROLLER_PASSWORD') or ''

#####################################################################
# Utilities: memory management, running & parsing Xgrid commands    #
#####################################################################

def autorelease(func):
    """A decorator to properly release ObjC object instances.

    Anytime an ObjC object instance is created, an NSAutoReleasePool needs to
    be available. PyObjC will create one but it won't get drained very often in
    Mac OS X Leopard (10.5) and earlier. This is especially a problem if many
    PyXG jobs are created within a loop. In order to prevent memory leaking, the
    call(s) to PyXG within the loop should be decorated with this decorator.

    @param func: The function to be decorated.
    @return: A wrapper that creates an autorelease pool, calls C{func} with
        the given arguments, drains the pool (also when C{func} raises) and
        returns whatever C{func} returned.
    """

    @wraps(func)
    def wrapped(*args, **kw):
        pool = Foundation.NSAutoreleasePool.alloc().init()
        try:
            # Hand the result back to the caller; previously it was dropped,
            # so every decorated function appeared to return None.
            return func(*args, **kw)
        finally:
            pool.drain()
            del pool

    return wrapped
class NSString(objc.Category(Foundation.NSString)):
    def xGridPropertyList(self):
        """Category to extend NSString.

        This enables the handling of illegal 'old-style' plists returned by
        the xgrid command-line tool.

        In particular, on systems before Mac OS X Snow Leopard (10.6) xgrid
        returns "old-style" plists that contain dates that aren't quoted
        strings. Because old-style plists can't contain dates in native
        format (only as quoted strings), the built-in CoreFoundation
        parser chokes on the output.

        xGridPropertyList: applies a regular expression to every line of the
        xgrid output to add quotes around date values, then passes the
        re-joined text to NSString's propertyList:
        """

        date_line = re.compile(r'(?P<prefix>^\s*date.* = )(?P<date>.*?);')
        quoted_form = r'\g<prefix>"\g<date>";'

        # sub() returns a line unchanged when the pattern does not match, so
        # no separate search() is needed before substituting.
        patched = [date_line.sub(quoted_form, line)
                   for line in unicode(self).splitlines()]

        return NSString.stringWithString_('\n'.join(patched)).propertyList()
232
def xgridParse(cmd="xgrid -grid list"):
    """Run an xgrid command line and parse what it prints.

    The output of the xgrid CLI is a (sometimes illegal) old-style plist.
    The command is executed through commands.getstatusoutput() and its
    output is parsed with NSString's propertyList: method, or with the
    xGridPropertyList: category method when detectPlatform() reports a
    version below 10.6.

    See the xgrid man pages for details on the xgrid command.

    @param cmd: The complete xgrid command line to execute.
    @return: The parsed, nested structure that reflects the output of the
        command, or an empty dict when the command printed nothing.
    @raise XgridError: If the command exits with a non-zero status.
    """

    # When set, print the actual commands Xgrid sent
    if PYXGRID_DEBUG == True:  # exact comparison kept from the original
        print(cmd)

    status, output = commands.getstatusoutput(cmd)

    # Anything but exit status 0 is reported as an error
    if status != 0:
        raise XgridError("xgrid command error: %s" % status)

    if not output:
        return {}

    needs_date_fix = detectPlatform() < 10.6
    plist_text = NSString.stringWithString_(output)
    if needs_date_fix:
        return plist_text.xGridPropertyList()
    return plist_text.propertyList()
266
#####################################################################
# Other Utilities                                                   #
#####################################################################

def processID(id):
    """Return the identifier as a unicode string.

    @param id: A job or grid identifier given as str, unicode or int.
    @return: C{unicode(id)}.
    @raise InvalidIdentifierType: If id is of any other type.
    """

    if not isinstance(id, (str, unicode, int)):
        raise InvalidIdentifierType(id)
    return unicode(id)
280
def detectPlatform():
    """Detect the version of Mac OS.

    Only the first two dot-separated components of the release string
    reported by platform.mac_ver() are used.

    @return: Platform version
    @rtype: float
    """
    release = platform.mac_ver()[0]
    major_minor = release.split('.')[:2]
    return float('.'.join(major_minor))
290
#####################################################################
# Classes                                                           #
#####################################################################

class Connection(object):
    """Track information needed to connect to an Xgrid controller."""

    def __init__(self, hostname=0, password=0, kerberos=False):
        """Create a Connection object to be passed to other objects.

        To connect to a specific Xgrid controller, create a Connection
        object and then pass it to the Controller, Grid or Job objects
        you create. This class performs no verification of the hostname
        or password.

        Examples
        ========

        Use the controller and password given in environmental vars.

        >>> cn = Connection()

        Specify a hostname and password.

        >>> cn = Connection('xgrid.work.com','topsecret')

        Use Kerberos.

        >>> cn = Connection('xgrid.work.com',kerberos=True)

        Usage
        =====

        @param hostname: The hostname of the xgrid controller, like
            "xgrid.work.com". If set to 0, it will default to the value set
            in the environment variable XGRID_CONTROLLER_HOSTNAME
        @type hostname: string
        @param password: The password of the xgrid controller, like
            "mysecret". If set to 0, it will default to the value set in the
            environment variable XGRID_CONTROLLER_PASSWORD. For no password,
            set it equal to the empty string: password=''.
        @type password: string
        @param kerberos: If True, connect using single sign on (SSO), instead
            of a password. You must have already obtained a kerberos
            ticket-granting ticket from the KDC that controls the kerberos
            domain containing the Xgrid controller. If kerberos is True, the
            password is ignored.
        @type kerberos: boolean
        @raise XgridError: If no hostname is given and the environment does
            not provide one either.
        """

        # Setup the hostname and password
        if hostname == 0:
            if not defaultXgridHostname:
                raise XgridError('No controller hostname specified')
            self.hostname = defaultXgridHostname
        else:
            self.hostname = hostname

        if kerberos:  # kerberos overrides password
            self.kerberos = True
            self.password = False
        else:
            self.kerberos = False
            if password == 0:
                self.password = defaultXgridPassword
            else:
                self.password = password

        self._buildConnectString()

    def _buildConnectString(self):
        """Builds the connect_string.

        The hostname and password end up inside a command line that is run
        through a shell, so both are shell-quoted. Values made only of
        characters that are safe for the shell come out unchanged.
        """
        # Imported here so the file's import block stays untouched;
        # shlex.quote exists on Python 3, pipes.quote on Python 2.
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        pieces = ['-h %s ' % quote('%s' % self.hostname)]
        if self.kerberos:
            pieces.append('-auth Kerberos ')
        elif self.password:
            # An empty password means "send no -p option at all"
            pieces.append('-p %s ' % quote('%s' % self.password))
        self._connectString = ''.join(pieces)

    def connectString(self):
        """Returns the connection string to be used in Xgrid commands.

        The string always ends with a space so that the next option can be
        appended directly.
        """
        return self._connectString
375
class JobManager(object):
    """Manage a set of Xgrid jobs."""

    def __init__(self, gridID=u'0', connection=None, update=0):
        """Create a JobManager for a given Grid and Connection.

        This class is mainly designed to be a base class of the Controller
        and Grid classes, both of which need to manage Xgrid jobs. The class
        provides basic capabilities to list active jobs and perform various
        actions on those jobs (stop, restart, resume, suspend, delete). Job
        submission is handled by the Controller, Grid and Job classes.

        Usage
        =====

        @arg gridID: The grid identifier of the grid on which the JobManager
            will manage jobs. Internally, the grid identifier is a unicode
            string, but gridID can be given in any of the formats u'0', '0'
            or 0. If gridID=u'0', the JobManager will manage jobs on the
            default grid
        @type gridID: unicode, str or int
        @arg connection: Instance of Connection class. If empty a default
            Connection object is used.
        @type connection: Connection
        @arg update: A boolean flag that determines whether or not the
            internal state is updated upon creation. This involves a call to
            the Xgrid controller.
        @type update: boolean
        """

        self.gridID = processID(gridID)

        if connection is None:
            self._connection = Connection()
        else:
            self._connection = connection

        self._jobs = []
        self._jobIDs = ()
        if update:
            self._updateJobs()

    def _updateJobs(self):
        """Updates the _jobIDs and _jobs instance variables."""

        # An empty gridID leaves the -gid option out of the command
        gridIDString = u''
        if self.gridID:
            gridIDString = u'-gid ' + self.gridID

        cmd = 'xgrid %s-job list %s' % (self._connection.connectString(),
                                        gridIDString)
        result = xgridParse(cmd)
        self._checkGridID(result, self.gridID)
        self._jobIDs = result['jobList']

        # Now build the array of Job objects
        self._jobs = [Job(jid, self._connection) for jid in self._jobIDs]

    def _checkGridID(self, result, gridID):
        """Checks a dictionary for an InvalidGridIdentifier error.

        @raise InvalidGridIdentifier: If result carries the error value
            'InvalidGridIdentifier'. Any other content is ignored.
        """
        if 'error' in result and result['error'] == 'InvalidGridIdentifier':
            raise InvalidGridIdentifier(gridID)

    def jobs(self, update=1):
        """Returns a list of initialized Job objects for all active jobs.

        @arg update: A boolean flag that determines whether or not the
            internal state is updated before returning. This involves a call
            to the Xgrid controller.
        @type update: boolean
        @return: a list of active Job objects.
        @rtype: list
        """

        if update:
            self._updateJobs()
        return self._jobs

    def job(self, jobID=u'999999999', update=1):
        """Returns the Job object with job identifier jobID.

        @arg jobID: The job identifier. Can be given as unicode, str or int.
        @type jobID: unicode, str, or int
        @arg update: A boolean flag that determines whether or not the
            internal state is updated before the lookup. This involves a call
            to the Xgrid controller.
        @type update: boolean
        @return: Initialized Job object.
        @raise InvalidJobIdentifier: If jobID is not among the known job
            identifiers.
        """

        processedID = processID(jobID)

        if update:
            self._updateJobs()
        if processedID not in self._jobIDs:
            raise InvalidJobIdentifier(processedID)
        return Job(processedID, self._connection)

    def jobIDs(self, update=1):
        """Returns a tuple of job identifiers for all active jobs.

        @arg update: A boolean flag that determines whether or not the
            internal state is updated before returning. This involves a call
            to the Xgrid controller.
        @type update: boolean
        @returns: Tuple of job identifiers.
        @rtype: tuple
        """

        if update:
            self._updateJobs()
        return self._jobIDs

    # Job management methods

    def perform(self, action, jobIDs):
        """Performs an action on a subset of active jobs.

        @arg action: The action to be performed as a string. Implemented
            actions are stop, resume, delete, restart, and suspend.
        @type action: str
        @arg jobIDs: Jobs to perform the action on.
        @type jobIDs: Either the string 'all' or any iterable of job
            identifiers (list, tuple, or another sequence such as the value
            returned by jobIDs()).
        @raise InvalidAction: If action is not one of the implemented
            actions.
        @raise TypeError: If jobIDs is a string other than 'all' or is not
            iterable.
        """

        # Validate the action
        actions = ('stop', 'suspend', 'resume', 'delete', 'restart')
        if action not in actions:
            raise InvalidAction(action)

        if jobIDs == 'all':
            # Act on every job the controller currently reports
            self._updateJobs()
            jobList = self._jobIDs
        elif isinstance(jobIDs, (str, type(u''))):
            # A single identifier string is not a collection of identifiers;
            # iterating it would act on one job per character.
            raise TypeError(jobIDs)
        else:
            # Act on the given jobs. Any iterable is accepted, not only list
            # and tuple, so that sequence types coming from the ObjC bridge
            # can be passed straight back in. Materialising the list first
            # keeps the type check ahead of any action being performed.
            try:
                jobList = list(jobIDs)
            except TypeError:
                raise TypeError(jobIDs)

        for jid in jobList:
            tempJob = Job(processID(jid), self._connection)
            tempJob.perform(action)  # this will raise any errors

    def stopAll(self):
        """Stops all active jobs."""
        self.perform(action='stop', jobIDs='all')

    def suspendAll(self):
        """Suspends all active jobs."""
        self.perform(action='suspend', jobIDs='all')

    def resumeAll(self):
        """Resumes all active jobs."""
        self.perform(action='resume', jobIDs='all')

    def deleteAll(self):
        """Deletes all active jobs."""
        self.perform(action='delete', jobIDs='all')

    def restartAll(self):
        """Restarts all active jobs."""
        self.perform(action='restart', jobIDs='all')

    def printJobs(self):
        """Prints information about all active Xgrid jobs."""

        self._updateJobs()
        print("##################################################")
        print("%-4s %-24s %-10s %-10s" %
              ("id", "Date Started", "Status", "CPU Power"))
        print("##################################################")
        for j in self._jobs:
            j.printInfo(0)
555
class GridManager(object):
    """Manage the grids of a given Xgrid controller."""

    def __init__(self, connection=None, update=0):
        """A class to manage a set of Xgrid grids.

        This class is meant to be a base class for the Controller class.
        It provides basic capabilities for listing the available grids.

        @arg connection:
            Instance of Connection class. If empty a default Connection object
            is used.
        @type connection: Connection
        @arg update:
            A boolean flag that determines whether or not the
            internal state is updated upon creation. This involves a call to
            the Xgrid controller.
        @type update: boolean
        """

        self._connection = Connection() if connection is None else connection

        self._grids = []
        self._gridIDs = ()

        if update:
            self._updateGrids()

    def _updateGrids(self):
        """Refresh _gridIDs and _grids from the controller's grid list."""

        listing = xgridParse(
            'xgrid %s-grid list' % self._connection.connectString())
        self._gridIDs = listing['gridList']

        # One Grid object per reported identifier, appended one at a time
        self._grids = []
        self._grids.extend(
            Grid(gid, self._connection) for gid in self._gridIDs)

    def grids(self, update=1):
        """Returns a list of initialized Grid objects.

        @arg update: A boolean flag that determines whether or not the
            internal state is updated before returning. This involves a call
            to the Xgrid controller.
        @type update: boolean
        """

        if not update:
            return self._grids
        self._updateGrids()
        return self._grids

    def grid(self, gridID=u'0', update=1):
        """Returns the Grid object with grid identifier gridID.

        @arg gridID:
            The unicode string identifier of the grid. If no gridID is given,
            the default grid u'0' is used.
        @type gridID: unicode, int or str
        @arg update: A boolean flag that determines whether or not the
            internal state is updated before the lookup. This involves a call
            to the Xgrid controller.
        @type update: boolean
        @raise InvalidGridIdentifier: If gridID is not among the known grid
            identifiers.
        """

        wanted = processID(gridID)

        if update:
            self._updateGrids()
        if wanted not in self._gridIDs:
            raise InvalidGridIdentifier(gridID)
        return Grid(wanted, self._connection)

    def gridIDs(self, update=1):
        """Returns a tuple of grid identifiers for all available grids.

        @arg update: A boolean flag that determines whether or not the
            internal state is updated before returning. This involves a call
            to the Xgrid controller.
        @type update: boolean
        """

        if not update:
            return self._gridIDs
        self._updateGrids()
        return self._gridIDs
646
class Controller(JobManager, GridManager):
    """A class for working with an Xgrid controller."""

    def __init__(self, connection=None, update=0):
        """This class provides an interface to an Xgrid controller.

        An Xgrid controller is a single machine that manages a set of
        grids. Each grid in turn, consists of a set of agents and
        jobs running on the agents.

        This class provides access to the grids and jobs managed by the
        controller. In Xgrid, both grids and jobs have identifiers, which are
        unicode strings, like u'0', but this module can take identifiers as
        strings or integers as well.

        Controller and Grid objects can be used to submit Xgrid jobs, but the
        Job class is used to retrieve job results.

        The Controller is only the JobManager for the default Grid. To access
        the jobs of other grids, create instances of their Grid objects.

        Examples
        ========

        >>> cn = Connection('myhost','mypassword')

        >>> c = Controller(cn)

        >>> c.jobIDs()
        (1, 2, 3)

        >>> j1 = c.job('1')    # Get an initialized Job object with id = '1'
        >>> j1
        <Job with id = 1>

        >>> c.gridIDs()        # List the grid ids
        ('0',)

        >>> c.grid('10')       # Get an initialized Grid object with id = '10'
        <Grid with gridID = 10>

        >>> c.grid()           # Get the Grid object for the default grid

        @arg connection: Instance of Connection class. If empty a default
            Connection object is used.
        @type connection: Connection

        @arg update: A boolean flag that determines whether or not the
            internal state is updated upon creation. This involves a call to
            the Xgrid controller.
        @type update: boolean
        """
        # The empty grid identifier makes the job listing omit the -gid
        # option, so the controller decides which grid is listed.
        JobManager.__init__(self, u'', connection)
        GridManager.__init__(self, connection)

        if update:
            self._update()

    def _update(self):
        """Updates all instance variables for active grids and jobs."""

        self._updateGrids()
        self._updateJobs()

    # Job Submission

    def submit(self, cmd, args='', stdin='', indir='', email='', gridID=u'0'):
        """Submits a single task job to the specified grid.

        This is a nonblocking job submission method for a single job
        with no sub-tasks. For more complicated jobs with sub-tasks, use
        the batch() method and the JobSpecification class.

        Job results can be obtained by calling the results() method of the
        Job object.

        @arg cmd:
            The command to execute as a string. The executable is not
            copied if the full path is given, otherwise it is.
        @type cmd: str
        @arg args:
            The command line arguments to be passed to the command.
        @type args: list or str
        @arg stdin:
            A local file to use as the stdin stream for the job.
        @type stdin: str
        @arg indir:
            A local directory to copy to the remote agent.
        @type indir: str
        @arg email:
            An email to which notification will be sent of various job
            state changes.
        @type email: str
        @arg gridID:
            The identifier of the Grid to which the job will be submitted.
            If empty, the default grid u'0' is used.
        @type gridID: unicode, str or int
        @returns: Initialized Job object for submitted job.
        @rtype: Job
        """

        j = Job(connection=self._connection)
        # Whatever Job.submit() returns is not needed here; the Job object
        # itself is what callers get back.
        j.submit(cmd, args, stdin, indir, email, gridID)
        return j

    def batch(self, specification, gridID=u'0', silent=False):
        """Submits a batch job to the specified grid.

        This is a nonblocking job submission method used for submitting
        complex multi-task jobs. For single task jobs, use submit().

        To retrieve job results, use the results() method of the Job object.

        @arg specification:
            The job specification of the job, which must be an instance of the
            JobSpecification class. See the docstring for JobSpecification
            for more details.
        @type specification: JobSpecification
        @arg gridID:
            The identifier of the Grid to which the job will be submitted.
            If empty, the default grid u'0' is used.
        @type gridID: unicode, str or int
        @arg silent:
            If set to True will silence all messages.
        @type silent: boolean
        @returns: Initialized Job object for submitted job.
        @rtype: Job
        """

        j = Job(connection=self._connection)
        j.batch(specification, gridID, silent=silent)
        return j
779
class Grid(JobManager):
    """A class for working with jobs on a specific Xgrid grid."""

    def __init__(self, gridID=u'0', connection=None, update=0):
        """This class provides an interface to an Xgrid grid.

        An Xgrid grid is a collection of agents and jobs running on the
        agents. This class provides access to the jobs running on a grid.
        Currently, Xgrid does not expose an API for working directly with
        the agents in a grid.

        Instances of this class can be obtained using two methods.

            1. By calling the grid() or grids() methods of the GridManager
            or Controller classes.

            2. By creating a new Grid object directly with a valid gridID:

        >>> g = Grid(u'0')

        @arg gridID:
            The grid identifier of the grid. If gridID is empty the default
            grid (u'0') will be used.
        @type gridID: unicode, int or str
        @arg connection:
            Instance of Connection class. If empty a default Connection object
            is used.
        @type connection: Connection
        @arg update: A boolean flag that determines whether or not the
            internal state is updated upon creation. This involves a call to
            the Xgrid controller.
        @type update: boolean
        """
        JobManager.__init__(self, gridID, connection)

        self._info = {}
        if update:
            self._update()

    # Private methods
    #
    # _checkGridID() is inherited from JobManager; the copy that used to be
    # defined here was identical to the inherited method.

    def _update(self):
        """Refresh both the job list and the grid attributes."""
        self._updateJobs()
        self._updateInfo()

    def _updateInfo(self):
        """Fetch the grid attributes from the controller into _info.

        @raise InvalidGridIdentifier: If the controller reports the grid
            identifier as invalid.
        """
        cmd = 'xgrid %s-grid attributes -gid %s' % \
            (self._connection.connectString(), self.gridID)
        result = xgridParse(cmd)
        self._checkGridID(result, self.gridID)
        self._info = result['gridAttributes']

    def info(self, update=1):
        """Return the current status information about a grid.

        The grid info is a dictionary of keys describing the current state
        of the grid.

        @arg update: A boolean flag that determines whether or not the
            internal state is updated before returning. This involves a call
            to the Xgrid controller.
        @type update: boolean

        """
        if update:
            self._updateInfo()
        return self._info

    # Job Submission

    def submit(self, cmd, args='', stdin='', indir='', email=''):
        """Submits a single task job to the current grid.

        This is a nonblocking job submission method for a single job
        with no sub-tasks. For more complicated jobs with sub-tasks, use
        the batch() method and the JobSpecification class.

        Job results can be obtained by calling the results() method of the
        Job object.

        @arg cmd:
            The command to execute as a string. The executable is not
            copied if the full path is given, otherwise it is.
        @type cmd: str
        @arg args:
            The command line arguments to be passed to the command.
        @type args: list or str
        @arg stdin:
            A local file to use as the stdin stream for the job.
        @type stdin: str
        @arg indir:
            A local directory to copy to the remote agent.
        @type indir: str
        @arg email:
            An email to which notification will be sent of various job
            state changes.
        @type email: str
        @returns: Initialized Job object for submitted job.
        @rtype: Job
        """

        j = Job(connection=self._connection)
        # The value returned by Job.submit() is not needed here.
        j.submit(cmd, args, stdin, indir, email, self.gridID)
        return j

    def batch(self, specification):
        """Submits a batch job to the current grid.

        This is a nonblocking job submission method used for submitting
        complex multi-task jobs. For single task jobs, use submit().

        To retrieve job results, use the results() method of the Job class.

        @arg specification:
            The job specification of the job, which must be an instance of the
            JobSpecification class. See the docstring for JobSpecification
            for more details.
        @type specification: JobSpecification
        @returns: Initialized Job object for submitted job.
        @rtype: Job
        """

        j = Job(connection=self._connection)
        j.batch(specification, self.gridID)
        return j

    # Other methods

    def __repr__(self):
        return '<Grid with gridID = %s>' % self.gridID
916
class Job(object):
    """A class for working with an Xgrid job."""

    # Job states after which polling in results() stops.  The original code
    # only stopped on 'Finished', so a job that ended any other way blocked
    # forever.
    # NOTE(review): 'Failed' and 'Canceled' are assumed to be the strings
    # the controller reports for the other terminal states -- confirm.  If
    # they never occur the behaviour is identical to the original.
    _terminalStatuses = ('Finished', 'Failed', 'Canceled')

    def __init__(self, jobID=u'999999999', connection=None):
        """An Xgrid job class.

        This class allows a user to work with an Xgrid job.  It provides
        capabilities for starting jobs, managing them and retrieving
        their results.

        Job instances are created in two ways:

            1. By calling the job() or jobs() methods of the Grid or
               Controller classes.

            2. By simply creating a new Job object:

        >>> j = Job(u'200')   # Create a new job with id of 200

        @arg jobID:
            The job identifier of the job.  To create a new job, leave blank.
        @type jobID: unicode, str or int
        @arg connection:
            Instance of Connection class.  If empty a default Connection
            object is used.
        @type connection: Connection
        """

        self.jobID = processID(jobID)

        if connection is None:
            connection = Connection()
        self._connection = connection

        # Caches filled by _updateSpecification() and _updateInfo().
        self._specification = {}
        self._info = {}

    # Semi-private methods

    def _updateInfo(self):
        """Refresh the cached job attributes from the controller.

        Raises InvalidJobIdentifier if the controller reports that error.
        """
        cmd = 'xgrid %s-job attributes -id %s' % \
            (self._connection.connectString(), self.jobID)
        result = xgridParse(cmd)
        self._checkJobID(result)
        self._info = result['jobAttributes']

    def _updateSpecification(self):
        """Refresh the cached job specification from the controller.

        Raises InvalidJobIdentifier if the controller reports that error.
        """
        cmd = 'xgrid %s-job specification -id %s' % \
            (self._connection.connectString(), self.jobID)
        result = xgridParse(cmd)
        self._checkJobID(result)
        self._specification = result['jobSpecification']

    def _update(self):
        """Refresh both the cached attributes and the specification."""
        self._updateInfo()
        self._updateSpecification()

    def _checkJobID(self, result):
        """Raise InvalidJobIdentifier if result carries that error."""
        if 'error' in result:
            if result['error'] == 'InvalidJobIdentifier':
                raise InvalidJobIdentifier(self.jobID)

    def _checkGridID(self, result, gridID):
        """Raise InvalidGridIdentifier(gridID) if result carries that
        error."""
        if 'error' in result:
            if result['error'] == 'InvalidGridIdentifier':
                raise InvalidGridIdentifier(gridID)

    # Get methods

    def specification(self, update=1):
        """Return the Xgrid job specification.

        The Xgrid job specification is the dictionary that Xgrid uses
        to submit the job.  It contains keys that describe the command
        arguments, directories, etc.

        @arg update: When true the specification is re-read from the
            controller first; otherwise the cached copy is returned.
        @type update: boolean
        """

        if update:
            self._updateSpecification()
        return self._specification

    def info(self, update=1):
        """Return the current status information about a job.

        The job info is a dictionary of keys describing the current state
        of the job.  This includes start/stop dates, name, etc.

        The method printInfo() prints the info() dictionary in a nice form.

        @arg update: When true the attributes are re-read from the
            controller first; otherwise the cached copy is returned.
        @type update: boolean
        """

        if update:
            self._updateInfo()
        return self._info

    # Job submission and results

    def results(self, stdout='', outdir='', stderr='', block=10, silent=False):
        """Retrieve the results of an Xgrid job.

        This method provides both a blocking and nonblocking method of
        getting the results of an Xgrid job.  The job does not need to be
        completed to retrieve the results.  Because of this, the results
        method can be used to get partial results while the job continues
        to run.  It can also automatically name output files.

        @arg stdout:
            The local file in which to put the stdout stream of the remote
            job.  If this is empty, a name of the form xgridjob-jobID.out
            is generated.
        @type stdout: str
        @arg stderr:
            The local file in which to put the stderr stream of the remote
            job.  If this is empty, a name of the form xgridjob-jobID.err
            is generated.
        @type stderr: str
        @arg outdir:
            The local directory in which to put the files retrieved from the
            remote job.  This is only for files other than the stdout and
            stderr files.  When empty, no -out option is passed.
        @type outdir: str
        @arg block:
            Whether or not to block until the job is finished.  If block=0,
            partially completed results are retrieved and the job will
            continue to run.  If block > 0, the job status is queried every
            block seconds and the results are retrieved once the job has
            reached a terminal state.
        @type block: int
        @arg silent: Silence all messages.
        @type silent: boolean
        """

        if not stdout:
            stdout = 'xgridjob-' + self.jobID + '.out'
        if not stderr:
            stderr = 'xgridjob-' + self.jobID + '.err'

        so = '-so ' + stdout + ' '
        se = '-se ' + stderr + ' '
        out = ''
        if outdir:
            out = '-out ' + outdir

        cmd = "xgrid %s-job results -id %s %s%s%s" % \
            (self._connection.connectString(), self.jobID, so, se, out)

        # Always validate the job id once, then optionally poll until the
        # job has ended.
        self._updateInfo()
        if block:
            while self._info['jobStatus'] not in self._terminalStatuses:
                time.sleep(block)
                self._updateInfo()
        xgridParse(cmd)

        if not silent:
            # so[4:] drops the leading '-so ' (message text kept as before).
            print("Job stdout saved in file: " + so[4:])

    # Job Submission

    def submit(self, cmd, args='', stdin='', indir='', email='', gridID=u'0',
               silent=False):
        """Submits a single task job to the specified grid.

        This is a nonblocking job submission method for a single job
        with no sub-tasks.  For more complicated jobs with sub-tasks, use
        the batch() method.

        Job results can be obtained by calling the results() method.

        @arg cmd:
            The command to execute as a string.  The executable is not
            copied if the full path is given, otherwise it is.
        @type cmd: str
        @arg args:
            The command line arguments to be passed to the command.
        @type args: list, tuple, str or unicode
        @arg stdin:
            A local file to use as the stdin stream for the job.
        @type stdin: str
        @arg indir:
            A local directory to copy to the remote agent.
        @type indir: str
        @arg email:
            An email to which notification will be sent of various job
            state changes.
        @type email: str
        @arg gridID:
            The identifier of the Grid to which the job will be submitted.
            If empty, the default grid u'0' is used.
        @type gridID: unicode, str or int
        @arg silent:
            If set to True will silence all messages.
        @type silent: boolean
        @returns: The job identifier of the submitted job (also stored in
            self.jobID).
        @raises TypeError: If args is not a string, list or tuple.
        @raises InvalidGridIdentifier: If the controller rejects gridID.
        """

        processedGridID = processID(gridID)

        # NOTE(review): the values below are pasted into a command string
        # without quoting; presumably xgridParse hands it to a shell, so
        # paths containing spaces or shell metacharacters are not safe.
        options = ''
        if stdin:
            options += '-si ' + stdin + ' '
        if indir:
            options += '-in ' + indir + ' '
        if email:
            options += '-email ' + email + ' '

        # Process the arguments
        if isinstance(args, (str, unicode)):
            argString = args
        elif isinstance(args, (list, tuple)):
            argString = " ".join([str(a) for a in args]).strip()
        else:
            raise TypeError("args must be a string, list or tuple")

        submitString = options + cmd + ' ' + argString

        # Now submit the job and set the job id.  The processed grid id is
        # used so the command agrees with batch() and with the id reported
        # in InvalidGridIdentifier.
        xgridCmd = "xgrid %s-gid %s -job submit %s" % \
            (self._connection.connectString(), processedGridID, submitString)
        jobinfo = xgridParse(xgridCmd)
        self._checkGridID(jobinfo, processedGridID)
        self.jobID = jobinfo['jobIdentifier']
        if not silent:
            print("Job submitted with id:  %s" % self.jobID)
        return self.jobID

    def batch(self, specification, gridID=u'0', silent=False):
        """Submits a batch job to the specified grid.

        This is a nonblocking job submission method used for submitting
        complex multi-task jobs.  For single task jobs, use submit().

        To retrieve job results, use the results() method.

        @arg specification:
            The job specification of the job, which must be an instance of
            the JobSpecification class.  See the docstring for
            JobSpecification for more details.
        @type specification: JobSpecification
        @arg gridID:
            The identifier of the Grid to which the job will be submitted.
            If empty, the default grid u'0' is used.
        @type gridID: unicode, str or int
        @arg silent:
            If set to True will silence all messages.
        @type silent: boolean
        @returns: The job identifier of the submitted job (also stored in
            self.jobID).
        @raises XgridError: If specification is not a JobSpecification.
        @raises InvalidGridIdentifier: If the controller rejects gridID.
        """

        if not isinstance(specification, JobSpecification):
            raise XgridError

        processedGridID = processID(gridID)
        jobSpec = specification.jobSpec()

        # mkstemp reserves the file name for us; taking only the .name of
        # a NamedTemporaryFile let the file be deleted straight away and
        # the name reused unsafely, and the plist was never cleaned up.
        fd, plistFile = tempfile.mkstemp(suffix='.plist')
        os.close(fd)
        try:
            jobSpec.writeToFile_atomically_(plistFile, 1)
            cmd = "xgrid %s-gid %s -job batch %s" % \
                (self._connection.connectString(), processedGridID, plistFile)
            # Presumably xgridParse returns only after the xgrid command
            # has read the plist, so it can be removed afterwards.
            jobinfo = xgridParse(cmd)
        finally:
            try:
                os.remove(plistFile)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass

        self._checkGridID(jobinfo, processedGridID)
        self.jobID = jobinfo['jobIdentifier']

        if not silent:
            print("Job submitted with id:  %s" % self.jobID)

        return self.jobID

    # Job control methods

    def perform(self, action):
        """Performs an action on a job.

        @arg action:
            The action to be performed as a string.  Implemented actions
            are stop, resume, delete, restart, and suspend.
        @type action: str
        @raises InvalidAction: If action is not one of the above.
        @raises InvalidJobIdentifier: If the controller rejects the job id.
        """

        actions = ('stop', 'suspend', 'resume', 'delete', 'restart')
        if action not in actions:
            raise InvalidAction(action)

        cmd = 'xgrid %s-job %s -id %s' % \
            (self._connection.connectString(), action, self.jobID)
        result = xgridParse(cmd)
        self._checkJobID(result)
        print("Action %s performed on job %s" % (action, self.jobID))

        # If delete reset everything but the connection
        if action == 'delete':
            self.jobID = u'999999999'
            self._specification = {}
            self._info = {}

    def stop(self):
        """Stops the job."""
        self.perform('stop')

    def suspend(self):
        """Suspends the job."""
        self.perform('suspend')

    def resume(self):
        """Resumes the job."""
        self.perform('resume')

    def delete(self):
        """Deletes the job."""
        self.perform('delete')

    def restart(self):
        """Restarts the job."""
        self.perform('restart')

    # Other methods

    def __repr__(self):
        return '<Job with jobID = %s>' % self.jobID

    def printInfo(self, verbose=True):
        """Prints the info() dictionary of a job.

        @arg verbose: When true every attribute is printed, one per line;
            otherwise a single summary line (id, start date, status,
            active CPU power) is printed.
        @type verbose: boolean
        """
        self._updateInfo()
        if verbose:
            print("{")
            for key in self._info:
                print("   %s :  %s" % (key, self._info[key]))
            print("}")
        else:
            # .get() so a missing attribute yields an empty column rather
            # than a KeyError.
            print("%-4s %-24s %-10s %-10s" %
                  (self.jobID, self._info.get('dateStarted', ''),
                   self._info.get('jobStatus', ''),
                   self._info.get('activeCPUPower', '')))

    def printSpecification(self):
        """Print the job specification used to submit the job."""

        self._updateSpecification()
        print("{")
        for key in self._specification:
            print("   %s :  %s" % (key, self._specification[key]))
        print("}")
1292
class JobSpecification(object):
    """A class used for constructing multi-task batch jobs."""

    def __init__(self):
        """This class is used to setup the plist file for multi-task jobs.
        """
        self._jobDict = Foundation.NSMutableDictionary.dictionaryWithCapacity_(10)
        # The object handed out by jobSpec(): a one-element array wrapping
        # the job dictionary.
        self._jobSpec = Foundation.NSArray.arrayWithObject_(self._jobDict)
        self._jobDict[u'taskSpecifications'] = {}
        # Counter used to name tasks 'task0', 'task1', ...
        self.nextTask = 0
        self._jobDict[u'applicationIdentifier'] = u'PyXG'
        self._jobDict[u'schedulerParameters'] = {}
        self._jobDict[u'schedulerParameters'][u'tasksMustStartSimultaneously'] \
            = u'NO'

    # Utility methods

    def _checkSchedulerParameters(self):
        """Create the schedulerParameters entry if it is missing."""
        if u'schedulerParameters' not in self._jobDict:
            self._jobDict[u'schedulerParameters'] = {}

    def _checkInputFiles(self):
        """Create the inputFiles entry if it is missing."""
        if u'inputFiles' not in self._jobDict:
            self._jobDict[u'inputFiles'] = {}

    def jobSpec(self):
        """Returns the full job specification (an array holding the job
        dictionary)."""
        return self._jobSpec

    # Job/Task setup methods

    def setName(self, name):
        """Set the name (a string) of the job."""
        self._jobDict[u'name'] = unicode(name)

    def name(self):
        """Returns the job name, or None if it has not been set."""
        return self._jobDict.get(u'name')

    def setEmail(self, email):
        """Set the notification email for the batch job."""
        self._jobDict[u'notificationEmail'] = unicode(email)

    def email(self):
        """Returns the notification email, or None if it has not been
        set."""
        return self._jobDict.get(u'notificationEmail')

    def setTasksMustStartSimultaneously(self, simul):
        """Sets the tasksMustStartSimultaneously flag.

        @arg simul: Any true value stores u'YES', anything else u'NO'.
        @type simul: boolean
        """
        self._checkSchedulerParameters()
        if simul:
            flag = u'YES'
        else:
            flag = u'NO'
        self._jobDict[u'schedulerParameters'][u'tasksMustStartSimultaneously'] = flag

    def tasksMustStartSimultaneously(self):
        """Returns the value of tasksMustStartSimultaneously."""
        return self._jobDict[u'schedulerParameters'].get(u'tasksMustStartSimultaneously')

    def setMinimumTaskCount(self, count):
        """Sets the min number of tasks that should be started."""
        self._checkSchedulerParameters()
        self._jobDict[u'schedulerParameters'][u'minimumTaskCount'] = count

    def minimumTaskCount(self):
        """Returns the value of minimumTaskCount."""
        return self._jobDict[u'schedulerParameters'].get(u'minimumTaskCount')

    def setDependsOnJobs(self, jobArray):
        """Takes a list of Xgrid job ids that must complete before this job
        begins."""
        self._checkSchedulerParameters()
        self._jobDict[u'schedulerParameters'][u'dependsOnJobs'] = \
            [unicode(j) for j in jobArray]

    def dependsOnJobs(self):
        """Returns the value of dependsOnJobs."""
        return self._jobDict[u'schedulerParameters'].get(u'dependsOnJobs')

    def addFile(self, localFilePath, fileName, isExecutable=0):
        """Specifies a local file to copy to the Xgrid agents.

        The file contents are read and stored in the job specification
        dictionary under inputFiles.

        @arg localFilePath:
            The full path of the file on the client (local) computer
        @type localFilePath: unicode or str
        @arg fileName:
            The name to call the file on the agent
        @type fileName: unicode or str
        @arg isExecutable:
            Set to 1 if the file should be executable
        @type isExecutable: boolean
        @raises AssertionError: If localFilePath is not an existing file.
        """

        # Kept as an assert so callers see the same exception type as
        # before; note that it is skipped when Python runs with -O.
        assert os.path.isfile(localFilePath), "File does not exist: %s" % localFilePath
        # NSString is reached through the Foundation module, like every
        # other Foundation class used by this class.
        path = Foundation.NSString.stringWithString_(
            unicode(localFilePath)).stringByStandardizingPath()
        data = Foundation.NSData.dataWithContentsOfFile_(path)
        self._checkInputFiles()
        if isExecutable:
            isExecString = u'YES'
        else:
            isExecString = u'NO'
        self._jobDict[u'inputFiles'][unicode(fileName)] = \
            {u'fileData': data, u'isExecutable': isExecString}

    def delFile(self, fileName):
        """Deletes the file named fileName from the JobSpecification.

        List filenames using the files() method.  Unknown names are
        ignored.
        """
        key = unicode(fileName)
        if u'inputFiles' in self._jobDict:
            if key in self._jobDict[u'inputFiles']:
                del self._jobDict[u'inputFiles'][key]

    def files(self):
        """Returns a list of included filenames (empty if there are
        none)."""
        inputFiles = self._jobDict.get(u'inputFiles')
        if not inputFiles:
            return []
        return list(inputFiles.keys())

    def addTask(self, cmd, args=u'', env=None, inputStream=u'',
                dependsOnTasks=None):
        """Adds a task to the jobSpecification.

        The task is stored under the name 'taskN', where N counts up from
        0 with every call.

        @arg cmd:
            The command to execute as a string.  The executable is not
            copied if the full path is given, otherwise it is.
        @type cmd: str
        @arg args:
            The command line arguments to be passed to the command.  A
            string is split on whitespace.
        @type args: list, tuple, str or unicode
        @arg env:
            A Python dictionary of environment variables to use on the
            agents.
        @type env: dict
        @arg inputStream:
            A local file to send to the agents that will be used as stdin
            for the task
        @type inputStream: unicode or str
        @arg dependsOnTasks:
            A list of task ids that must complete before this one begins
        @type dependsOnTasks: list
        @raises TypeError: If args is not a string, list or tuple.
        """
        # Process the arguments before touching any state, so a bad call
        # does not consume a task number.
        if isinstance(args, (str, unicode)):
            # split() without a separator so repeated or surrounding
            # spaces do not yield empty-string arguments.
            argList = args.split()
        elif isinstance(args, (list, tuple)):
            argList = args
        else:
            raise TypeError("args must be a string, list or tuple")

        taskSpec = {}
        taskName = unicode('task%i' % self.nextTask)
        self.nextTask += 1

        taskSpec[u'command'] = unicode(cmd)
        if argList:
            taskSpec[u'arguments'] = [unicode(a) for a in argList]
        if env:
            taskSpec[u'environment'] = env
        if inputStream:
            taskSpec[u'inputStream'] = unicode(inputStream)
        if dependsOnTasks:
            taskSpec[u'dependsOnTasks'] = dependsOnTasks
        self._jobDict[u'taskSpecifications'][taskName] = taskSpec

    def copyTask(self):
        """Placeholder; not implemented."""
        pass

    def delTask(self, task):
        """Deletes the task named task.

        List the task names using the tasks() method.  Unknown names are
        ignored.
        """
        key = unicode(task)
        if key in self._jobDict[u'taskSpecifications']:
            del self._jobDict[u'taskSpecifications'][key]

    def editTask(self):
        """Placeholder; not implemented."""
        pass

    def tasks(self):
        """Return a list of the task names."""
        return self._jobDict[u'taskSpecifications'].keys()

    def printTasks(self):
        """Print the task specifications of all tasks."""
        taskSpecs = self._jobDict[u'taskSpecifications']
        for tid in taskSpecs.keys():
            print(str(tid) + " " + str(taskSpecs[tid]))
1483