How to use wait_for_state method in wpt

Best JavaScript code snippet using wpt

test_processdispatcher_service.py

Source:test_processdispatcher_service.py Github

copy

Full Screen

...322 queued=queued)323 # ensure that procs that request existing engine id run324 self.client.schedule_process("proc2", self.process_definition_id,325 queueing_mode=proc1_queueing_mode, execution_engine_id="engine1")326 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)327 self._wait_assert_pd_dump(self._assert_process_states,328 ProcessState.RUNNING, ["proc2"])329 # ensure that procs that don't specify an engine id run330 self.client.schedule_process("proc3", self.process_definition_id,331 queueing_mode=proc1_queueing_mode)332 self.notifier.wait_for_state("proc3", ProcessState.RUNNING)333 self._wait_assert_pd_dump(self._assert_process_states,334 ProcessState.RUNNING, ["proc3"])335 # now add an engine for proc1 and it should be scheduled336 self.client.node_state("node2", domain_id_from_engine("engine2"),337 InstanceState.RUNNING)338 self._spawn_eeagent("node2", 4)339 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)340 self._wait_assert_pd_dump(self._assert_process_states,341 ProcessState.RUNNING, ["proc1"])342 # now launch another process for engine2. it should be scheduled too343 self.client.schedule_process("proc4", self.process_definition_id,344 queueing_mode=QueueingMode.NEVER, execution_engine_id="engine2")345 self.notifier.wait_for_state("proc4", ProcessState.RUNNING)346 self._wait_assert_pd_dump(self._assert_process_states,347 ProcessState.RUNNING, ["proc4"])348 def test_default_ee(self):349 self.client.node_state("node1", domain_id_from_engine("engine1"),350 InstanceState.RUNNING)351 self._spawn_eeagent("node1", 4)352 self.client.node_state("node2", domain_id_from_engine("engine2"),353 InstanceState.RUNNING)354 self._spawn_eeagent("node2", 4)355 # fill up all 4 slots on engine1 agent and launch one more proc356 for upid in ['p1', 'p2', 'p3', 'p4', 'p5']:357 self.client.schedule_process(upid, self.process_definition_id,358 queueing_mode=QueueingMode.ALWAYS)359 self.notifier.wait_for_state('p1', ProcessState.RUNNING)360 self.notifier.wait_for_state('p2', ProcessState.RUNNING)361 self.notifier.wait_for_state('p3', ProcessState.RUNNING)362 self.notifier.wait_for_state('p4', ProcessState.RUNNING)363 # p5 should be queued since it is not compatible with engine2364 self.notifier.wait_for_state('p5', ProcessState.WAITING)365 # now schedule p6 directly to engine2366 self.client.schedule_process("p6", self.process_definition_id,367 queueing_mode=QueueingMode.ALWAYS, execution_engine_id="engine2")368 self.notifier.wait_for_state('p1', ProcessState.RUNNING)369 # add another eeagent for engine1, p5 should run370 self.client.node_state("node3", domain_id_from_engine("engine1"),371 InstanceState.RUNNING)372 self._spawn_eeagent("node3", 4)373 self.notifier.wait_for_state('p5', ProcessState.RUNNING)374 def test_process_engine_map(self):375 def1 = uuid.uuid4().hex376 self.client.create_definition(def1, "dtype",377 executable={"module": "a.b", "class": "C"},378 name="my_process")379 def2 = uuid.uuid4().hex380 self.client.create_definition(def2, "dtype",381 executable={"module": "a.b", "class": "D"},382 name="my_process")383 def3 = uuid.uuid4().hex384 self.client.create_definition(def3, "dtype",385 executable={"module": "a", "class": "B"},386 name="my_process")387 self.client.node_state("node1", domain_id_from_engine("engine1"),388 InstanceState.RUNNING)389 eeagent1 = self._spawn_eeagent("node1", 4)390 self.client.node_state("node2", domain_id_from_engine("engine2"),391 InstanceState.RUNNING)392 eeagent2 = self._spawn_eeagent("node2", 4)393 self.client.node_state("node3", domain_id_from_engine("engine3"),394 InstanceState.RUNNING)395 eeagent3 = self._spawn_eeagent("node3", 4)396 self.client.schedule_process("proc1", def1)397 self.client.schedule_process("proc2", def2)398 self.client.schedule_process("proc3", def3)399 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)400 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)401 self.notifier.wait_for_state("proc3", ProcessState.RUNNING)402 proc1 = self.client.describe_process("proc1")403 self.assertEqual(proc1['assigned'], eeagent3.name)404 proc2 = self.client.describe_process("proc2")405 self.assertEqual(proc2['assigned'], eeagent2.name)406 proc3 = self.client.describe_process("proc3")407 self.assertEqual(proc3['assigned'], eeagent1.name)408 def test_node_exclusive(self):409 node = "node1"410 domain_id = domain_id_from_engine('engine1')411 node_properties = dict(engine="fedora")412 self.client.node_state(node, domain_id, InstanceState.RUNNING,413 node_properties)414 self._spawn_eeagent(node, 4)415 exclusive_attr = "hamsandwich"416 queued = []417 proc1_queueing_mode = QueueingMode.ALWAYS418 # Process should be scheduled, since no other procs have its419 # exclusive attribute420 self.client.schedule_process("proc1", self.process_definition_id,421 queueing_mode=proc1_queueing_mode,422 node_exclusive=exclusive_attr)423 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)424 self._wait_assert_pd_dump(self._assert_process_states,425 ProcessState.RUNNING, ["proc1"])426 # Process should be queued, because proc1 has the same attribute427 self.client.schedule_process("proc2", self.process_definition_id,428 queueing_mode=proc1_queueing_mode,429 node_exclusive=exclusive_attr)430 queued.append("proc2")431 self._wait_assert_pd_dump(self._assert_process_distribution,432 queued=queued)433 # Now kill the first process, and proc2 should run.434 self.client.terminate_process("proc1")435 queued.remove("proc2")436 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)437 self._wait_assert_pd_dump(self._assert_process_states,438 ProcessState.RUNNING, ["proc2"])439 # Process should be queued, because proc2 has the same attribute440 self.client.schedule_process("proc3", self.process_definition_id,441 queueing_mode=proc1_queueing_mode,442 node_exclusive=exclusive_attr)443 queued.append("proc3")444 self._wait_assert_pd_dump(self._assert_process_distribution,445 queued=queued)446 # Process should be scheduled, since no other procs have its447 # exclusive attribute448 other_exclusive_attr = "hummussandwich"449 self.client.schedule_process("proc4", self.process_definition_id,450 queueing_mode=proc1_queueing_mode,451 node_exclusive=other_exclusive_attr)452 self.notifier.wait_for_state("proc4", ProcessState.RUNNING)453 self._wait_assert_pd_dump(self._assert_process_states,454 ProcessState.RUNNING, ["proc4"])455 # Now that we've started another node, waiting node should start456 node = "node2"457 node_properties = dict(engine="fedora")458 self.client.node_state(node, domain_id, InstanceState.RUNNING,459 node_properties)460 self._spawn_eeagent(node, 4)461 self.notifier.wait_for_state("proc3", ProcessState.RUNNING)462 self._wait_assert_pd_dump(self._assert_process_states,463 ProcessState.RUNNING, ["proc3"])464 def test_node_exclusive_bug(self):465 slots = 2466 node_1 = "node1"467 domain_id = domain_id_from_engine('engine1')468 node_properties = dict(engine="fedora")469 self.client.node_state(node_1, domain_id, InstanceState.RUNNING,470 node_properties)471 self._spawn_eeagent(node_1, slots)472 node_2 = "node2"473 domain_id = domain_id_from_engine('engine1')474 node_properties = dict(engine="fedora")475 self.client.node_state(node_2, domain_id, InstanceState.RUNNING,476 node_properties)477 self._spawn_eeagent(node_2, slots)478 node_3 = "node3"479 domain_id = domain_id_from_engine('engine1')480 node_properties = dict(engine="fedora")481 self.client.node_state(node_3, domain_id, InstanceState.RUNNING,482 node_properties)483 self._spawn_eeagent(node_3, slots)484 node_4 = "node4"485 domain_id = domain_id_from_engine('engine1')486 node_properties = dict(engine="fedora")487 self.client.node_state(node_4, domain_id, InstanceState.RUNNING,488 node_properties)489 self._spawn_eeagent(node_4, slots)490 pydap_xattr = "pydap"491 service_gateway_xattr = "service_gateway"492 queueing_mode = QueueingMode.START_ONLY493 # Process should be scheduled, since no other procs have its494 # exclusive attribute495 pydap_xattr_procs = []496 service_gateway_xattr_procs = []497 proc_1 = "proc_1"498 self.client.schedule_process(proc_1, self.process_definition_id,499 queueing_mode=queueing_mode,500 node_exclusive=pydap_xattr)501 pydap_xattr_procs.append(proc_1)502 proc_2 = "proc_2"503 self.client.schedule_process(proc_2, self.process_definition_id,504 queueing_mode=queueing_mode,505 node_exclusive=service_gateway_xattr)506 pydap_xattr_procs.append(proc_2)507 proc_3 = "proc_3"508 self.client.schedule_process(proc_3, self.process_definition_id,509 queueing_mode=queueing_mode,510 node_exclusive=service_gateway_xattr)511 service_gateway_xattr_procs.append(proc_1)512 proc_4 = "proc_4"513 self.client.schedule_process(proc_4, self.process_definition_id,514 queueing_mode=queueing_mode,515 node_exclusive=pydap_xattr)516 pydap_xattr_procs.append(proc_4)517 proc_5 = "proc_5"518 self.client.schedule_process(proc_5, self.process_definition_id,519 queueing_mode=queueing_mode,520 node_exclusive=service_gateway_xattr)521 service_gateway_xattr_procs.append(proc_5)522 proc_6 = "proc_6"523 self.client.schedule_process(proc_6, self.process_definition_id,524 queueing_mode=queueing_mode,525 node_exclusive=pydap_xattr)526 pydap_xattr_procs.append(proc_6)527 proc_7 = "proc_7"528 self.client.schedule_process(proc_7, self.process_definition_id,529 queueing_mode=queueing_mode,530 node_exclusive=service_gateway_xattr)531 service_gateway_xattr_procs.append(proc_7)532 proc_8 = "proc_8"533 self.client.schedule_process(proc_8, self.process_definition_id,534 queueing_mode=queueing_mode,535 node_exclusive=pydap_xattr)536 pydap_xattr_procs.append(proc_8)537 for proc in (pydap_xattr_procs + service_gateway_xattr_procs):538 self.notifier.wait_for_state(proc, ProcessState.RUNNING)539 self._wait_assert_pd_dump(self._assert_process_states,540 ProcessState.RUNNING, [proc])541 self._wait_assert_pd_dump(self._assert_node_exclusive)542 self.client.terminate_process(proc_8)543 self._wait_assert_pd_dump(self._assert_node_exclusive)544 def test_node_exclusive_multiple_eeagents(self):545 node = "node1"546 domain_id = domain_id_from_engine('engine1')547 node_properties = dict(engine="fedora")548 self.client.node_state(node, domain_id, InstanceState.RUNNING,549 node_properties)550 self._spawn_eeagent(node, 4)551 self._spawn_eeagent(node, 4)552 exclusive_attr = "hamsandwich"553 queued = []554 proc1_queueing_mode = QueueingMode.ALWAYS555 # Process should be scheduled, since no other procs have its556 # exclusive attribute557 self.client.schedule_process("proc1", self.process_definition_id,558 queueing_mode=proc1_queueing_mode,559 node_exclusive=exclusive_attr)560 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)561 self._wait_assert_pd_dump(self._assert_process_states,562 ProcessState.RUNNING, ["proc1"])563 # Process should be queued, because proc1 has the same attribute564 self.client.schedule_process("proc2", self.process_definition_id,565 queueing_mode=proc1_queueing_mode,566 node_exclusive=exclusive_attr)567 queued.append("proc2")568 self._wait_assert_pd_dump(self._assert_process_distribution,569 queued=queued)570 # Now kill the first process, and proc2 should run.571 self.client.terminate_process("proc1")572 queued.remove("proc2")573 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)574 self._wait_assert_pd_dump(self._assert_process_states,575 ProcessState.RUNNING, ["proc2"])576 # Process should be queued, because proc2 has the same attribute577 self.client.schedule_process("proc3", self.process_definition_id,578 queueing_mode=proc1_queueing_mode,579 node_exclusive=exclusive_attr)580 queued.append("proc3")581 self._wait_assert_pd_dump(self._assert_process_distribution,582 queued=queued)583 # Process should be scheduled, since no other procs have its584 # exclusive attribute585 other_exclusive_attr = "hummussandwich"586 self.client.schedule_process("proc4", self.process_definition_id,587 queueing_mode=proc1_queueing_mode,588 node_exclusive=other_exclusive_attr)589 self.notifier.wait_for_state("proc4", ProcessState.RUNNING)590 self._wait_assert_pd_dump(self._assert_process_states,591 ProcessState.RUNNING, ["proc4"])592 # Now that we've started another node, waiting node should start593 node = "node2"594 node_properties = dict(engine="fedora")595 self.client.node_state(node, domain_id, InstanceState.RUNNING,596 node_properties)597 self._spawn_eeagent(node, 4)598 self.notifier.wait_for_state("proc3", ProcessState.RUNNING)599 self._wait_assert_pd_dump(self._assert_process_states,600 ProcessState.RUNNING, ["proc3"])601 def test_queueing(self):602 # submit some processes before there are any resources available603 procs = ["proc1", "proc2", "proc3", "proc4", "proc5"]604 for proc in procs:605 procstate = self.client.schedule_process(proc, self.process_definition_id)606 self.assertEqual(procstate['upid'], proc)607 for proc in procs:608 self.notifier.wait_for_state(proc, ProcessState.WAITING)609 self._wait_assert_pd_dump(self._assert_process_states,610 ProcessState.WAITING, procs)611 # add 2 nodes and a resource that supports 4 processes612 nodes = ["node1", "node2"]613 domain_id = domain_id_from_engine('engine1')614 for node in nodes:615 self.client.node_state(node, domain_id, InstanceState.RUNNING)616 self._spawn_eeagent(nodes[0], 4)617 for proc in procs[:4]:618 self.notifier.wait_for_state(proc, ProcessState.RUNNING)619 self._wait_assert_pd_dump(self._assert_process_states,620 ProcessState.RUNNING, procs[:4])621 for proc in procs[4:]:622 self.notifier.wait_for_state(proc, ProcessState.WAITING)623 self._wait_assert_pd_dump(self._assert_process_states,624 ProcessState.WAITING, procs[4:])625 # stand up a resource on the second node to support the other process626 self._spawn_eeagent(nodes[1], 4)627 # all processes should now be running628 for proc in procs:629 self.notifier.wait_for_state(proc, ProcessState.RUNNING)630 self._wait_assert_pd_dump(self._assert_process_states,631 ProcessState.RUNNING, procs)632 def _assert_process_states(self, dump, expected_state, upids):633 for upid in upids:634 process = dump['processes'][upid]635 assert process['state'] == expected_state, "%s: %s, expected %s!" % (636 upid, process['state'], expected_state)637 def test_node_death(self):638 # set up two nodes with 4 slots each639 nodes = ['node1', 'node2']640 domain_id = domain_id_from_engine('engine1')641 for node in nodes:642 self.client.node_state(node, domain_id, InstanceState.RUNNING)643 for node in nodes:644 self._spawn_eeagent(node, 4)645 # 8 total slots are available, schedule 6 processes646 procs = ['proc' + str(i + 1) for i in range(6)]647 # schedule the first process to never restart. it shouldn't come back.648 self.client.schedule_process(procs[0], self.process_definition_id,649 restart_mode=RestartMode.NEVER)650 # and the second process to restart on abnormal termination. it should651 # come back.652 self.client.schedule_process(procs[1], self.process_definition_id,653 restart_mode=RestartMode.ABNORMAL)654 for proc in procs[2:]:655 self.client.schedule_process(proc, self.process_definition_id)656 self._wait_assert_pd_dump(self._assert_process_distribution,657 node_counts=[4, 2],658 queued_count=0)659 # now kill one node660 log.debug("killing node %s", nodes[0])661 self.client.node_state(nodes[0], domain_id, InstanceState.TERMINATING)662 # 5 procesess should be rescheduled. since we have 5 processes and only663 # 4 slots, 1 should be queued664 self._wait_assert_pd_dump(self._assert_process_distribution,665 node_counts=[4],666 queued_count=1)667 # ensure that the correct process was not rescheduled668 self.notifier.wait_for_state(procs[0], ProcessState.FAILED)669 def _assert_process_distribution(self, dump, nodes=None, node_counts=None,670 agents=None, agent_counts=None,671 queued=None, queued_count=None,672 rejected=None, rejected_count=None):673 # Assert the distribution of processes among nodes674 # node and agent counts are given as sequences of integers which are not675 # specific to a named node. So specifying node_counts=[4,3] will match676 # as long as you have 4 processes assigned to one node and 3 to another,677 # regardless of the node name678 found_rejected = set()679 found_queued = set()680 found_node = defaultdict(set)681 found_assigned = defaultdict(set)682 for process in dump['processes'].itervalues():683 upid = process['upid']684 assigned = process['assigned']685 if process['state'] == ProcessState.WAITING:686 found_queued.add(upid)687 elif process['state'] == ProcessState.REJECTED:688 found_rejected.add(upid)689 elif process['state'] == ProcessState.RUNNING:690 resource = dump['resources'].get(assigned)691 self.assertIsNotNone(resource)692 node_id = resource['node_id']693 found_node[node_id].add(upid)694 found_assigned[assigned].add(upid)695 print "Queued: %s\nRejected: %s\n" % (queued, rejected)696 print "Found Queued: %s\nFound Rejected: %s\n" % (found_queued, found_rejected)697 if queued is not None:698 self.assertEqual(set(queued), found_queued)699 if queued_count is not None:700 self.assertEqual(len(found_queued), queued_count)701 if rejected is not None:702 self.assertEqual(set(rejected), found_rejected)703 if rejected_count is not None:704 self.assertEqual(len(found_rejected), rejected_count)705 if agents is not None:706 self.assertEqual(set(agents.keys()), set(found_assigned.keys()))707 for ee_id, processes in found_assigned.iteritems():708 self.assertEqual(set(agents[ee_id]), processes)709 if agent_counts is not None:710 assigned_lengths = [len(s) for s in found_assigned.itervalues()]711 # omit zero counts712 agent_counts = [count for count in agent_counts if count != 0]713 # print "%s =?= %s" % (agent_counts, assigned_lengths)714 self.assertEqual(sorted(assigned_lengths), sorted(agent_counts))715 if nodes is not None:716 self.assertEqual(set(nodes.keys()), set(found_node.keys()))717 for node_id, processes in found_node.iteritems():718 self.assertEqual(set(nodes[node_id]), processes)719 if node_counts is not None:720 node_lengths = [len(s) for s in found_node.itervalues()]721 # omit zero counts722 node_counts = [count for count in node_counts if count != 0]723 self.assertEqual(sorted(node_lengths), sorted(node_counts))724 def _assert_node_exclusive(self, dump):725 """assert that processes are distributed in a way consistent726 with the node exclusive properties of those processes727 """728 exclusive_dist = {}729 for proc_id, proc in dump['processes'].iteritems():730 if proc['state'] == '700-TERMINATED':731 continue732 assigned = proc.get('assigned')733 assert assigned is not None, proc734 node_exclusive = proc.get('node_exclusive')735 assert node_exclusive is not None736 if exclusive_dist.get(assigned) is None:737 exclusive_dist[assigned] = []738 exclusive_dist[assigned].append(node_exclusive)739 exclusive_dist[assigned].sort()740 for node, exclusives in exclusive_dist.iteritems():741 assert len(exclusives) == len(set(exclusives))742 exclusive_dist_nodes = {}743 exclusive_dist_resources = {}744 for node_id, node in dump['nodes'].iteritems():745 exclusive_dist_nodes[node_id] = node['node_exclusive']746 exclusive_dist_nodes[node_id].sort()747 for resource in node['resources']:748 exclusive_dist_resources[resource] = node['node_exclusive']749 exclusive_dist_resources[resource].sort()750 for node, exclusives in exclusive_dist_nodes.iteritems():751 assert len(exclusives) == len(set(exclusives))752 print "nodes: %s" % exclusive_dist_nodes753 print "resources: %s" % exclusive_dist_resources754 print "proc: %s" % exclusive_dist755 assert exclusive_dist == exclusive_dist_resources, "%s != %s" % (exclusive_dist, exclusive_dist_resources)756 return exclusive_dist757 def test_constraints(self):758 nodes = ['node1', 'node2']759 domain_id = domain_id_from_engine('engine1')760 node1_properties = dict(hat_type="fedora")761 node2_properties = dict(hat_type="bowler")762 self.client.node_state(nodes[0], domain_id, InstanceState.RUNNING,763 node1_properties)764 self._spawn_eeagent(nodes[0], 4)765 proc1_constraints = dict(hat_type="fedora")766 proc2_constraints = dict(hat_type="bowler")767 self.client.schedule_process("proc1", self.process_definition_id,768 constraints=proc1_constraints)769 self.client.schedule_process("proc2", self.process_definition_id,770 constraints=proc2_constraints)771 # proc1 should be running on the node/agent, proc2 queued772 self._wait_assert_pd_dump(self._assert_process_distribution,773 nodes=dict(node1=["proc1"]),774 queued=["proc2"])775 # launch another eeagent that supports proc2's engine_type776 self.client.node_state(nodes[1], domain_id, InstanceState.RUNNING,777 node2_properties)778 self._spawn_eeagent(nodes[1], 4)779 self._wait_assert_pd_dump(self._assert_process_distribution,780 nodes=dict(node1=["proc1"],781 node2=["proc2"]),782 queued=[])783 def test_queue_mode(self):784 constraints = dict(hat_type="fedora")785 queued = []786 rejected = []787 # Test QueueingMode.NEVER788 proc1_queueing_mode = QueueingMode.NEVER789 self.client.schedule_process("proc1", self.process_definition_id,790 constraints=constraints, queueing_mode=proc1_queueing_mode)791 # proc1 should be rejected792 rejected.append("proc1")793 self._wait_assert_pd_dump(self._assert_process_distribution,794 rejected=rejected)795 # Test QueueingMode.ALWAYS796 proc2_queueing_mode = QueueingMode.ALWAYS797 self.client.schedule_process("proc2", self.process_definition_id,798 constraints=constraints, queueing_mode=proc2_queueing_mode)799 # proc2 should be queued800 queued.append("proc2")801 self._wait_assert_pd_dump(self._assert_process_distribution,802 queued=queued)803 # Test QueueingMode.START_ONLY804 proc3_queueing_mode = QueueingMode.START_ONLY805 proc3_restart_mode = RestartMode.ALWAYS806 self.client.schedule_process("proc3", self.process_definition_id,807 constraints=constraints, queueing_mode=proc3_queueing_mode,808 restart_mode=proc3_restart_mode)809 # proc3 should be queued, since its start_only810 queued.append("proc3")811 self._wait_assert_pd_dump(self._assert_process_distribution,812 queued=queued)813 node = "node1"814 domain_id = domain_id_from_engine('engine1')815 node_properties = dict(hat_type="fedora")816 self.client.node_state(node, domain_id, InstanceState.RUNNING,817 node_properties)818 self._spawn_eeagent(node, 4)819 # we created a node, so it should now run820 self.notifier.wait_for_state("proc3", ProcessState.RUNNING)821 self._wait_assert_pd_dump(self._assert_process_states,822 ProcessState.RUNNING, ["proc3"])823 log.debug("killing node %s", node)824 self._kill_all_eeagents()825 self.client.node_state(node, domain_id, InstanceState.TERMINATING)826 # proc3 should now be rejected, because its START_ONLY827 queued.remove("proc3")828 rejected.append("proc3")829 self._wait_assert_pd_dump(self._assert_process_distribution,830 rejected=rejected)831 # Test QueueingMode.RESTART_ONLY832 # First test that its rejected if it doesn't start right away833 proc4_queueing_mode = QueueingMode.RESTART_ONLY834 proc4_restart_mode = RestartMode.ALWAYS835 self.client.schedule_process("proc4", self.process_definition_id,836 constraints=constraints, queueing_mode=proc4_queueing_mode,837 restart_mode=proc4_restart_mode)838 # proc4 should be rejected, since its RESTART_ONLY839 rejected.append("proc4")840 self._wait_assert_pd_dump(self._assert_process_distribution,841 rejected=rejected)842 # Second test that if a proc starts, it'll get queued after it fails843 proc5_queueing_mode = QueueingMode.RESTART_ONLY844 proc5_restart_mode = RestartMode.ALWAYS845 # Start a node846 self.client.node_state(node, domain_id, InstanceState.RUNNING,847 node_properties)848 self._spawn_eeagent(node, 4)849 self.client.schedule_process("proc5", self.process_definition_id,850 constraints=constraints, queueing_mode=proc5_queueing_mode,851 restart_mode=proc5_restart_mode)852 self.notifier.wait_for_state("proc5", ProcessState.RUNNING)853 self._wait_assert_pd_dump(self._assert_process_states,854 ProcessState.RUNNING, ["proc5"])855 log.debug("killing node %s", node)856 self.client.node_state(node, domain_id, InstanceState.TERMINATING)857 self._kill_all_eeagents()858 # proc5 should be queued, since its RESTART_ONLY859 queued.append("proc5")860 self._wait_assert_pd_dump(self._assert_process_distribution,861 queued=queued)862 def test_restart_mode_never(self):863 constraints = dict(hat_type="fedora")864 # Start a node865 node = "node1"866 domain_id = domain_id_from_engine('engine1')867 node_properties = dict(hat_type="fedora")868 self.client.node_state(node, domain_id, InstanceState.RUNNING,869 node_properties)870 eeagent = self._spawn_eeagent(node, 4)871 # Test RestartMode.NEVER872 proc1_queueing_mode = QueueingMode.ALWAYS873 proc1_restart_mode = RestartMode.NEVER874 self.client.schedule_process("proc1", self.process_definition_id,875 constraints=constraints, queueing_mode=proc1_queueing_mode,876 restart_mode=proc1_restart_mode)877 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)878 self._wait_assert_pd_dump(self._assert_process_states,879 ProcessState.RUNNING, ["proc1"])880 eeagent.fail_process("proc1")881 self.notifier.wait_for_state("proc1", ProcessState.FAILED)882 self._wait_assert_pd_dump(self._assert_process_states,883 ProcessState.FAILED, ["proc1"])884 def test_restart_mode_always(self):885 constraints = dict(hat_type="fedora")886 queued = []887 # Start a node888 node = "node1"889 domain_id = domain_id_from_engine('engine1')890 node_properties = dict(hat_type="fedora")891 self.client.node_state(node, domain_id, InstanceState.RUNNING,892 node_properties)893 eeagent = self._spawn_eeagent(node, 4)894 # Test RestartMode.ALWAYS895 proc2_queueing_mode = QueueingMode.ALWAYS896 proc2_restart_mode = RestartMode.ALWAYS897 self.client.schedule_process("proc2", self.process_definition_id,898 constraints=constraints, queueing_mode=proc2_queueing_mode,899 restart_mode=proc2_restart_mode, configuration={'process': {'minimum_time_between_starts': 0.1}})900 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)901 self._wait_assert_pd_dump(self._assert_process_states,902 ProcessState.RUNNING, ["proc2"])903 eeagent.exit_process("proc2")904 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)905 self._wait_assert_pd_dump(self._assert_process_states,906 ProcessState.RUNNING, ["proc2"])907 eeagent.fail_process("proc2")908 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)909 self._wait_assert_pd_dump(self._assert_process_states,910 ProcessState.RUNNING, ["proc2"])911 log.debug("killing node %s", node)912 self.client.node_state(node, domain_id, InstanceState.TERMINATING)913 self._kill_all_eeagents()914 # proc2 should be queued, since there are no more resources915 queued.append("proc2")916 self._wait_assert_pd_dump(self._assert_process_distribution,917 queued=queued)918 def test_restart_mode_abnormal(self):919 constraints = dict(hat_type="fedora")920 queued = []921 # Start a node922 node = "node1"923 domain_id = domain_id_from_engine('engine1')924 node_properties = dict(hat_type="fedora")925 self.client.node_state(node, domain_id, InstanceState.RUNNING,926 node_properties)927 eeagent = self._spawn_eeagent(node, 4)928 # Test RestartMode.ABNORMAL929 proc2_queueing_mode = QueueingMode.ALWAYS930 proc2_restart_mode = RestartMode.ABNORMAL931 self.client.schedule_process("proc2", self.process_definition_id,932 constraints=constraints, queueing_mode=proc2_queueing_mode,933 restart_mode=proc2_restart_mode)934 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)935 self._wait_assert_pd_dump(self._assert_process_states,936 ProcessState.RUNNING, ["proc2"])937 eeagent.fail_process("proc2")938 # This can be very slow on buildbot, hence the long timeout939 self.notifier.wait_for_state("proc2", ProcessState.RUNNING, 60)940 self._wait_assert_pd_dump(self._assert_process_states,941 ProcessState.RUNNING, ["proc2"])942 log.debug("killing node %s", node)943 self.client.node_state(node, domain_id, InstanceState.TERMINATING)944 self._kill_all_eeagents()945 # proc2 should be queued, since there are no more resources946 queued.append("proc2")947 self._wait_assert_pd_dump(self._assert_process_distribution,948 queued=queued)949 self.client.node_state(node, domain_id, InstanceState.RUNNING,950 node_properties)951 eeagent = self._spawn_eeagent(node, 4)952 self.client.schedule_process("proc1", self.process_definition_id,953 constraints=constraints, queueing_mode=proc2_queueing_mode,954 restart_mode=proc2_restart_mode)955 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)956 eeagent.exit_process("proc1")957 self.notifier.wait_for_state("proc1", ProcessState.EXITED)958 self._wait_assert_pd_dump(self._assert_process_states,959 ProcessState.EXITED, ["proc1"])960 def test_start_count(self):961 nodes = ['node1']962 domain_id = domain_id_from_engine('engine1')963 node1_properties = dict(hat_type="fedora")964 self.client.node_state(nodes[0], domain_id, InstanceState.RUNNING,965 node1_properties)966 self._spawn_eeagent(nodes[0], 4)967 proc1_constraints = dict(hat_type="fedora")968 self.client.schedule_process("proc1", self.process_definition_id,969 constraints=proc1_constraints)970 # proc1 should be running on the node/agent, proc2 queued971 self._wait_assert_pd_dump(self._assert_process_distribution,972 nodes=dict(node1=["proc1"]))973 proc = self.store.get_process(None, "proc1")974 self.assertEqual(proc.starts, 1)975 time.sleep(2)976 self.client.restart_process("proc1")977 # proc1 should be running on the node/agent, proc2 queued978 self._wait_assert_pd_dump(self._assert_process_distribution,979 nodes=dict(node1=["proc1"]))980 proc = self.store.get_process(None, "proc1")981 self.assertEqual(proc.starts, 2)982 def test_minimum_time_between_starts(self):983 constraints = dict(hat_type="fedora")984 # Start a node985 node = "node1"986 domain_id = domain_id_from_engine('engine1')987 node_properties = dict(hat_type="fedora")988 self.client.node_state(node, domain_id, InstanceState.RUNNING,989 node_properties)990 eeagent = self._spawn_eeagent(node, 4)991 # Test RestartMode.ALWAYS992 queueing_mode = QueueingMode.ALWAYS993 restart_mode = RestartMode.ALWAYS994 default_time_to_throttle = 2995 time_to_throttle = 10996 self.client.schedule_process("proc1", self.process_definition_id,997 constraints=constraints, queueing_mode=queueing_mode,998 restart_mode=restart_mode)999 self.client.schedule_process("proc2", self.process_definition_id,1000 constraints=constraints, queueing_mode=queueing_mode,1001 restart_mode=restart_mode,1002 configuration=minimum_time_between_starts_config(time_to_throttle))1003 # Processes should start once without delay1004 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)1005 self._wait_assert_pd_dump(self._assert_process_states,1006 ProcessState.RUNNING, ["proc1"])1007 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)1008 self._wait_assert_pd_dump(self._assert_process_states,1009 ProcessState.RUNNING, ["proc2"])1010 # Processes should be restarted once without delay1011 eeagent.exit_process("proc1")1012 eeagent.exit_process("proc2")1013 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)1014 self._wait_assert_pd_dump(self._assert_process_states,1015 ProcessState.RUNNING, ["proc1"])1016 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)1017 self._wait_assert_pd_dump(self._assert_process_states,1018 ProcessState.RUNNING, ["proc2"])1019 # The second time proc1 should be throttled for 2s (the default), and1020 # proc2 should be throttled for the configured 5s1021 eeagent.exit_process("proc1")1022 eeagent.exit_process("proc2")1023 self.notifier.wait_for_state("proc1", ProcessState.WAITING)1024 self._wait_assert_pd_dump(self._assert_process_states,1025 ProcessState.WAITING, ["proc1"])1026 self.notifier.wait_for_state("proc2", ProcessState.WAITING)1027 self._wait_assert_pd_dump(self._assert_process_states,1028 ProcessState.WAITING, ["proc2"])1029 # After waiting a few seconds, proc1 should be restarted1030 time.sleep(default_time_to_throttle + 1)1031 self.notifier.wait_for_state("proc1", ProcessState.RUNNING)1032 self._wait_assert_pd_dump(self._assert_process_states,1033 ProcessState.RUNNING, ["proc1"])1034 self.notifier.wait_for_state("proc2", ProcessState.WAITING)1035 self._wait_assert_pd_dump(self._assert_process_states,1036 ProcessState.WAITING, ["proc2"])1037 # After a few more secs, proc2 should be restarted as well1038 time.sleep(time_to_throttle - (default_time_to_throttle + 1) + 1)1039 self.notifier.wait_for_state("proc2", ProcessState.RUNNING)1040 self._wait_assert_pd_dump(self._assert_process_states,1041 ProcessState.RUNNING, ["proc2"])1042 def test_describe(self):1043 self.client.schedule_process("proc1", self.process_definition_id)1044 processes = self.client.describe_processes()1045 self.assertEqual(len(processes), 1)1046 self.assertEqual(processes[0]['upid'], "proc1")1047 proc1 = self.client.describe_process("proc1")1048 self.assertEqual(proc1['upid'], "proc1")1049 self.client.schedule_process("proc2", self.process_definition_id)1050 processes = self.client.describe_processes()1051 self.assertEqual(len(processes), 2)1052 if processes[0]['upid'] == "proc1":1053 self.assertEqual(processes[1]['upid'], "proc2")1054 elif processes[0]['upid'] == "proc2":1055 self.assertEqual(processes[1]['upid'], "proc1")1056 else:1057 self.fail()1058 proc1 = self.client.describe_process("proc1")1059 self.assertEqual(proc1['upid'], "proc1")1060 proc2 = self.client.describe_process("proc2")1061 self.assertEqual(proc2['upid'], "proc2")1062 def test_process_exited(self):1063 node = "node1"1064 domain_id = domain_id_from_engine('engine1')1065 self.client.node_state(node, domain_id, InstanceState.RUNNING)1066 self._spawn_eeagent(node, 1)1067 proc = "proc1"1068 self.client.schedule_process(proc, self.process_definition_id)1069 self._wait_assert_pd_dump(self._assert_process_states,1070 ProcessState.RUNNING, [proc])1071 agent = self._get_eeagent_for_process(proc)1072 agent.exit_process(proc)1073 self._wait_assert_pd_dump(self._assert_process_states,1074 ProcessState.EXITED, [proc])1075 self.notifier.wait_for_state(proc, ProcessState.EXITED)1076 def test_neediness(self, process_count=20, node_count=5):1077 procs = ["proc" + str(i) for i in range(process_count)]1078 for proc in procs:1079 procstate = self.client.schedule_process(proc,1080 self.process_definition_id)1081 self.assertEqual(procstate['upid'], proc)1082 self._wait_assert_pd_dump(self._assert_process_states,1083 ProcessState.WAITING, procs)1084 for i in range(3):1085 # retry this a few times to avoid a race between processes1086 # hitting WAITING state and the needs being registered1087 try:1088 self.epum_client.assert_needs(range(node_count + 1),1089 domain_id_from_engine("engine1"))1090 break1091 except AssertionError:1092 time.sleep(0.01)1093 self.epum_client.clear()1094 # now provide nodes and resources, processes should start1095 nodes = ["node" + str(i) for i in range(node_count)]1096 domain_id = domain_id_from_engine('engine1')1097 for node in nodes:1098 self.client.node_state(node, domain_id, InstanceState.RUNNING)1099 for node in nodes:1100 self._spawn_eeagent(node, 4)1101 self._wait_assert_pd_dump(self._assert_process_states,1102 ProcessState.RUNNING, procs)1103 # now kill all processes in a random order1104 killlist = list(procs)1105 random.shuffle(killlist)1106 for proc in killlist:1107 self.client.terminate_process(proc)1108 self._wait_assert_pd_dump(self._assert_process_states,1109 ProcessState.TERMINATED, procs)1110 for i in range(3):1111 # retry this a few times to avoid a race between processes1112 # hitting WAITING state and the needs being registered1113 try:1114 self.epum_client.assert_needs(range(node_count + 1),1115 domain_id_from_engine("engine1"))1116 break1117 except AssertionError:1118 time.sleep(0.01)1119 def test_definitions(self):1120 self.client.create_definition("d1", "t1", "notepad.exe")1121 d1 = self.client.describe_definition("d1")1122 self.assertEqual(d1['definition_id'], "d1")1123 self.assertEqual(d1['definition_type'], "t1")1124 self.assertEqual(d1['executable'], "notepad.exe")1125 self.client.update_definition("d1", "t1", "notepad2.exe")1126 d1 = self.client.describe_definition("d1")1127 self.assertEqual(d1['executable'], "notepad2.exe")1128 d_list = self.client.list_definitions()1129 self.assertIn("d1", d_list)1130 self.client.remove_definition("d1")1131 def test_reschedule_process(self):1132 node = "node1"1133 domain_id = domain_id_from_engine('engine1')1134 self.client.node_state(node, domain_id, InstanceState.RUNNING)1135 self._spawn_eeagent(node, 1)1136 proc = "proc1"1137 # start a process that is never restarted automatically.1138 self.client.create_process(proc, self.process_definition_id)1139 self.client.schedule_process(proc, restart_mode=RestartMode.NEVER)1140 self._wait_assert_pd_dump(self._assert_process_states,1141 ProcessState.RUNNING, [proc])1142 agent = self._get_eeagent_for_process(proc)1143 agent.exit_process(proc)1144 self._wait_assert_pd_dump(self._assert_process_states,1145 ProcessState.EXITED, [proc])1146 self.notifier.wait_for_state(proc, ProcessState.EXITED)1147 record = self.client.schedule_process(proc)1148 self.assertEqual(record['state'], ProcessState.REQUESTED)1149 self.notifier.wait_for_state(proc, ProcessState.RUNNING)1150 # now fail the process. it should still be restartable.1151 agent.fail_process(proc)1152 def test_create_schedule(self):1153 node = "node1"1154 domain_id = domain_id_from_engine('engine1')1155 self.client.node_state(node, domain_id, InstanceState.RUNNING)1156 self._spawn_eeagent(node, 1)1157 proc = "proc1"1158 # create a process. it should be UNSCHEDULED until we schedule it1159 self.client.create_process(proc, self.process_definition_id)1160 self._wait_assert_pd_dump(self._assert_process_states,1161 ProcessState.UNSCHEDULED, [proc])1162 # creating again is harmless1163 self.client.create_process(proc, self.process_definition_id)1164 # now schedule it1165 self.client.schedule_process(proc)1166 self._wait_assert_pd_dump(self._assert_process_states,1167 ProcessState.RUNNING, [proc])1168 self.notifier.wait_for_state(proc, ProcessState.RUNNING)1169 # scheduling again is harmless1170 self.client.schedule_process(proc)1171 def test_restart_system_boot(self):1172 # set up some state in the PD before restart1173 self.client.node_state("node1", domain_id_from_engine("engine1"),1174 InstanceState.RUNNING)1175 self._spawn_eeagent("node1", 4)1176 procs = [('p1', RestartMode.ABNORMAL, None),1177 ('p2', RestartMode.ALWAYS, None),1178 ('p3', RestartMode.NEVER, None),1179 ('p4', RestartMode.ALWAYS, nosystemrestart_process_config()),1180 ('p5', RestartMode.ABNORMAL, None),1181 ('p6', RestartMode.ABNORMAL, nosystemrestart_process_config())]1182 # fill up all 4 slots on engine1 agent and launch 2 more procs1183 for upid, restart_mode, config in procs:1184 self.client.schedule_process(upid, self.process_definition_id,1185 queueing_mode=QueueingMode.ALWAYS, restart_mode=restart_mode,1186 configuration=config)1187 self.notifier.wait_for_state('p1', ProcessState.RUNNING)1188 self.notifier.wait_for_state('p2', ProcessState.RUNNING)1189 self.notifier.wait_for_state('p3', ProcessState.RUNNING)1190 self.notifier.wait_for_state('p4', ProcessState.RUNNING)1191 self.notifier.wait_for_state('p5', ProcessState.WAITING)1192 self.notifier.wait_for_state('p6', ProcessState.WAITING)1193 # now kill PD and eeagents. come back in system restart mode.1194 self.stop_pd()1195 self._kill_all_eeagents()1196 self.store.initialize()1197 self.store.set_system_boot(True)1198 self.store.shutdown()1199 self.start_pd()1200 self.store.wait_initialized(timeout=20)1201 # some processes should come back pending. others should fail out1202 # due to their restart mode flag.1203 self.notifier.wait_for_state('p1', ProcessState.UNSCHEDULED_PENDING)1204 self.notifier.wait_for_state('p2', ProcessState.UNSCHEDULED_PENDING)1205 self.notifier.wait_for_state('p3', ProcessState.TERMINATED)1206 self.notifier.wait_for_state('p4', ProcessState.TERMINATED)1207 self.notifier.wait_for_state('p5', ProcessState.UNSCHEDULED_PENDING)1208 self.notifier.wait_for_state('p6', ProcessState.TERMINATED)1209 # add resources back1210 self.client.node_state("node1", domain_id_from_engine("engine1"),1211 InstanceState.RUNNING)1212 self._spawn_eeagent("node1", 4)1213 # now launch a new process to make sure scheduling still works during1214 # system boot mode1215 self.client.schedule_process("p7", self.process_definition_id)1216 # and restart a couple of the dead procs. one FAILED and one UNSCHEDULED_PENDING1217 self.client.schedule_process("p1")1218 self.client.schedule_process("p4")1219 self.notifier.wait_for_state('p1', ProcessState.RUNNING)1220 self.notifier.wait_for_state('p4', ProcessState.RUNNING)1221 self.notifier.wait_for_state('p7', ProcessState.RUNNING)1222 # finally, end system boot mode. the remaining 2 U-P procs should be scheduled1223 self.client.set_system_boot(False)1224 self._wait_assert_pd_dump(self._assert_process_distribution,1225 node_counts=[4],1226 queued_count=1)1227 # one process will end up queued. doesn't matter which1228 p2 = self.client.describe_process("p2")1229 p5 = self.client.describe_process("p5")1230 states = set([p2['state'], p5['state']])1231 self.assertEqual(states, set([ProcessState.RUNNING, ProcessState.WAITING]))1232 def test_missing_ee(self):1233 """test_missing_ee1234 Ensure that the PD kills lingering processes on eeagents after they've been1235 evacuated.1236 """1237 # create some fake nodes and tell PD about them1238 node_1 = "node1"1239 domain_id = domain_id_from_engine("engine4")1240 self.client.node_state(node_1, domain_id, InstanceState.RUNNING)1241 # PD knows about this node but hasn't gotten a heartbeat yet1242 # spawn the eeagents and tell them all to heartbeat1243 eeagent_1 = self._spawn_eeagent(node_1, 1)1244 def assert_all_resources(state):1245 eeagent_nodes = set()1246 for resource in state['resources'].itervalues():1247 eeagent_nodes.add(resource['node_id'])1248 self.assertEqual(set([node_1]), eeagent_nodes)1249 self._wait_assert_pd_dump(assert_all_resources)1250 time_to_throttle = 01251 self.client.schedule_process("p1", self.process_definition_id, execution_engine_id="engine4",1252 configuration=minimum_time_between_starts_config(time_to_throttle))1253 # Send a heartbeat to show the process is RUNNING, then wait for doctor1254 # to mark the eeagent missing1255 time.sleep(1)1256 eeagent_1.send_heartbeat()1257 self.notifier.wait_for_state('p1', ProcessState.RUNNING, timeout=30)1258 self.notifier.wait_for_state('p1', ProcessState.WAITING, timeout=30)1259 # Check that process is still 'Running' on the eeagent, even the PD has1260 # since marked it failed1261 eeagent_process = eeagent_1._get_process_with_upid('p1')1262 self.assertEqual(eeagent_process['u_pid'], 'p1')1263 self.assertEqual(eeagent_process['state'], ProcessState.RUNNING)1264 self.assertEqual(eeagent_process['round'], 0)1265 # Now send another heartbeat to start getting procs again1266 eeagent_1.send_heartbeat()1267 self.notifier.wait_for_state('p1', ProcessState.RUNNING, timeout=30)1268 eeagent_process = eeagent_1._get_process_with_upid('p1')1269 self.assertEqual(eeagent_process['u_pid'], 'p1')1270 self.assertEqual(eeagent_process['state'], ProcessState.RUNNING)1271 self.assertEqual(eeagent_process['round'], 1)1272 # The pd should now have rescheduled the proc, and terminated the1273 # lingering process1274 self.assertEqual(len(eeagent_1.history), 1)1275 terminated_history = eeagent_1.history[0]1276 self.assertEqual(terminated_history['u_pid'], 'p1')1277 self.assertEqual(terminated_history['state'], ProcessState.TERMINATED)1278 self.assertEqual(terminated_history['round'], 0)1279 def test_matchmaker_msg_retry(self):1280 node = "node1"1281 domain_id = domain_id_from_engine('engine1')1282 self.client.node_state(node, domain_id, InstanceState.RUNNING)1283 self._spawn_eeagent(node, 1)1284 # sneak in and shorten retry time1285 self.pd.matchmaker.process_launcher.retry_seconds = 0.51286 proc1 = "proc1"1287 proc2 = "proc2"1288 with patch.object(self.pd.matchmaker.process_launcher, "resource_client") as mock_resource_client:1289 # first process request goes to mock, not real client but no error1290 self.client.schedule_process(proc1, self.process_definition_id)1291 # second process hits an error but that should not cause problems1292 mock_resource_client.launch_process.side_effect = Exception("boom!")1293 self.client.schedule_process(proc2, self.process_definition_id)1294 self._wait_assert_pd_dump(self._assert_process_states,1295 ProcessState.ASSIGNED, [proc1, proc2])1296 self.assertEqual(mock_resource_client.launch_process.call_count, 2)1297 # now resource client works again. those messages should be retried1298 self.notifier.wait_for_state(proc1, ProcessState.RUNNING)1299 self.notifier.wait_for_state(proc2, ProcessState.RUNNING)1300 self._wait_assert_pd_dump(self._assert_process_states,1301 ProcessState.RUNNING, [proc1, proc2])1302class ProcessDispatcherServiceZooKeeperTests(ProcessDispatcherServiceTests, ZooKeeperTestMixin):1303 # this runs all of the ProcessDispatcherService tests wih a ZK store1304 def setup_store(self):1305 self.setup_zookeeper(base_path_prefix="/processdispatcher_service_tests_")1306 store = ProcessDispatcherZooKeeperStore(self.zk_hosts,1307 self.zk_base_path, use_gevent=self.use_gevent)1308 store.initialize()1309 return store1310 def teardown_store(self):1311 if self.store:1312 self.store.shutdown()1313 self.teardown_zookeeper()...

Full Screen

Full Screen

test_update.py

Source:test_update.py Github

copy

Full Screen

...67 return "test_update_stateless_job_spec.yaml"68 return "test_stateless_job_spec_k8s.yaml"69def test__create_update(stateless_job, in_place):70 stateless_job.create()71 stateless_job.wait_for_state(goal_state="RUNNING")72 old_pod_infos = stateless_job.query_pods()73 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()74 update = StatelessUpdate(75 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_SPEC76 )77 update.create(in_place=in_place)78 update.wait_for_state(goal_state="SUCCEEDED")79 new_pod_infos = stateless_job.query_pods()80 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()81 assert_pod_id_changed(old_pod_infos, new_pod_infos)82 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)83def test__create_update_add_instances(stateless_job, in_place):84 stateless_job.create()85 stateless_job.wait_for_state(goal_state="RUNNING")86 old_pod_infos = stateless_job.query_pods()87 update = StatelessUpdate(88 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC89 )90 update.create(in_place=in_place)91 update.wait_for_state(goal_state="SUCCEEDED")92 new_pod_infos = stateless_job.query_pods()93 assert len(old_pod_infos) == 394 assert len(new_pod_infos) == 595def test__create_update_update_and_add_instances(stateless_job, in_place):96 stateless_job.create()97 stateless_job.wait_for_state(goal_state="RUNNING")98 old_pod_infos = stateless_job.query_pods()99 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()100 update = StatelessUpdate(101 stateless_job,102 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,103 )104 update.create(in_place=in_place)105 update.wait_for_state(goal_state="SUCCEEDED")106 new_pod_infos = stateless_job.query_pods()107 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()108 assert len(old_pod_infos) == 3109 assert len(new_pod_infos) == 5110 assert_pod_id_changed(old_pod_infos, new_pod_infos)111 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)112def test__create_update_update_start_paused(stateless_job, in_place):113 stateless_job.create()114 stateless_job.wait_for_state(goal_state="RUNNING")115 old_pod_infos = stateless_job.query_pods()116 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()117 update = StatelessUpdate(118 stateless_job,119 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,120 start_paused=True,121 )122 update.create(in_place=in_place)123 update.wait_for_state(goal_state="PAUSED")124 update.resume()125 update.wait_for_state(goal_state="SUCCEEDED")126 new_pod_infos = stateless_job.query_pods()127 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()128 assert len(old_pod_infos) == 3129 assert len(new_pod_infos) == 5130 assert_pod_id_changed(old_pod_infos, new_pod_infos)131 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)132def test__create_update_with_batch_size(stateless_job, in_place):133 stateless_job.create()134 stateless_job.wait_for_state(goal_state="RUNNING")135 old_pod_infos = stateless_job.query_pods()136 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()137 update = StatelessUpdate(138 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_SPEC, batch_size=1139 )140 update.create(in_place=in_place)141 update.wait_for_state(goal_state="SUCCEEDED")142 new_pod_infos = stateless_job.query_pods()143 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()144 assert_pod_id_changed(old_pod_infos, new_pod_infos)145 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)146def test__create_update_add_instances_with_batch_size(stateless_job, in_place):147 stateless_job.create()148 stateless_job.wait_for_state(goal_state="RUNNING")149 old_pod_infos = stateless_job.query_pods()150 update = StatelessUpdate(151 stateless_job,152 updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC,153 batch_size=1,154 )155 update.create(in_place=in_place)156 update.wait_for_state(goal_state="SUCCEEDED")157 new_pod_infos = stateless_job.query_pods()158 assert len(old_pod_infos) == 3159 assert len(new_pod_infos) == 5160def test__create_update_update_and_add_instances_with_batch(stateless_job, in_place):161 stateless_job.create()162 stateless_job.wait_for_state(goal_state="RUNNING")163 old_pod_infos = stateless_job.query_pods()164 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()165 update = StatelessUpdate(166 stateless_job,167 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,168 batch_size=1,169 )170 update.create(in_place=in_place)171 update.wait_for_state(goal_state="SUCCEEDED")172 new_pod_infos = stateless_job.query_pods()173 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()174 assert len(old_pod_infos) == 3175 assert len(new_pod_infos) == 5176 assert_pod_id_changed(old_pod_infos, new_pod_infos)177 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)178def test__create_update_bad_version(stateless_job, in_place):179 stateless_job.create()180 stateless_job.wait_for_state(goal_state="RUNNING")181 update = StatelessUpdate(182 stateless_job,183 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,184 batch_size=1,185 )186 try:187 update.create(entity_version="1-2-3", in_place=in_place)188 except grpc.RpcError as e:189 assert e.code() == grpc.StatusCode.ABORTED190 assert INVALID_ENTITY_VERSION_ERR_MESSAGE in e.details()191 return192 raise Exception("entity version mismatch error not received")193def test__pause_update_bad_version(stateless_job, in_place):194 stateless_job.create()195 stateless_job.wait_for_state(goal_state="RUNNING")196 update = StatelessUpdate(197 stateless_job,198 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,199 batch_size=1,200 )201 update.create(in_place=in_place)202 try:203 update.pause(entity_version="1-2-3")204 except grpc.RpcError as e:205 assert e.code() == grpc.StatusCode.ABORTED206 assert INVALID_ENTITY_VERSION_ERR_MESSAGE in e.details()207 return208 raise Exception("entity version mismatch error not received")209def test__resume_update_bad_version(stateless_job, in_place):210 stateless_job.create()211 stateless_job.wait_for_state(goal_state="RUNNING")212 update = StatelessUpdate(213 stateless_job,214 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,215 start_paused=True,216 batch_size=1,217 )218 update.create(in_place=in_place)219 try:220 update.resume(entity_version="1-2-3")221 except grpc.RpcError as e:222 assert e.code() == grpc.StatusCode.ABORTED223 assert INVALID_ENTITY_VERSION_ERR_MESSAGE in e.details()224 return225 raise Exception("entity version mismatch error not received")226def test__abort_update_bad_version(stateless_job, in_place):227 stateless_job.create()228 stateless_job.wait_for_state(goal_state="RUNNING")229 update = StatelessUpdate(230 stateless_job,231 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,232 batch_size=1,233 )234 update.create(in_place=in_place)235 try:236 update.abort(entity_version="1-2-3")237 except grpc.RpcError as e:238 assert e.code() == grpc.StatusCode.ABORTED239 assert INVALID_ENTITY_VERSION_ERR_MESSAGE in e.details()240 return241 raise Exception("entity version mismatch error not received")242def test__create_update_stopped_job(stateless_job, in_place):243 stateless_job.create()244 stateless_job.wait_for_state(goal_state="RUNNING")245 old_pod_infos = stateless_job.query_pods()246 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()247 old_pod_states = set()248 for pod_info in old_pod_infos:249 old_pod_states.add(pod_info.spec.pod_name.value)250 stateless_job.stop()251 stateless_job.wait_for_state(goal_state="KILLED")252 update = StatelessUpdate(253 stateless_job,254 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,255 batch_size=1,256 )257 update.create(in_place=in_place)258 stateless_job.start()259 update.wait_for_state(goal_state="SUCCEEDED")260 stateless_job.wait_for_state(goal_state="RUNNING")261 new_pod_infos = stateless_job.query_pods()262 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()263 assert len(old_pod_infos) == 3264 assert len(new_pod_infos) == 5265 assert_pod_id_changed(old_pod_infos, new_pod_infos)266 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)267 # Only new instances should be RUNNING268 for pod_info in new_pod_infos:269 if pod_info.spec.pod_name.value in new_pod_infos:270 assert pod_info.status.state == pod_pb2.POD_STATE_KILLED271 else:272 assert pod_info.status.state == pod_pb2.POD_STATE_RUNNING273def test__create_update_stopped_tasks(stateless_job, in_place):274 stateless_job.create()275 stateless_job.wait_for_state(goal_state="RUNNING")276 old_pod_infos = stateless_job.query_pods()277 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()278 stateless_job.stop()279 update = StatelessUpdate(280 stateless_job,281 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,282 batch_size=1,283 )284 update.create(in_place=in_place)285 update.wait_for_state(goal_state="SUCCEEDED")286 stateless_job.wait_for_state(goal_state="KILLED")287 stateless_job.start()288 stateless_job.wait_for_state(goal_state="RUNNING")289 new_pod_infos = stateless_job.query_pods()290 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()291 assert len(old_pod_infos) == 3292 assert len(new_pod_infos) == 5293 assert_pod_id_changed(old_pod_infos, new_pod_infos)294 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)295def test__create_multiple_consecutive_updates(stateless_job, in_place):296 stateless_job.create()297 stateless_job.wait_for_state(goal_state="RUNNING")298 old_pod_infos = stateless_job.query_pods()299 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()300 update1 = StatelessUpdate(301 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC302 )303 update1.create(in_place=in_place)304 update2 = StatelessUpdate(305 stateless_job,306 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,307 batch_size=1,308 )309 update2.create(in_place=in_place)310 update2.wait_for_state(goal_state="SUCCEEDED")311 new_pod_infos = stateless_job.query_pods()312 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()313 assert len(old_pod_infos) == 3314 assert len(new_pod_infos) == 5315 assert_pod_id_changed(old_pod_infos, new_pod_infos)316 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)317def test__abort_update(stateless_job, in_place):318 stateless_job.create()319 stateless_job.wait_for_state(goal_state="RUNNING")320 update = StatelessUpdate(321 stateless_job,322 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,323 batch_size=1,324 )325 update.create(in_place=in_place)326 update.wait_for_state(goal_state="ROLLING_FORWARD")327 update.abort()328 update.wait_for_state(goal_state="ABORTED")329def test__update_reduce_instances(stateless_job, in_place):330 stateless_job.create()331 stateless_job.wait_for_state(goal_state="RUNNING")332 old_pod_infos = stateless_job.query_pods()333 assert len(old_pod_infos) == 3334 # first increase instances335 update = StatelessUpdate(336 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC337 )338 update.create()339 update.wait_for_state(goal_state="SUCCEEDED")340 new_pod_infos = stateless_job.query_pods()341 assert len(new_pod_infos) == 5342 # now reduce instances343 update = StatelessUpdate(344 stateless_job,345 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC,346 )347 update.create(in_place=in_place)348 update.wait_for_state(goal_state="SUCCEEDED")349 new_pod_infos = stateless_job.query_pods()350 assert len(new_pod_infos) == 3351 # now increase back again352 update = StatelessUpdate(353 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC354 )355 update.create()356 update.wait_for_state(goal_state="SUCCEEDED")357 new_pod_infos = stateless_job.query_pods()358 assert len(new_pod_infos) == 5359def test__update_reduce_instances_stopped_tasks(stateless_job, in_place):360 stateless_job.create()361 stateless_job.wait_for_state(goal_state="RUNNING")362 old_pod_infos = stateless_job.query_pods()363 assert len(old_pod_infos) == 3364 # first increase instances365 update = StatelessUpdate(366 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC367 )368 update.create(in_place=in_place)369 update.wait_for_state(goal_state="SUCCEEDED")370 new_pod_infos = stateless_job.query_pods()371 assert len(new_pod_infos) == 5372 # now stop last 2 tasks373 ranges = task_pb2.InstanceRange(to=5)374 setattr(ranges, "from", 3)375 stateless_job.stop(ranges=[ranges])376 # now reduce instance count377 update = StatelessUpdate(378 stateless_job,379 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC,380 )381 update.create(in_place=in_place)382 update.wait_for_state(goal_state="SUCCEEDED")383 new_pod_infos = stateless_job.query_pods()384 assert len(new_pod_infos) == 3385# test__create_update_bad_config tests creating an update with bad config386# without rollback387def test__create_update_with_bad_config(stateless_job, in_place):388 stateless_job.create()389 stateless_job.wait_for_state(goal_state="RUNNING")390 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()391 update = StatelessUpdate(392 stateless_job,393 updated_job_file=UPDATE_STATELESS_JOB_BAD_SPEC,394 max_failure_instances=3,395 max_instance_attempts=1,396 )397 update.create(in_place=in_place)398 update.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")399 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()400 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)401 for pod_info in stateless_job.query_pods():402 assert pod_info.status.state == pod_pb2.POD_STATE_FAILED403# test__create_update_add_instances_with_bad_config404# tests creating an update with bad config and more instances405# without rollback406def test__create_update_add_instances_with_bad_config(stateless_job, in_place):407 stateless_job.create()408 stateless_job.wait_for_state(goal_state="RUNNING")409 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_BAD_SPEC)410 updated_job_spec = JobSpec()411 json_format.ParseDict(job_spec_dump, updated_job_spec)412 updated_job_spec.instance_count = stateless_job.job_spec.instance_count + 3413 update = StatelessUpdate(414 stateless_job,415 batch_size=1,416 updated_job_spec=updated_job_spec,417 max_failure_instances=1,418 max_instance_attempts=1,419 )420 update.create(in_place=in_place)421 update.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")422 # only one instance should be added423 assert (424 len(stateless_job.query_pods())425 == stateless_job.job_spec.instance_count + 1426 )427# test__create_update_reduce_instances_with_bad_config428# tests creating an update with bad config and fewer instances429# without rollback430def test__create_update_reduce_instances_with_bad_config(stateless_job, in_place):431 stateless_job.create()432 stateless_job.wait_for_state(goal_state="RUNNING")433 old_pod_infos = stateless_job.query_pods()434 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_BAD_SPEC)435 updated_job_spec = JobSpec()436 json_format.ParseDict(job_spec_dump, updated_job_spec)437 updated_job_spec.instance_count = stateless_job.job_spec.instance_count - 1438 update = StatelessUpdate(439 stateless_job,440 updated_job_spec=updated_job_spec,441 batch_size=1,442 max_failure_instances=1,443 max_instance_attempts=1,444 )445 update.create(in_place=in_place)446 update.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")447 new_pod_infos = stateless_job.query_pods()448 assert len(old_pod_infos) == len(new_pod_infos)449# test__create_update_with_failed_health_check450# tests an update fails even if the new task state is RUNNING,451# as long as the health check fails452def test__create_update_with_failed_health_check(stateless_job, in_place):453 stateless_job.create()454 stateless_job.wait_for_state(goal_state="RUNNING")455 update = StatelessUpdate(456 stateless_job,457 updated_job_file=UPDATE_STATELESS_JOB_BAD_HEALTH_CHECK_SPEC,458 max_failure_instances=1,459 max_instance_attempts=1,460 )461 update.create(in_place=in_place)462 update.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")463# test__create_update_to_disable_health_check tests an update which464# disables healthCheck465def test__create_update_to_disable_health_check(in_place):466 job = StatelessJob(467 job_file=UPDATE_STATELESS_JOB_WITH_HEALTH_CHECK_SPEC,468 config=IntegrationTestConfig(469 max_retry_attempts=100,470 pool_file='test_stateless_respool.yaml',471 ),472 )473 job.create()474 job.wait_for_state(goal_state="RUNNING")475 job.job_spec.default_spec.containers[0].liveness_check.enabled = False476 update = StatelessUpdate(477 job,478 updated_job_spec=job.job_spec,479 max_failure_instances=1,480 max_instance_attempts=1,481 )482 update.create(in_place=in_place)483 update.wait_for_state(goal_state="SUCCEEDED")484# test__create_update_to_enable_health_check tests an update which485# enables healthCheck486def test__create_update_to_enable_health_check(in_place):487 job = StatelessJob(488 job_file=UPDATE_STATELESS_JOB_WITH_HEALTH_CHECK_SPEC,489 config=IntegrationTestConfig(490 max_retry_attempts=100,491 pool_file='test_stateless_respool.yaml',492 ),493 )494 job.job_spec.default_spec.containers[0].liveness_check.enabled = False495 job.create()496 job.wait_for_state(goal_state="RUNNING")497 job.job_spec.default_spec.containers[0].liveness_check.enabled = True498 update = StatelessUpdate(499 job,500 updated_job_spec=job.job_spec,501 max_failure_instances=1,502 max_instance_attempts=1,503 )504 update.create(in_place=in_place)505 update.wait_for_state(goal_state="SUCCEEDED")506# test__create_update_to_unset_health_check tests an update to unset507# health check config508def test__create_update_to_unset_health_check(in_place):509 job = StatelessJob(510 job_file=UPDATE_STATELESS_JOB_WITH_HEALTH_CHECK_SPEC,511 config=IntegrationTestConfig(512 max_retry_attempts=100,513 pool_file='test_stateless_respool.yaml',514 ),515 )516 job.create()517 job.wait_for_state(goal_state="RUNNING")518 update = StatelessUpdate(519 job,520 updated_job_file=UPDATE_STATELESS_JOB_SPEC,521 max_failure_instances=1,522 max_instance_attempts=1,523 )524 update.create(in_place=in_place)525 update.wait_for_state(goal_state="SUCCEEDED")526# test__create_update_to_unset_health_check tests an update to set527# health check config for a job without health check set528def test__create_update_to_set_health_check(in_place):529 job = StatelessJob(530 job_file=UPDATE_STATELESS_JOB_SPEC,531 config=IntegrationTestConfig(532 max_retry_attempts=100,533 pool_file='test_stateless_respool.yaml',534 ),535 )536 job.create()537 job.wait_for_state(goal_state="RUNNING")538 update = StatelessUpdate(539 job,540 updated_job_file=UPDATE_STATELESS_JOB_WITH_HEALTH_CHECK_SPEC,541 max_failure_instances=1,542 max_instance_attempts=1,543 )544 update.create(in_place=in_place)545 update.wait_for_state(goal_state="SUCCEEDED")546# test__create_update_to_change_health_check_config tests an update which547# changes healthCheck548def test__create_update_to_change_health_check_config(in_place):549 job = StatelessJob(550 job_file=UPDATE_STATELESS_JOB_WITH_HEALTH_CHECK_SPEC,551 config=IntegrationTestConfig(552 max_retry_attempts=100,553 pool_file='test_stateless_respool.yaml',554 ),555 )556 job.job_spec.default_spec.containers[0].liveness_check.enabled = False557 job.create()558 job.wait_for_state(goal_state="RUNNING")559 job.job_spec.default_spec.containers[560 0561 ].liveness_check.initial_interval_secs = 2562 update = StatelessUpdate(563 job,564 updated_job_spec=job.job_spec,565 max_failure_instances=1,566 max_instance_attempts=1,567 )568 update.create(in_place=in_place)569 update.wait_for_state(goal_state="SUCCEEDED")570# test__auto_rollback_update_with_bad_config tests creating an update with bad config571# with rollback572def test__auto_rollback_update_with_bad_config(stateless_job, in_place):573 stateless_job.create()574 stateless_job.wait_for_state(goal_state="RUNNING")575 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()576 update = StatelessUpdate(577 stateless_job,578 updated_job_file=UPDATE_STATELESS_JOB_BAD_SPEC,579 roll_back_on_failure=True,580 max_failure_instances=1,581 max_instance_attempts=1,582 )583 update.create(in_place=in_place)584 update.wait_for_state(goal_state="ROLLED_BACK")585 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()586 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)587# test__auto_rollback_update_add_instances_with_bad_config588# tests creating an update with bad config and more instances589# with rollback590def test__auto_rollback_update_add_instances_with_bad_config(stateless_job, in_place):591 stateless_job.create()592 stateless_job.wait_for_state(goal_state="RUNNING")593 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()594 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_BAD_SPEC)595 updated_job_spec = JobSpec()596 json_format.ParseDict(job_spec_dump, updated_job_spec)597 updated_job_spec.instance_count = stateless_job.job_spec.instance_count + 3598 update = StatelessUpdate(599 stateless_job,600 updated_job_spec=updated_job_spec,601 roll_back_on_failure=True,602 max_failure_instances=1,603 max_instance_attempts=1,604 )605 update.create(in_place=in_place)606 update.wait_for_state(goal_state="ROLLED_BACK")607 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()608 # no instance should be added609 assert (610 len(stateless_job.query_pods())611 == stateless_job.job_spec.instance_count612 )613 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)614# test__auto_rollback_update_reduce_instances_with_bad_config615# tests creating an update with bad config and fewer instances616# with rollback617def test__auto_rollback_update_reduce_instances_with_bad_config(stateless_job, in_place):618 stateless_job.create()619 stateless_job.wait_for_state(goal_state="RUNNING")620 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()621 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_BAD_SPEC)622 updated_job_spec = JobSpec()623 json_format.ParseDict(job_spec_dump, updated_job_spec)624 updated_job_spec.instance_count = stateless_job.job_spec.instance_count - 1625 update = StatelessUpdate(626 stateless_job,627 updated_job_spec=updated_job_spec,628 roll_back_on_failure=True,629 max_failure_instances=1,630 max_instance_attempts=1,631 )632 update.create(in_place=in_place)633 update.wait_for_state(goal_state="ROLLED_BACK")634 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()635 # no instance should be removed636 assert (637 len(stateless_job.query_pods())638 == stateless_job.job_spec.instance_count639 )640 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)641# test__auto_rollback_update_with_failed_health_check642# tests an update fails even if the new task state is RUNNING,643# as long as the health check fails644def test__auto_rollback_update_with_failed_health_check(stateless_job, in_place):645 stateless_job.create()646 stateless_job.wait_for_state(goal_state="RUNNING")647 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()648 update = StatelessUpdate(649 stateless_job,650 updated_job_file=UPDATE_STATELESS_JOB_BAD_HEALTH_CHECK_SPEC,651 roll_back_on_failure=True,652 max_failure_instances=1,653 max_instance_attempts=1,654 )655 update.create(in_place=in_place)656 update.wait_for_state(goal_state="ROLLED_BACK")657 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()658 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)659# test__pause_resume_initialized_update test pause and resume660# an update in initialized state661def test__pause_resume_initialized_update(stateless_job, in_place):662 stateless_job.create()663 stateless_job.wait_for_state(goal_state="RUNNING")664 old_pod_infos = stateless_job.query_pods()665 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()666 update = StatelessUpdate(667 stateless_job, batch_size=1, updated_job_file=UPDATE_STATELESS_JOB_SPEC668 )669 update.create(in_place=in_place)670 # immediately pause the update, so the update may still be INITIALIZED671 update.pause()672 update.wait_for_state(goal_state="PAUSED")673 update.resume()674 update.wait_for_state(goal_state="SUCCEEDED")675 new_pod_infos = stateless_job.query_pods()676 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()677 assert_pod_id_changed(old_pod_infos, new_pod_infos)678 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)679# test__pause_resume_initialized_update test pause and resume an update680def test__pause_resume__update(stateless_job, in_place):681 stateless_job.create()682 stateless_job.wait_for_state(goal_state="RUNNING")683 old_pod_infos = stateless_job.query_pods()684 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()685 update = StatelessUpdate(686 stateless_job, batch_size=1, updated_job_file=UPDATE_STATELESS_JOB_SPEC687 )688 update.create(in_place=in_place)689 # sleep for 1 sec so update can begin to roll forward690 time.sleep(1)691 update.pause()692 update.wait_for_state(goal_state="PAUSED")693 update.resume()694 update.wait_for_state(goal_state="SUCCEEDED")695 new_pod_infos = stateless_job.query_pods()696 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()697 assert_pod_id_changed(old_pod_infos, new_pod_infos)698 assert_pod_spec_changed(old_instance_zero_spec, new_instance_zero_spec)699# test_manual_rollback manually rolls back a running update when700# the instance count is reduced in the rollback.701# Note that manual rollback in peloton is just updating to the702# previous job configuration703def test_manual_rollback_reduce_instances(stateless_job, in_place):704 stateless_job.create()705 stateless_job.wait_for_state(goal_state="RUNNING")706 old_pod_infos = stateless_job.query_pods()707 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()708 update = StatelessUpdate(709 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC710 )711 update.create(in_place=in_place)712 # manually rollback the update713 update2 = StatelessUpdate(714 stateless_job,715 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC,716 )717 update2.create(in_place=in_place)718 update2.wait_for_state(goal_state="SUCCEEDED")719 new_pod_infos = stateless_job.query_pods()720 assert len(old_pod_infos) == len(new_pod_infos)721 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()722 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)723# test_manual_rollback manually rolls back a running update when724# the instance count is increased in the rollback725def test_manual_rollback_increase_instances(stateless_job, in_place):726 stateless_job.create()727 stateless_job.wait_for_state(goal_state="RUNNING")728 update = StatelessUpdate(729 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC730 )731 update.create(in_place=in_place)732 update.wait_for_state(goal_state="SUCCEEDED")733 old_pod_infos = stateless_job.query_pods()734 old_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()735 # reduce instance count and then roll it back736 update2 = StatelessUpdate(737 stateless_job,738 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC,739 )740 update2.create(in_place=in_place)741 update3 = StatelessUpdate(742 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC743 )744 update3.create(in_place=in_place)745 update3.wait_for_state(goal_state="SUCCEEDED")746 new_pod_infos = stateless_job.query_pods()747 assert len(old_pod_infos) == len(new_pod_infos)748 new_instance_zero_spec = stateless_job.get_pod(0).get_pod_spec()749 assert_pod_spec_equal(old_instance_zero_spec, new_instance_zero_spec)750# test_auto_rollback_reduce_instances751# rolls back a failed update when752# the instance count is reduced in the rollback.753def test_auto_rollback_reduce_instances(stateless_job, in_place):754 stateless_job.create()755 stateless_job.wait_for_state(goal_state="RUNNING")756 job_spec_dump = load_test_config(757 UPDATE_STATELESS_JOB_BAD_HEALTH_CHECK_SPEC758 )759 updated_job_spec = JobSpec()760 json_format.ParseDict(job_spec_dump, updated_job_spec)761 # increase the instance count762 updated_job_spec.instance_count = stateless_job.job_spec.instance_count + 3763 update = StatelessUpdate(764 stateless_job,765 updated_job_spec=updated_job_spec,766 roll_back_on_failure=True,767 max_instance_attempts=1,768 max_failure_instances=1,769 batch_size=1,770 )771 update.create(in_place=in_place)772 update.wait_for_state(goal_state="ROLLED_BACK")773 assert (774 len(stateless_job.query_pods())775 == stateless_job.job_spec.instance_count776 )777# test_update_create_failure_invalid_spec tests the778# update create failure due to invalid spec in request779def test_update_create_failure_invalid_spec(stateless_job, in_place):780 stateless_job.create()781 stateless_job.wait_for_state(goal_state="RUNNING")782 update = StatelessUpdate(783 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_INVALID_SPEC784 )785 try:786 update.create(in_place=in_place)787 except grpc.RpcError as e:788 assert e.code() == grpc.StatusCode.INVALID_ARGUMENT789 return790 raise Exception("job spec validation error not received")791# test_update_killed_job tests updating a killed job.792# The job should be updated but still remain in killed state793def test_update_killed_job(in_place):794 job = StatelessJob(job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC)795 job.create()796 job.wait_for_state(goal_state="RUNNING")797 job.stop()798 job.wait_for_state(goal_state="KILLED")799 update = StatelessUpdate(800 job, updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC801 )802 update.create(in_place=in_place)803 update.wait_for_state(goal_state="SUCCEEDED")804 assert job.get_spec().instance_count == 3805 assert job.get_status().state == stateless_pb2.JOB_STATE_KILLED806# test_start_job_with_active_update tests807# starting a job with an active update808def test_start_job_with_active_update(stateless_job, in_place):809 stateless_job.create()810 stateless_job.wait_for_state(goal_state="RUNNING")811 assert len(stateless_job.query_pods()) == 3812 stateless_job.stop()813 update = StatelessUpdate(814 stateless_job,815 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,816 batch_size=1,817 )818 update.create(in_place=in_place)819 stateless_job.start()820 update.wait_for_state(goal_state="SUCCEEDED")821 stateless_job.wait_for_all_pods_running()822 assert len(stateless_job.query_pods()) == 5823# test_stop_running_job_with_active_update_add_instances tests824# stopping a running job with an active update(add instances)825def test_stop_running_job_with_active_update_add_instances(stateless_job, in_place):826 stateless_job.create()827 stateless_job.wait_for_state(goal_state="RUNNING")828 assert len(stateless_job.query_pods()) == 3829 update = StatelessUpdate(830 stateless_job,831 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_AND_ADD_INSTANCES_SPEC,832 batch_size=1,833 )834 update.create(in_place=in_place)835 update.wait_for_state(goal_state="ROLLING_FORWARD")836 stateless_job.stop()837 update.wait_for_state(goal_state="SUCCEEDED")838 assert stateless_job.get_spec().instance_count == 5839# test_stop_running_job_with_active_update_remove_instances tests840# stopping a running job with an active update(remove instances)841def test_stop_running_job_with_active_update_remove_instances(in_place):842 stateless_job = StatelessJob(843 job_file=UPDATE_STATELESS_JOB_ADD_INSTANCES_SPEC844 )845 stateless_job.create()846 stateless_job.wait_for_state(goal_state="RUNNING")847 assert len(stateless_job.query_pods()) == 5848 update = StatelessUpdate(849 stateless_job,850 updated_job_file=UPDATE_STATELESS_JOB_UPDATE_REDUCE_INSTANCES_SPEC,851 batch_size=1,852 )853 update.create(in_place=in_place)854 update.wait_for_state(goal_state="ROLLING_FORWARD")855 stateless_job.stop()856 update.wait_for_state(goal_state="SUCCEEDED")857 assert stateless_job.get_spec().instance_count == 3858# test_stop_running_job_with_active_update_same_instance_count tests stopping859# a running job with an active update that doesn't change instance count860def test_stop_running_job_with_active_update_same_instance_count(861 stateless_job,862 in_place863):864 stateless_job.create()865 stateless_job.wait_for_state(goal_state="RUNNING")866 stateless_job.job_spec.default_spec.containers[867 0868 ].command.value = "sleep 100"869 update = StatelessUpdate(870 stateless_job,871 updated_job_spec=stateless_job.job_spec,872 max_failure_instances=1,873 max_instance_attempts=1,874 )875 update.create(in_place=in_place)876 stateless_job.stop()877 update.wait_for_state(goal_state="SUCCEEDED")878 assert stateless_job.get_spec().instance_count == 3879 assert (880 stateless_job.get_spec().default_spec.containers[0].command.value881 == "sleep 100"882 )883# test__create_update_before_job_fully_created creates an update884# right after a job is created. It tests the case that job can be885# updated before it is fully created886def test__create_update_before_job_fully_created(stateless_job, in_place):887 stateless_job.create()888 update = StatelessUpdate(889 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_SPEC890 )891 update.create(in_place=in_place)892 update.wait_for_state(goal_state="SUCCEEDED")893 assert (894 stateless_job.get_spec().default_spec.containers[0].command.value895 == "while :; do echo updated; sleep 10; done"896 )897# test__in_place_update_success_rate tests that in-place update898# should succeed when every daemon is in healthy state.899# It starts a job with 30 instances, and start the in-place update900# without batch size, then it tests if any pod is running on unexpected901# host.902# TODO: Re-enable k8s when it stops being flaky.903# @pytest.mark.k8s904def test__in_place_update_success_rate(stateless_job):905 stateless_job.job_spec.instance_count = 30906 stateless_job.create()907 stateless_job.wait_for_all_pods_running()908 old_pod_infos = stateless_job.query_pods()909 job_spec_dump = load_test_config(update_stateless_job_spec())910 updated_job_spec = JobSpec()911 json_format.ParseDict(job_spec_dump, updated_job_spec)912 updated_job_spec.instance_count = 30913 if minicluster_type() == "k8s":914 updated_job_spec.default_spec.containers[0].resource.mem_limit_mb = 0.1915 update = StatelessUpdate(stateless_job,916 updated_job_spec=updated_job_spec,917 batch_size=0)918 update.create(in_place=True)919 update.wait_for_state(goal_state='SUCCEEDED')920 new_pod_infos = stateless_job.query_pods()921 old_pod_dict = {}922 new_pod_dict = {}923 for old_pod_info in old_pod_infos:924 split_index = old_pod_info.status.pod_id.value.rfind('-')925 pod_name = old_pod_info.status.pod_id.value[:split_index]926 old_pod_dict[pod_name] = old_pod_info.status.host927 for new_pod_info in new_pod_infos:928 split_index = new_pod_info.status.pod_id.value.rfind('-')929 pod_name = new_pod_info.status.pod_id.value[:split_index]930 new_pod_dict[pod_name] = new_pod_info.status.host931 count = 0932 for pod_name, pod_id in old_pod_dict.items():933 if new_pod_dict[pod_name] != old_pod_dict[pod_name]:934 log.info("%s, prev:%s, cur:%s", pod_name,935 old_pod_dict[pod_name], new_pod_dict[pod_name])936 count = count + 1937 log.info("total mismatch: %d", count)938 assert count == 0939# test__in_place_kill_job_release_host tests the case of killing940# an ongoing in-place update would release hosts, so the second941# update can get completed942def test__in_place_kill_job_release_host():943 job1 = StatelessJob(944 job_file="test_stateless_job_spec.yaml",945 )946 job1.create()947 job1.wait_for_state(goal_state="RUNNING")948 job2 = StatelessJob(949 job_file="test_stateless_job_spec.yaml",950 )951 job2.create()952 job2.wait_for_state(goal_state="RUNNING")953 update1 = StatelessUpdate(job1,954 updated_job_file=UPDATE_STATELESS_JOB_SPEC,955 batch_size=0)956 update1.create(in_place=True)957 # stop the update958 job1.stop()959 update2 = StatelessUpdate(job2,960 updated_job_file=UPDATE_STATELESS_JOB_SPEC,961 batch_size=0)962 update2.create()963 # both updates should complete964 update1.wait_for_state(goal_state="SUCCEEDED")965 update2.wait_for_state(goal_state="SUCCEEDED")966@pytest.mark.skip(reason="flaky test")967def test__in_place_update_host_maintenance(stateless_job, maintenance):968 # add enough instances so each host should have some tasks running969 stateless_job.job_spec.instance_count = 9970 # need extra retry attempts, since in-place update would need more time971 # to process given agent is put in maintenance mode972 stateless_job.config = IntegrationTestConfig(973 max_retry_attempts=300,974 pool_file='test_stateless_respool.yaml',975 ),976 stateless_job.create()977 stateless_job.wait_for_all_pods_running()978 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_SPEC)979 updated_job_spec = JobSpec()980 json_format.ParseDict(job_spec_dump, updated_job_spec)981 updated_job_spec.instance_count = 9982 update = StatelessUpdate(stateless_job,983 updated_job_spec=updated_job_spec,984 batch_size=0)985 update.create(in_place=True)986 # Pick a host that is UP and start maintenance on it987 test_host = get_host_in_state(host_pb2.HOST_STATE_UP)988 resp = maintenance["start"]([test_host])989 assert resp990 wait_for_host_state(test_host, host_pb2.HOST_STATE_DOWN)991 update.wait_for_state(goal_state="SUCCEEDED")992def test__update_with_sla_aware_host_maintenance(stateless_job, maintenance):993 """994 1. Create a stateless job with 3 instances.995 2. Create a job update to update the instance job with instance count 2,996 add host-limit-1 constraint and define sla with maximum_unavailable_instances=1997 3. Start host maintenance on one of the hosts998 4. The host should transition to DOWN and the update workflow should SUCCEED999 """1000 stateless_job.create()1001 stateless_job.wait_for_all_pods_running()1002 job_spec_dump = load_test_config('test_stateless_job_spec_sla.yaml')1003 updated_job_spec = JobSpec()1004 json_format.ParseDict(job_spec_dump, updated_job_spec)1005 updated_job_spec.instance_count = 21006 update = StatelessUpdate(stateless_job,1007 updated_job_spec=updated_job_spec,1008 batch_size=1)1009 update.create()1010 # Pick a host that is UP and start maintenance on it1011 test_host = get_host_in_state(host_pb2.HOST_STATE_UP)1012 resp = maintenance["start"]([test_host])1013 assert resp1014 update.wait_for_state(goal_state="SUCCEEDED")1015 wait_for_host_state(test_host, host_pb2.HOST_STATE_DOWN)1016def test__update_with_host_maintenance_and_agent_down(stateless_job, maintenance):1017 """1018 1. Create a large stateless job (that take up more than two-thirds of1019 the cluster resources) with MaximumUnavailableInstances=2.1020 2. Start host maintenance on one of the hosts (say A) having pods of the job.1021 MaximumUnavailableInstances=2 ensures that not more than 2 pods are1022 unavailable due to host maintenance at a time.1023 3. Take down another host which has pods running on it. This will TASK_LOST1024 to be sent for all pods on the host after 75 seconds.1025 4. Start an update to modify the instance spec of one of the pods.1026 5. Since TASK_LOST would cause the job SLA to be violated, instances on the1027 host A should not be killed once LOST event is received. Verify that1028 host A does not transition to DOWN.1029 """1030 stateless_job.job_spec.instance_count = 301031 stateless_job.job_spec.default_spec.containers[0].resource.cpu_limit = 0.31032 stateless_job.job_spec.sla.maximum_unavailable_instances = 21033 stateless_job.create()1034 stateless_job.wait_for_all_pods_running()1035 hosts = [h.hostname for h in query_hosts([]).host_infos]1036 host_to_task_count = get_host_to_task_count(hosts, stateless_job)1037 sorted_hosts = [t[0] for t in sorted(1038 host_to_task_count.items(), key=operator.itemgetter(1), reverse=True)]1039 # Pick a host that has pods running on it to start maintenance on it.1040 test_host = sorted_hosts[0]1041 # pick another host which has pods of the job to take down1042 host_container = get_container([sorted_hosts[1]])1043 try:1044 host_container.stop()1045 maintenance["start"]([test_host])1046 stateless_job.job_spec.instance_spec[10].containers.extend(1047 [pod_pb2.ContainerSpec(resource=pod_pb2.ResourceSpec(disk_limit_mb=20))])1048 update = StatelessUpdate(stateless_job,1049 updated_job_spec=stateless_job.job_spec,1050 batch_size=0)1051 update.create()1052 update.wait_for_state(goal_state="SUCCEEDED")1053 stateless_job.stop()1054 wait_for_host_state(test_host, host_pb2.HOST_STATE_DOWN)1055 assert False, 'Host should not transition to DOWN'1056 except:1057 assert is_host_in_state(test_host, host_pb2.HOST_STATE_DRAINING)1058 pass1059 finally:1060 host_container.start()1061def test__update_with_host_maintenance__bad_config(stateless_job, maintenance):1062 """1063 1. Create a stateless job with 6 instances. Wait for all instances to reach1064 RUNNING state. This means that there is at least one host with 2 or more1065 instances on it1066 2. Start a bad job update with max failure tolerance of 1 and auto-rollback1067 disabled.1068 3. Start host maintenance on one of the hosts (say host A).1069 4. Wait for the update to fail. There should be 2 instances unavailable.1070 5. Since 2 instances are already unavailable and1071 maximum_unavailable_instances=1, host maintenance should not proceed.1072 Verify that the host A doesn't transition to DOWN.1073 """1074 stateless_job.job_spec.sla.maximum_unavailable_instances = 11075 stateless_job.job_spec.instance_count = 61076 stateless_job.create()1077 stateless_job.wait_for_all_pods_running()1078 hosts = [h.hostname for h in query_hosts([]).host_infos]1079 host_to_task_count = get_host_to_task_count(hosts, stateless_job)1080 sorted_hosts = [t[0] for t in sorted(1081 host_to_task_count.items(), key=operator.itemgetter(1), reverse=True)]1082 job_spec_dump = load_test_config(UPDATE_STATELESS_JOB_BAD_SPEC)1083 updated_job_spec = JobSpec()1084 json_format.ParseDict(job_spec_dump, updated_job_spec)1085 updated_job_spec.instance_count = 61086 updated_job_spec.sla.maximum_unavailable_instances = 11087 update = StatelessUpdate(1088 stateless_job,1089 updated_job_spec=updated_job_spec,1090 max_failure_instances=1,1091 max_instance_attempts=1,1092 batch_size=2,1093 )1094 update.create()1095 # Pick a host that has pods running on it to start maintenance on it.1096 test_host = sorted_hosts[0]1097 maintenance["start"]([test_host])1098 update.wait_for_state(goal_state="FAILED", failed_state="SUCCEEDED")1099 try:1100 wait_for_host_state(test_host, host_pb2.HOST_STATE_DOWN)1101 assert False, 'Host should not transition to DOWN'1102 except:1103 assert is_host_in_state(test_host, host_pb2.HOST_STATE_DRAINING)1104# test__create_update_update_job_config tests update job level config1105# would not trigger task restart1106def test__create_update_update_job_config(stateless_job):1107 stateless_job.create()1108 stateless_job.wait_for_all_pods_running()1109 old_pod_infos = stateless_job.query_pods()1110 update = StatelessUpdate(1111 stateless_job, updated_job_file=UPDATE_STATELESS_JOB_JOB_CONFIG_UPDATE_SPEC1112 )1113 update.create()1114 update.wait_for_state(goal_state="SUCCEEDED")1115 new_pod_infos = stateless_job.query_pods()...

Full Screen

Full Screen

test_provisioner_service.py

Source:test_provisioner_service.py Github

copy

Full Screen

...137 launch_id = _new_id()138 node_ids = [_new_id()]139 client.provision(launch_id, node_ids, deployable_type,140 'fake-site1', caller="asterix")141 ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)142 self.assertTrue(ok)143 self.assertTrue(notifier.assure_record_count(1))144 self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)145 self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)146 def test_provision_with_vars(self):147 client = self.client148 caller = 'asterix'149 deployable_type = 'empty-with-vars'150 launch_id = _new_id()151 node_ids = [_new_id()]152 vars = {'image_id': 'fake-image'}153 client.provision(launch_id, node_ids, deployable_type,154 'fake-site1', vars=vars, caller=caller)155 self.notifier.wait_for_state(InstanceState.PENDING, node_ids,156 before=self.provisioner.leader._force_cycle)157 self.assertStoreNodeRecords(InstanceState.PENDING, *node_ids)158 def test_provision_with_missing_vars(self):159 client = self.client160 notifier = self.notifier161 caller = 'asterix'162 deployable_type = 'empty-with-vars'163 launch_id = _new_id()164 node_ids = [_new_id()]165 vars = {'foo': 'bar'}166 client.provision(launch_id, node_ids, deployable_type,167 'fake-site1', vars=vars, caller=caller)168 ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)169 self.assertTrue(ok)170 self.assertTrue(notifier.assure_record_count(1))171 self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)172 self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)173 def test_provision_broker_error(self):174 client = self.client175 notifier = self.notifier176 deployable_type = 'empty'177 launch_id = _new_id()178 self.context_client.create_error = BrokerError("fake failure")179 node_ids = [_new_id()]180 client.provision(launch_id, node_ids, deployable_type,181 'fake-site1', caller="asterix")182 ok = notifier.wait_for_state(InstanceState.FAILED, node_ids)183 self.assertTrue(ok)184 self.assertTrue(notifier.assure_record_count(1))185 self.assertStoreNodeRecords(InstanceState.FAILED, *node_ids)186 self.assertStoreLaunchRecord(InstanceState.FAILED, launch_id)187 def test_dump_state(self):188 running_launch, running_nodes = make_launch_and_nodes(_new_id(), 10, InstanceState.RUNNING)189 self.store.add_launch(running_launch)190 for node in running_nodes:191 self.store.add_node(node)192 pending_launch, pending_nodes = make_launch_and_nodes(_new_id(), 3, InstanceState.PENDING)193 self.store.add_launch(pending_launch)194 for node in pending_nodes:195 self.store.add_node(node)196 running_node_ids = [node['node_id'] for node in running_nodes]197 pending_node_ids = [node['node_id'] for node in pending_nodes]198 all_node_ids = running_node_ids + pending_node_ids199 self.client.dump_state(running_node_ids)200 ok = self.notifier.wait_for_state(InstanceState.RUNNING, nodes=running_node_ids)201 self.assertTrue(ok)202 self.assertEqual(len(self.notifier.nodes), len(running_nodes))203 self.client.dump_state(pending_node_ids)204 ok = self.notifier.wait_for_state(InstanceState.PENDING, nodes=pending_node_ids)205 self.assertTrue(ok)206 self.assertEqual(len(self.notifier.nodes), len(all_node_ids))207 # we should have not gotten any dupe records yet208 self.assertTrue(self.notifier.assure_record_count(1))209 # empty dump request should dump nothing210 self.client.dump_state([])211 self.assertTrue(self.notifier.assure_record_count(1))212 def test_terminate(self):213 node_ids = []214 for _ in range(10):215 node_id = _new_id()216 node_ids.append(node_id)217 self.client.provision(_new_id(), [node_id], "empty",218 site="fake-site1", caller="asterix")219 self.notifier.wait_for_state(InstanceState.PENDING, node_ids,220 before=self.provisioner.leader._force_cycle)221 for node_id in node_ids:222 node = self.store.get_node(node_id)223 self.driver.set_node_running(node['iaas_id'])224 self.notifier.wait_for_state(InstanceState.STARTED, node_ids,225 before=self.provisioner.leader._force_cycle)226 # terminate half of the nodes then the rest227 first_five = node_ids[:5]228 last_five = node_ids[5:]229 self.client.terminate_nodes(first_five, caller="asterix")230 ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=first_five)231 self.assertTrue(ok)232 self.client.terminate_nodes(last_five, caller="asterix")233 ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=last_five)234 self.assertTrue(ok)235 self.assertEqual(set(node_ids), set(self.notifier.nodes))236 # should be REQUESTED, PENDING, STARTED, TERMINATING and TERMINATED records for each node237 self.assertTrue(self.notifier.assure_record_count(5))238 self.assertEqual(len(self.driver.destroyed),239 len(node_ids))240 def test_terminate_unknown(self):241 instance_id = _new_id()242 self.client.terminate_nodes([instance_id])243 ok = self.notifier.wait_for_state(InstanceState.TERMINATED, nodes=[instance_id])244 self.assertTrue(ok)245 def test_launch_allocation(self):246 node_id = _new_id()247 self.client.provision(_new_id(), [node_id], "empty",248 site="fake-site1", caller="asterix")249 self.notifier.wait_for_state(InstanceState.PENDING, [node_id],250 before=self.provisioner.leader._force_cycle)251 self.assertStoreNodeRecords(InstanceState.PENDING)252 self.assertEqual(len(self.driver.created), 1)253 libcloud_node = self.driver.created[0]254 self.assertEqual(libcloud_node.size.id, "m1.small")255 def test_launch_many_terminate_all(self):256 all_node_ids = []257 # after the terminate_all, provision requests should be REJECTED258 rejected_node_ids = []259 for _ in range(100):260 node_id = _new_id()261 all_node_ids.append(node_id)262 self.client.provision(_new_id(), [node_id], "empty",263 site="fake-site1", caller="asterix")264 self.notifier.wait_for_state(InstanceState.PENDING, all_node_ids,265 before=self.provisioner.leader._force_cycle)266 self.assertStoreNodeRecords(InstanceState.PENDING, *all_node_ids)267 for node_id in all_node_ids:268 node = self.store.get_node(node_id)269 self.driver.set_node_running(node['iaas_id'])270 self.notifier.wait_for_state(InstanceState.STARTED, all_node_ids,271 before=self.provisioner.leader._force_cycle)272 self.assertStoreNodeRecords(InstanceState.STARTED, *all_node_ids)273 log.debug("Expecting %d nodes to be terminated", len(all_node_ids))274 self.assertIs(self.client.terminate_all(), False)275 # future requests should be rejected276 for _ in range(5):277 node_id = _new_id()278 rejected_node_ids.append(node_id)279 self.client.provision(_new_id(), [node_id], "empty",280 site="fake-site1", caller="asterix")281 self.notifier.wait_for_state(InstanceState.TERMINATED, all_node_ids,282 before=self.provisioner.leader._force_cycle, timeout=240)283 self.assertStoreNodeRecords(InstanceState.TERMINATED, *all_node_ids)284 self.notifier.wait_for_state(InstanceState.REJECTED, rejected_node_ids, timeout=240)285 self.assertStoreNodeRecords(InstanceState.REJECTED, *rejected_node_ids)286 self.assertEqual(len(self.driver.destroyed),287 len(all_node_ids))288 self.assertIs(self.client.terminate_all(), True)289 # now re-enable290 self.client.enable()291 node_id = _new_id()292 log.debug("Launching node %s which should be accepted", node_id)293 self.client.provision(_new_id(), [node_id], "empty",294 site="fake-site1", caller="asterix")295 self.notifier.wait_for_state(InstanceState.PENDING, [node_id],296 before=self.provisioner.leader._force_cycle, timeout=60)297 self.assertStoreNodeRecords(InstanceState.PENDING, node_id)298 def test_describe(self):299 node_ids = []300 for _ in range(3):301 launch_id = _new_id()302 running_launch, running_nodes = make_launch_and_nodes(launch_id, 1,303 InstanceState.RUNNING,304 site="fake-site1", caller=self.default_user)305 self.store.add_launch(running_launch)306 for node in running_nodes:307 self.store.add_node(node)308 node_ids.append(running_nodes[0]['node_id'])309 log.debug("requestin")310 all_nodes = self.client.describe_nodes()311 self.assertEqual(len(all_nodes), len(node_ids))312 one_node = self.client.describe_nodes([node_ids[0]])313 self.assertEqual(len(one_node), 1)314 self.assertEqual(one_node[0]['node_id'], node_ids[0])315 def test_multiuser(self):316 """Test that nodes started by one user can't be modified by317 another user318 """319 permitted_user = "asterix"320 disallowed_user = "cacaphonix"321 client = self.client322 deployable_type = 'empty'323 launch_id = _new_id()324 node_ids = [_new_id()]325 vars = {'image_id': 'fake-image'}326 client.provision(launch_id, node_ids, deployable_type,327 'fake-site1', vars=vars, caller=permitted_user)328 self.notifier.wait_for_state(InstanceState.PENDING, node_ids,329 before=self.provisioner.leader._force_cycle)330 self.assertStoreNodeRecords(InstanceState.PENDING, *node_ids)331 # Test describe332 permitted_nodes = client.describe_nodes(caller=permitted_user)333 self.assertEqual(len(permitted_nodes), len(node_ids))334 disallowed_nodes = client.describe_nodes(caller=disallowed_user)335 self.assertEqual(len(disallowed_nodes), 0)336 # Test terminate337 client.terminate_nodes(node_ids, caller=disallowed_user)338 terminate_timed_out = False339 try:340 self.notifier.wait_for_state(InstanceState.TERMINATED, node_ids,341 before=self.provisioner.leader._force_cycle, timeout=2)342 except Exception:343 terminate_timed_out = True344 self.assertTrue(terminate_timed_out,345 msg="Terminate worked with non-matching user")346 client.terminate_nodes(node_ids, caller=permitted_user)347 self.notifier.wait_for_state(InstanceState.TERMINATED, node_ids,348 before=self.provisioner.leader._force_cycle, timeout=2)349 self.assertStoreNodeRecords(InstanceState.TERMINATED, *node_ids)350 def test_record_reaper(self):351 launch_id1 = _new_id()352 launch_id2 = _new_id()353 now = time.time()354 node1 = make_node(launch_id1, InstanceState.TERMINATED, caller=self.default_user,355 state_changes=[(InstanceState.TERMINATED, now - self.record_reaping_max_age - 1)])356 node2 = make_node(launch_id1, InstanceState.FAILED, caller=self.default_user,357 state_changes=[(InstanceState.FAILED, now - self.record_reaping_max_age - 1)])358 node3 = make_node(launch_id1, InstanceState.REJECTED, caller=self.default_user,359 state_changes=[(InstanceState.REJECTED, now - self.record_reaping_max_age - 1)])360 nodes1 = [node1, node2, node3]361 launch1 = make_launch(launch_id1, InstanceState.RUNNING, nodes1, caller=self.default_user)362 node4 = make_node(launch_id2, InstanceState.RUNNING, caller=self.default_user,363 state_changes=[(InstanceState.RUNNING, now - self.record_reaping_max_age - 1)])364 node5 = make_node(launch_id2, InstanceState.TERMINATED, caller=self.default_user,365 state_changes=[(InstanceState.TERMINATED, now - self.record_reaping_max_age - 1)])366 nodes2 = [node4, node5]367 launch2 = make_launch(launch_id2, InstanceState.RUNNING, nodes2, caller=self.default_user)368 self.store.add_launch(launch1)369 for node in nodes1:370 self.store.add_node(node)371 self.store.add_launch(launch2)372 for node in nodes2:373 self.store.add_node(node)374 # Wait a second for record to get written375 time.sleep(1)376 # Force a record reaping cycle377 self.provisioner.leader._force_record_reaping()378 # Check that the first launch is completely removed379 node_ids1 = map(lambda x: x['node_id'], nodes1)380 self.assertNoStoreNodeRecords(*node_ids1)381 self.assertNoStoreLaunchRecord(launch_id1)382 # Check that the second launch is still here but with only the running node383 self.assertStoreNodeRecords(InstanceState.RUNNING, node4['node_id'])384 self.assertStoreLaunchRecord(InstanceState.RUNNING, launch_id2)385class ProvisionerServiceNoContextualizationTest(BaseProvisionerServiceTests):386 def setUp(self):387 self.notifier = FakeProvisionerNotifier()388 self.context_client = None389 self.store = self.setup_store()390 self.driver = FakeNodeDriver()391 self.driver.initialize()392 self.spawn_procs()393 self.load_dtrs()394 def test_launch_no_context(self):395 all_node_ids = []396 for _ in range(10):397 node_id = _new_id()398 all_node_ids.append(node_id)399 self.client.provision(_new_id(), [node_id], "empty",400 site="fake-site1", caller="asterix")401 self.notifier.wait_for_state(InstanceState.PENDING, all_node_ids,402 before=self.provisioner.leader._force_cycle)403 self.assertStoreNodeRecords(InstanceState.PENDING, *all_node_ids)404 for node_id in all_node_ids:405 node = self.store.get_node(node_id)406 self.driver.set_node_running(node['iaas_id'])407 self.notifier.wait_for_state(InstanceState.RUNNING, all_node_ids,408 before=self.provisioner.leader._force_cycle)409 self.assertStoreNodeRecords(InstanceState.RUNNING, *all_node_ids)410class ProvisionerZooKeeperServiceTest(ProvisionerServiceTest, ZooKeeperTestMixin):411 # this runs all of the ProvisionerServiceTest tests wih a ZK store412 def setup_store(self):413 self.setup_zookeeper(base_path_prefix="/provisioner_service_tests_")414 store = ProvisionerZooKeeperStore(self.zk_hosts, self.zk_base_path, use_gevent=self.use_gevent)415 store.initialize()416 return store417 def teardown_store(self):...

Full Screen

Full Screen

test_v2v_migrations_single_vcenter.py

Source:test_v2v_migrations_single_vcenter.py Github

copy

Full Screen

...73 infra_map=mapping_data_vm_obj_mini.infra_mapping_data.get("name"),74 vm_list=mapping_data_vm_obj_mini.vm_list,75 target_provider=provider76 )77 assert migration_plan.wait_for_state("Started")78 assert migration_plan.wait_for_state("In_Progress")79 assert migration_plan.wait_for_state("Completed")80 assert migration_plan.wait_for_state("Successful")81 # check power state on migrated VM82 migrated_vm = get_migrated_vm(src_vm, provider)83 assert power_state in migrated_vm.mgmt.state84 # check tags85 collection = provider.appliance.provider_based_collection(provider)86 vm_obj = collection.instantiate(migrated_vm.name, provider)87 owner_tag = None88 for t in vm_obj.get_tags():89 if tag.display_name in t.display_name:90 owner_tag = t91 assert owner_tag is not None and tag.display_name in owner_tag.display_name92 # If Never is not there, that means retirement is set.93 assert 'Never' not in vm_obj.retirement_date94@pytest.mark.parametrize('source_type, dest_type, template_type',95 [96 ['nfs', 'nfs', [Templates.RHEL7_MINIMAL,97 Templates.RHEL7_MINIMAL,98 Templates.RHEL7_MINIMAL,99 Templates.RHEL7_MINIMAL]100 ]101 ])102def test_multi_host_multi_vm_migration(request, appliance, provider,103 source_type, dest_type, template_type,104 mapping_data_multiple_vm_obj_single_datastore):105 """106 Polarion:107 assignee: sshveta108 caseimportance: medium109 caseposneg: positive110 testtype: functional111 startsin: 5.10112 casecomponent: V2V113 initialEstimate: 1h114 """115 infrastructure_mapping_collection = appliance.collections.v2v_infra_mappings116 mapping_data = mapping_data_multiple_vm_obj_single_datastore.infra_mapping_data117 mapping = infrastructure_mapping_collection.create(**mapping_data)118 migration_plan_collection = appliance.collections.v2v_migration_plans119 migration_plan = migration_plan_collection.create(120 name=fauxfactory.gen_alphanumeric(start="plan_"),121 description=fauxfactory.gen_alphanumeric(15, start="plan_desc_"),122 infra_map=mapping.name,123 vm_list=mapping_data_multiple_vm_obj_single_datastore.vm_list,124 target_provider=provider125 )126 assert migration_plan.wait_for_state("Started")127 request_details_list = migration_plan.get_plan_vm_list(wait_for_migration=False)128 vms = request_details_list.read()129 # testing multi-host utilization130 match = ['Converting', 'Migrating']131 def _is_migration_started(vm):132 if any(string in request_details_list.get_message_text(vm) for string in match):133 return True134 return False135 for vm in vms:136 wait_for(func=_is_migration_started, func_args=[vm],137 message="migration has not started for all VMs", delay=5, num_sec=300)138 if provider.one_of(OpenStackProvider):139 host_creds = provider.appliance.collections.openstack_nodes.all()140 else:141 host_creds = provider.hosts.all()142 hosts_dict = {key.name: [] for key in host_creds}143 for vm in vms:144 popup_text = request_details_list.read_additional_info_popup(vm)145 # open__additional_info_popup function also closes opened popup in our case146 request_details_list.open_additional_info_popup(vm)147 if popup_text['Conversion Host'] in hosts_dict:148 hosts_dict[popup_text['Conversion Host']].append(vm)149 for host in hosts_dict:150 if len(hosts_dict[host]) > 0:151 logger.info("Host: {} is migrating VMs: {}".format(host, hosts_dict[host]))152 assert migration_plan.wait_for_state("In_Progress")153 assert migration_plan.wait_for_state("Completed")154 assert migration_plan.wait_for_state("Successful")155def test_migration_special_char_name(appliance, provider, request,156 mapping_data_vm_obj_mini):157 """Tests migration where name of migration plan is comprised of special non-alphanumeric158 characters, such as '@#$(&#@('.159 Polarion:160 assignee: sshveta161 caseimportance: medium162 caseposneg: positive163 testtype: functional164 startsin: 5.10165 casecomponent: V2V166 initialEstimate: 1h167 """168 migration_plan_collection = appliance.collections.v2v_migration_plans169 migration_plan = migration_plan_collection.create(170 name=fauxfactory.gen_alphanumeric(start="plan_"),171 description=fauxfactory.gen_alphanumeric(start="plan_desc_"),172 infra_map=mapping_data_vm_obj_mini.infra_mapping_data.get("name"),173 vm_list=mapping_data_vm_obj_mini.vm_list,174 target_provider=provider175 )176 assert migration_plan.wait_for_state("Started")177 assert migration_plan.wait_for_state("In_Progress")178 assert migration_plan.wait_for_state("Completed")179 assert migration_plan.wait_for_state("Successful")180 # validate MAC address matches between source and target VMs181 src_vm = mapping_data_vm_obj_mini.vm_list[0]182 migrated_vm = get_migrated_vm(src_vm, provider)183 @request.addfinalizer184 def _cleanup():185 cleanup_target(provider, migrated_vm)186 assert src_vm.mac_address == migrated_vm.mac_address187def test_migration_long_name(request, appliance, provider, source_provider):188 """Test to check VM name with 64 character should work189 Polarion:190 assignee: sshveta191 initialEstimate: 1/2h192 casecomponent: V2V193 """194 source_datastores_list = source_provider.data.get("datastores", [])195 source_datastore = [d.name for d in source_datastores_list if d.type == "nfs"][0]196 collection = appliance.provider_based_collection(source_provider)197 # Following code will create vm name with 64 characters198 vm_name = "{vm_name}{extra_words}".format(vm_name=random_vm_name(context="v2v"),199 extra_words=fauxfactory.gen_alpha(51))200 template = _get_template(source_provider, Templates.RHEL7_MINIMAL)201 vm_obj = collection.instantiate(202 name=vm_name,203 provider=source_provider,204 template_name=template.name,205 )206 vm_obj.create_on_provider(207 timeout=2400,208 find_in_cfme=True,209 allow_skip="default",210 datastore=source_datastore)211 request.addfinalizer(lambda: vm_obj.cleanup_on_provider())212 mapping_data = infra_mapping_default_data(source_provider, provider)213 infrastructure_mapping_collection = appliance.collections.v2v_infra_mappings214 mapping = infrastructure_mapping_collection.create(**mapping_data)215 @request.addfinalizer216 def _cleanup():217 infrastructure_mapping_collection.delete(mapping)218 migration_plan_collection = appliance.collections.v2v_migration_plans219 migration_plan = migration_plan_collection.create(220 name=fauxfactory.gen_alphanumeric(20, start="long_name_"),221 description=fauxfactory.gen_alphanumeric(25, start="desc_long_name_"),222 infra_map=mapping.name,223 vm_list=[vm_obj],224 target_provider=provider225 )226 assert migration_plan.wait_for_state("Started")227 assert migration_plan.wait_for_state("In_Progress")228 assert migration_plan.wait_for_state("Completed")229 assert migration_plan.wait_for_state("Successful")230 migrated_vm = get_migrated_vm(vm_obj, provider)231 assert vm_obj.mac_address == migrated_vm.mac_address232@pytest.mark.parametrize('source_type, dest_type, template_type',233 [['nfs', 'nfs', Templates.RHEL7_MINIMAL]])234def test_migration_with_edited_mapping(request, appliance, source_provider, provider,235 source_type, dest_type, template_type,236 mapping_data_vm_obj_single_datastore):237 """238 Test migration with edited infrastructure mapping.239 Polarion:240 assignee: sshveta241 caseimportance: medium242 caseposneg: positive243 testtype: functional244 startsin: 5.10245 casecomponent: V2V246 initialEstimate: 1h247 """248 infrastructure_mapping_collection = appliance.collections.v2v_infra_mappings249 mapping_data = infra_mapping_default_data(source_provider, provider)250 mapping = infrastructure_mapping_collection.create(**mapping_data)251 mapping.update(mapping_data_vm_obj_single_datastore.infra_mapping_data)252 # vm_obj is a list, with only 1 VM object, hence [0]253 src_vm_obj = mapping_data_vm_obj_single_datastore.vm_list[0]254 migration_plan_collection = appliance.collections.v2v_migration_plans255 migration_plan = migration_plan_collection.create(256 name=fauxfactory.gen_alphanumeric(start="plan_"),257 description=fauxfactory.gen_alphanumeric(15, start="plan_desc_"),258 infra_map=mapping.name,259 vm_list=mapping_data_vm_obj_single_datastore.vm_list,260 target_provider=provider)261 assert migration_plan.wait_for_state("Started")262 assert migration_plan.wait_for_state("In_Progress")263 assert migration_plan.wait_for_state("Completed")264 assert migration_plan.wait_for_state("Successful")265 migrated_vm = get_migrated_vm(src_vm_obj, provider)266 @request.addfinalizer267 def _cleanup():268 infrastructure_mapping_collection.delete(mapping)269 cleanup_target(provider, migrated_vm)270 assert src_vm_obj.mac_address == migrated_vm.mac_address271@pytest.mark.tier(3)272@pytest.mark.parametrize(273 "source_type, dest_type, template_type",274 [["nfs", "nfs", Templates.UBUNTU16_TEMPLATE]])275def test_migration_restart(request, appliance, provider,276 source_type, dest_type, template_type,277 mapping_data_vm_obj_single_datastore):278 """279 Test migration by restarting evmserverd in middle of the process280 Polarion:281 assignee: sshveta282 initialEstimate: 1h283 caseimportance: medium284 caseposneg: positive285 testtype: functional286 startsin: 5.10287 casecomponent: V2V288 """289 infrastructure_mapping_collection = appliance.collections.v2v_infra_mappings290 mapping_data = mapping_data_vm_obj_single_datastore.infra_mapping_data291 mapping = infrastructure_mapping_collection.create(**mapping_data)292 src_vm_obj = mapping_data_vm_obj_single_datastore.vm_list[0]293 migration_plan_collection = appliance.collections.v2v_migration_plans294 migration_plan = migration_plan_collection.create(295 name=fauxfactory.gen_alphanumeric(start="plan_"),296 description=fauxfactory.gen_alphanumeric(15, start="plan_desc_"),297 infra_map=mapping.name,298 target_provider=provider,299 vm_list=mapping_data_vm_obj_single_datastore.vm_list,300 )301 assert migration_plan.wait_for_state("Started")302 # reboot system when the actual disk migration elapsed a 240 second time duration303 migration_plan.in_progress(plan_elapsed_time=240)304 appliance.restart_evm_rude()305 appliance.wait_for_miq_ready()306 try:307 assert migration_plan.wait_for_state("In_Progress")308 except WebDriverException:309 pass310 assert migration_plan.wait_for_state("Completed")311 assert migration_plan.wait_for_state("Successful")312 migrated_vm = get_migrated_vm(src_vm_obj, provider)313 @request.addfinalizer314 def _cleanup():315 infrastructure_mapping_collection.delete(mapping)316 cleanup_target(provider, migrated_vm)317 assert src_vm_obj.mac_address == migrated_vm.mac_address318@pytest.mark.tier(2)319def test_if_no_password_is_exposed_in_logs_during_migration(appliance, source_provider, provider,320 request, mapping_data_vm_obj_mini):321 """322 title: OSP: Test if no password is exposed in logs during migration323 Polarion:324 assignee: mnadeem325 casecomponent: V2V326 initialEstimate: 1/8h327 startsin: 5.10328 subcomponent: OSP329 testSteps:330 1. Create infrastructure mapping for Vmware to OSP/RHV331 2. Create migration plan332 3. Start migration333 expectedResults:334 1. Mapping created and visible in UI335 2.336 3. logs should not show password during migration337 """338 cred = []339 ssh_key_name = source_provider.data['private-keys']['vmware-ssh-key']['credentials']340 cred.append(credentials[source_provider.data.get("credentials")]["password"])341 cred.append(credentials[ssh_key_name]["password"])342 cred.append(credentials[provider.data.get("credentials")]["password"])343 if provider.one_of(OpenStackProvider):344 osp_key_name = provider.data['private-keys']['conversion_host_ssh_key']['credentials']345 cred.append(credentials[osp_key_name]["password"])346 automation_log = LogValidator("/var/www/miq/vmdb/log/automation.log", failure_patterns=cred,347 hostname=appliance.hostname)348 evm_log = LogValidator("/var/www/miq/vmdb/log/evm.log", failure_patterns=cred,349 hostname=appliance.hostname)350 automation_log.start_monitoring()351 evm_log.start_monitoring()352 migration_plan_collection = appliance.collections.v2v_migration_plans353 migration_plan = migration_plan_collection.create(354 name=fauxfactory.gen_alphanumeric(start="plan_"),355 description=fauxfactory.gen_alphanumeric(start="plan_desc_"),356 infra_map=mapping_data_vm_obj_mini.infra_mapping_data.get("name"),357 vm_list=mapping_data_vm_obj_mini.vm_list,358 target_provider=provider359 )360 assert migration_plan.wait_for_state("Started")361 assert migration_plan.wait_for_state("In_Progress")362 assert migration_plan.wait_for_state("Completed")363 assert migration_plan.wait_for_state("Successful")364 src_vm = mapping_data_vm_obj_mini.vm_list[0]365 migrated_vm = get_migrated_vm(src_vm, provider)366 @request.addfinalizer367 def _cleanup():368 cleanup_target(provider, migrated_vm)369 migration_plan.delete_completed_plan()370 # Check log files for any exposed password371 assert automation_log.validate()...

Full Screen

Full Screen

test_allocation.py

Source:test_allocation.py Github

copy

Full Screen

...16 stateless_job.create()17 stateless_job.wait_for_all_pods_running()18 verify_allocation(stateless_job)19 stateless_job.stop()20 stateless_job.wait_for_state(goal_state="KILLED")21 assert stateless_job.pool.get_allocation("cpu") == 022def test_allocation_create_job__restart_placement_engine_and_hostmgr(23 stateless_job,24 hostmgr,25 placement_engines,26):27 """28 1. Create a job29 2. Restart placement engine and host manager30 3. Wait for all pods to transit to running state and verify the allocation is as expected.31 """32 stateless_job.job_spec.instance_count = 5033 stateless_job.create()34 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)35 stateless_job.wait_for_all_pods_running()36 verify_allocation(stateless_job)37 stateless_job.stop()38 stateless_job.wait_for_state(goal_state="KILLED")39 assert stateless_job.pool.get_allocation("cpu") == 040def test_allocation_update_job__add_instances(stateless_job, in_place):41 """42 1. Create a job and verify the allocation is as expected.43 2. Update the job to increase the instance count and verify the allocation is as expected.44 """45 stateless_job.job_spec.instance_count = 3046 stateless_job.create()47 stateless_job.wait_for_all_pods_running()48 verify_allocation(stateless_job)49 stateless_job.job_spec.instance_count = 5050 update = StatelessUpdate(51 stateless_job, updated_job_spec=stateless_job.job_spec)52 update.create(in_place=in_place)53 update.wait_for_state(goal_state="SUCCEEDED")54 verify_allocation(stateless_job)55 stateless_job.stop()56 stateless_job.wait_for_state(goal_state="KILLED")57 assert stateless_job.pool.get_allocation("cpu") == 058def test_allocation_update_job__add_instances_restart_hostmgr_and_placement_engine(59 stateless_job,60 in_place,61 hostmgr,62 placement_engines,63):64 """65 1. Create a job66 2. Restart hostmgr and placement engines.67 3. Wait for job to come up and verify the allocation is as expected.68 4. Update the job to increase the instance count.69 5. Restart hostmgr and placement engines.70 6. Wait for all pods to transit to running state and verify the allocation is as expected.71 """72 stateless_job.job_spec.instance_count = 3073 stateless_job.create()74 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)75 stateless_job.wait_for_all_pods_running()76 verify_allocation(stateless_job)77 stateless_job.job_spec.instance_count = 5078 update = StatelessUpdate(79 stateless_job, updated_job_spec=stateless_job.job_spec)80 update.create(in_place=in_place)81 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)82 update.wait_for_state(goal_state="SUCCEEDED")83 verify_allocation(stateless_job)84 stateless_job.stop()85 stateless_job.wait_for_state(goal_state="KILLED")86 assert stateless_job.pool.get_allocation("cpu") == 087def test_allocation_update_job__remove_instances(stateless_job, in_place):88 """89 1. Create a job and verify the allocation is as expected.90 2. Update the job to reduce the instance count and verify the allocation is as expected.91 """92 stateless_job.job_spec.instance_count = 5093 stateless_job.create()94 stateless_job.wait_for_all_pods_running()95 verify_allocation(stateless_job)96 stateless_job.job_spec.instance_count = 3097 update = StatelessUpdate(98 stateless_job, updated_job_spec=stateless_job.job_spec)99 update.create(in_place=in_place)100 update.wait_for_state(goal_state="SUCCEEDED")101 verify_allocation(stateless_job)102 stateless_job.stop()103 stateless_job.wait_for_state(goal_state="KILLED")104 assert stateless_job.pool.get_allocation("cpu") == 0105def test_allocation_update_job__remove_instances_restart_hostmgr_and_placement_engine(106 stateless_job,107 in_place,108 hostmgr,109 placement_engines,110):111 """112 1. Create a job113 2. Restart hostmgr and placement engines.114 3. Wait for job to come up and verify the allocation is as expected.115 4. Update the job to reduce the instance count116 5. Restart hostmgr and placement engines.117 6. Wait for all pods to transit to running state and verify the allocation is as expected.118 """119 stateless_job.job_spec.instance_count = 50120 stateless_job.create()121 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)122 stateless_job.wait_for_all_pods_running()123 verify_allocation(stateless_job)124 stateless_job.job_spec.instance_count = 30125 update = StatelessUpdate(126 stateless_job, updated_job_spec=stateless_job.job_spec)127 update.create(in_place=in_place)128 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)129 update.wait_for_state(goal_state="SUCCEEDED")130 verify_allocation(stateless_job)131 stateless_job.stop()132 stateless_job.wait_for_state(goal_state="KILLED")133 assert stateless_job.pool.get_allocation("cpu") == 0134def test_allocation_update_job__add_and_remove_instances(stateless_job, in_place):135 """136 1. Create a job and verify the allocation is as expected.137 2. Update the job to increase the instance count and verify the allocation is as expected.138 3. Update the job to reduce the instance count and verify the allocation is as expected.139 """140 stateless_job.job_spec.instance_count = 30141 stateless_job.create()142 stateless_job.wait_for_all_pods_running()143 verify_allocation(stateless_job)144 stateless_job.job_spec.instance_count = 50145 update = StatelessUpdate(146 stateless_job, updated_job_spec=stateless_job.job_spec)147 update.create(in_place=in_place)148 update.wait_for_state(goal_state="SUCCEEDED")149 verify_allocation(stateless_job)150 stateless_job.job_spec.instance_count = 10151 update = StatelessUpdate(152 stateless_job, updated_job_spec=stateless_job.job_spec)153 update.create(in_place=in_place)154 update.wait_for_state(goal_state="SUCCEEDED")155 verify_allocation(stateless_job)156 stateless_job.stop()157 stateless_job.wait_for_state(goal_state="KILLED")158 assert stateless_job.pool.get_allocation("cpu") == 0159def test_allocation_update_job__add_and_remove_instances_restart_hostmgr_and_placement_engine(160 stateless_job,161 in_place,162 hostmgr,163 placement_engines,164):165 """166 1. Create a job167 2. Restart hostmgr and placement engines.168 3. Wait for job to come up and verify the allocation is as expected.169 4. Update the job to increase the instance count170 5. Restart hostmgr and placement engines.171 6. Wait for all pods to transit to running state and verify the allocation is as expected.172 7. Update the job to reduce the instance count173 8. Restart hostmgr and placement engines.174 9. Wait for all pods to transit to running stateand verify the allocation is as expected.175 """176 stateless_job.job_spec.instance_count = 30177 stateless_job.create()178 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)179 stateless_job.wait_for_all_pods_running()180 verify_allocation(stateless_job)181 stateless_job.job_spec.instance_count = 50182 update = StatelessUpdate(183 stateless_job, updated_job_spec=stateless_job.job_spec)184 update.create(in_place=in_place)185 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)186 update.wait_for_state(goal_state="SUCCEEDED")187 verify_allocation(stateless_job)188 stateless_job.job_spec.instance_count = 10189 update = StatelessUpdate(190 stateless_job, updated_job_spec=stateless_job.job_spec)191 update.create(in_place=in_place)192 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)193 update.wait_for_state(goal_state="SUCCEEDED")194 verify_allocation(stateless_job)195 stateless_job.stop()196 stateless_job.wait_for_state(goal_state="KILLED")197 assert stateless_job.pool.get_allocation("cpu") == 0198def test_allocation_update_job__overwrite_update_restart_hostmgr_and_placement_engine(199 stateless_job,200 in_place,201 hostmgr,202 placement_engines,203):204 """205 1. Create a job206 2. Restart hostmgr and placement engines.207 3. Wait for job to come up and verify the allocation is as expected.208 4. Update the job to increase the instance count209 5. Restart hostmgr and placement engines.210 6. Update the job to reduce the instance count211 7. Restart hostmgr and placement engines.212 8. Wait for all pods to transit to running stateand verify the allocation is as expected.213 """214 stateless_job.job_spec.instance_count = 30215 stateless_job.create()216 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)217 stateless_job.wait_for_all_pods_running()218 verify_allocation(stateless_job)219 stateless_job.job_spec.instance_count = 50220 update = StatelessUpdate(221 stateless_job, updated_job_spec=stateless_job.job_spec)222 update.create(in_place=in_place)223 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)224 stateless_job.job_spec.instance_count = 10225 update = StatelessUpdate(226 stateless_job, updated_job_spec=stateless_job.job_spec)227 update.create(in_place=in_place)228 restart_hostmgr_and_placement_engine(hostmgr, placement_engines)229 update.wait_for_state(goal_state="SUCCEEDED")230 verify_allocation(stateless_job)231 stateless_job.stop()232 stateless_job.wait_for_state(goal_state="KILLED")233 assert stateless_job.pool.get_allocation("cpu") == 0234def verify_allocation(job):235 assert job.pool.get_allocation("cpu") - (236 job.get_spec().instance_count *237 job.get_spec().default_spec.containers[0].resource.cpu_limit238 ) < 0.01239 assert job.pool.get_allocation("memory") - (240 job.get_spec().instance_count *241 job.get_spec().default_spec.containers[0].resource.mem_limit_mb242 ) < 0.01243 assert job.pool.get_allocation("disk") - (244 job.get_spec().instance_count *245 job.get_spec().default_spec.containers[0].resource.disk_limit_mb246 ) < 0.01...

Full Screen

Full Screen

test_revocable.py

Source:test_revocable.py Github

copy

Full Screen

...20 non_revocable_job1 = StatelessJob(21 job_file="test_stateless_preemptible_job_memory_large_spec.yaml"22 )23 non_revocable_job1.create()24 non_revocable_job1.wait_for_state(goal_state="RUNNING")25 non_revocable_job1.wait_for_all_pods_running()26 revocable_job = StatelessJob(27 job_file="test_stateless_job_revocable_spec.yaml"28 )29 revocable_job.create()30 revocable_job.wait_for_state(goal_state="RUNNING")31 revocable_job.wait_for_all_pods_running()32 # launch second non-revocable job which will pre-empt revocable job33 non_revocable_job2 = StatelessJob(34 job_file="test_stateless_job_memory_large_spec.yaml"35 )36 non_revocable_job2.create()37 non_revocable_job2.wait_for_state(goal_state="RUNNING")38 non_revocable_job2.wait_for_all_pods_running()39 # no revocable job tasks should be running40 def zero_tasks_running():41 count = 042 for pod_id in range(0, revocable_job.job_spec.instance_count):43 pod_state = revocable_job.get_pod(pod_id).get_pod_status().state44 if pod_state == pod.POD_STATE_RUNNING:45 count += 146 return count == 047 revocable_job.wait_for_condition(zero_tasks_running)48 revocable_job.stop()49 non_revocable_job1.stop()50 non_revocable_job2.stop()51 revocable_job.wait_for_terminated()52 non_revocable_job1.wait_for_terminated()53 non_revocable_job2.wait_for_terminated()54# Resouce Pool reservation memory: 1000MB55# Slack Limit: 20% (default), 200MB56# Revocable Job Memory request: 100MB * 3(instances) = 300MB57def test__revocable_job_slack_limit():58 revocable_job = StatelessJob(59 job_file="test_stateless_job_revocable_slack_limit_spec.yaml"60 )61 revocable_job.create()62 revocable_job.wait_for_state(goal_state="RUNNING")63 # 2 tasks are running out of 364 def partial_tasks_running():65 count = 066 for pod_id in range(0, revocable_job.job_spec.instance_count):67 pod_state = revocable_job.get_pod(pod_id).get_pod_status().state68 if pod_state == pod.POD_STATE_RUNNING:69 count += 170 return count == 271 revocable_job.wait_for_condition(partial_tasks_running)72 # cleanup job from jobmgr73 revocable_job.stop()74 revocable_job.wait_for_terminated()75# Entitlement: MEMORY: 2000 (1000 reservation and 1000 limit)76# Launch non_revoacble & non_preemptible job with mem: 1000MB77# Launch non_revoacble & preemptible job with mem: 1000MB78# Launch revocable job -> no memory is left to satisfy request79# Kill any non_revocable job, and it will satisfy revocable job80def test__stop_nonrevocable_job_to_free_resources_for_revocable_job():81 non_revocable_job1 = StatelessJob(82 job_file="test_stateless_job_memory_large_spec.yaml"83 )84 non_revocable_job1.create()85 non_revocable_job1.wait_for_state("RUNNING")86 non_revocable_job2 = StatelessJob(87 job_file="test_stateless_preemptible_job_memory_large_spec.yaml"88 )89 non_revocable_job2.create()90 non_revocable_job2.wait_for_state("RUNNING")91 non_revocable_job1.wait_for_all_pods_running()92 non_revocable_job2.wait_for_all_pods_running()93 revocable_job = StatelessJob(94 job_file="test_stateless_job_revocable_spec.yaml"95 )96 revocable_job.create()97 # no tasks should be running98 def no_task_running():99 count = 0100 for pod_id in range(0, revocable_job.job_spec.instance_count):101 pod_state = revocable_job.get_pod(pod_id).get_pod_status().state102 if pod_state == pod.POD_STATE_RUNNING:103 count += 1104 return count == 0105 # give job 5 seconds to run, even after that no tasks should be running106 time.sleep(5)107 revocable_job.wait_for_condition(no_task_running)108 # stop non_revocable job to free up resources for revocable job109 non_revocable_job2.stop()110 non_revocable_job2.wait_for_terminated()111 # After non_revocable job is killed, all revocable tasks should be running112 revocable_job.wait_for_all_pods_running()113 # cleanup jobs from jobmgr114 non_revocable_job1.stop()115 revocable_job.stop()116 non_revocable_job1.wait_for_terminated()117 revocable_job.wait_for_terminated()118# Simple scenario, where revocable and non-revocable job119# run simultaneously120# Physical cluster capacity cpus: 12121# Non-Revocable Job cpus: 5 (constrained by resource pool reservation)122# Revocable Job cpus: 5 * 2 = 10123def test__create_revocable_job():124 revocable_job1 = StatelessJob(125 job_file="test_stateless_job_revocable_spec.yaml"126 )127 revocable_job1.create()128 revocable_job1.wait_for_state(goal_state="RUNNING")129 revocable_job1.wait_for_all_pods_running()130 revocable_job2 = StatelessJob(131 job_file="test_stateless_job_revocable_spec.yaml"132 )133 revocable_job2.create()134 revocable_job2.wait_for_state(goal_state="RUNNING")135 revocable_job2.wait_for_all_pods_running()136 non_revocable_job = StatelessJob(137 job_file="test_stateless_job_cpus_large_spec.yaml"138 )139 non_revocable_job.create()140 non_revocable_job.wait_for_state(goal_state="RUNNING")141 non_revocable_job.wait_for_all_pods_running()142 # cleanup jobs from jobmgr143 revocable_job1.stop()144 revocable_job2.stop()145 non_revocable_job.stop()146 revocable_job1.wait_for_terminated()147 revocable_job2.wait_for_terminated()148 non_revocable_job.wait_for_terminated()149# Revocable tasks are moved to revocable queue, so they do not block150# Non-Revocable tasks.151# Launch 2 revocable jobs, one will starve due to memory152def test__revocable_tasks_move_to_revocable_queue():153 revocable_job1 = StatelessJob(154 job_file="test_stateless_job_revocable_spec.yaml"155 )156 revocable_job1.create()157 revocable_job1.wait_for_state(goal_state="RUNNING")158 revocable_job1.wait_for_all_pods_running()159 # 1 task is running out of 3160 def partial_tasks_running():161 count = 0162 for pod_id in range(0, revocable_job2.job_spec.instance_count):163 pod_state = revocable_job2.get_pod(pod_id).get_pod_status().state164 if pod_state == pod.POD_STATE_RUNNING:165 count += 1166 return count == 1167 revocable_job2 = StatelessJob(168 job_file="test_stateless_job_revocable_slack_limit_spec.yaml"169 )170 revocable_job2.create()171 # sleep for 5 seconds to make sure job has enough time172 time.sleep(5)173 revocable_job2.wait_for_condition(partial_tasks_running)174 non_revocable_job = StatelessJob(job_file="test_stateless_job_spec.yaml")175 non_revocable_job.create()176 non_revocable_job.wait_for_state("RUNNING")177 non_revocable_job.wait_for_all_pods_running()178 # cleanup jobs from jobmgr179 revocable_job1.stop()180 revocable_job2.stop()181 non_revocable_job.stop()182 revocable_job1.wait_for_terminated()183 revocable_job2.wait_for_terminated()184 non_revocable_job.wait_for_terminated()185# Launch Revocable batch & stateless job.186# Launch Non-Revocable batch and stateless job.187# Ensure batch jobs SUCCEED and stateless jobs RUNNING.188def test__simple_revocable_batch_and_stateless_colocate():189 revocable_stateless_job = StatelessJob(190 job_file="test_stateless_job_revocable_spec.yaml"191 )192 revocable_stateless_job.create()193 revocable_stateless_job.wait_for_state(goal_state="RUNNING")194 revocable_stateless_job.wait_for_all_pods_running()195 non_revocable_stateless_job = StatelessJob(196 job_file="test_stateless_job_spec.yaml"197 )198 non_revocable_stateless_job.create()199 non_revocable_stateless_job.wait_for_state(goal_state="RUNNING")200 non_revocable_stateless_job.wait_for_all_pods_running()201 revocable_batch_job = Job(202 job_file="test_job_revocable.yaml",203 config=IntegrationTestConfig(pool_file='test_stateless_respool.yaml'),204 )205 revocable_batch_job.create()206 revocable_batch_job.wait_for_state(goal_state="RUNNING")207 non_revocable_batch_job = Job(208 job_file="test_job.yaml",209 config=IntegrationTestConfig(pool_file='test_stateless_respool.yaml'),210 )211 non_revocable_batch_job.create()212 non_revocable_batch_job.wait_for_state(goal_state="RUNNING")213 revocable_batch_job.wait_for_state()...

Full Screen

Full Screen

test_task.py

Source:test_task.py Github

copy

Full Screen

...16]17@pytest.mark.smoketest18def test__stop_start_all_tasks_kills_tasks_and_job(long_running_job):19 long_running_job.create()20 long_running_job.wait_for_state(goal_state="RUNNING")21 long_running_job.stop()22 long_running_job.wait_for_state(goal_state="KILLED")23 try:24 long_running_job.start()25 except grpc.RpcError as e:26 assert e.code() == grpc.StatusCode.INVALID_ARGUMENT27 assert e.details() == "cannot start tasks in a terminated job"28 return29 raise Exception("was able to start terminated job")30def test__stop_start_partial_tests_with_single_range(long_running_job):31 long_running_job.create()32 long_running_job.wait_for_state(goal_state="RUNNING")33 range = task_pb2.InstanceRange(to=1)34 setattr(range, "from", 0)35 def wait_for_instance_to_stop():36 return long_running_job.get_task(0).state_str == "KILLED"37 long_running_job.stop(ranges=[range])38 long_running_job.wait_for_condition(wait_for_instance_to_stop)39 def wait_for_instance_to_run():40 return long_running_job.get_task(0).state_str == "RUNNING"41 long_running_job.start(ranges=[range])42 long_running_job.wait_for_condition(wait_for_instance_to_run)43 long_running_job.stop()44 long_running_job.wait_for_state(goal_state="KILLED")45def test__stop_start_partial_tests_with_multiple_ranges(long_running_job):46 long_running_job.create()47 long_running_job.wait_for_state(goal_state="RUNNING")48 range1 = task_pb2.InstanceRange(to=1)49 setattr(range1, "from", 0)50 range2 = task_pb2.InstanceRange(to=2)51 setattr(range2, "from", 1)52 def wait_for_instance_to_stop():53 return (54 long_running_job.get_task(0).state_str == "KILLED"55 and long_running_job.get_task(1).state_str == "KILLED"56 )57 long_running_job.stop(ranges=[range1, range2])58 long_running_job.wait_for_condition(wait_for_instance_to_stop)59 def wait_for_instance_to_run():60 return (61 long_running_job.get_task(0).state_str == "RUNNING"62 and long_running_job.get_task(1).state_str == "RUNNING"63 )64 long_running_job.start(ranges=[range1, range2])65 long_running_job.wait_for_condition(wait_for_instance_to_run)66 long_running_job.stop()67 long_running_job.wait_for_state(goal_state="KILLED")68def test__start_stop_task_without_job_id(peloton_client):69 job_without_id = Job(70 client=peloton_client,71 )72 resp = job_without_id.start()73 assert resp.HasField("error")74 assert resp.error.HasField("notFound")75 resp = job_without_id.stop()76 assert resp.HasField("error")77 assert resp.error.HasField("notFound")78def test__start_stop_task_with_nonexistent_job_id(peloton_client):79 job_with_nonexistent_id = Job(client=peloton_client)80 job_with_nonexistent_id.job_id = "nonexistent-job-id"81 resp = job_with_nonexistent_id.start()82 assert resp.HasField("error")83 assert resp.error.HasField("notFound")84 resp = job_with_nonexistent_id.stop()85 assert resp.HasField("error")86 assert resp.error.HasField("notFound")87def test_controller_task_limit(peloton_client):88 # This tests the controller limit of a resource pool. Once it is fully89 # allocated by a controller task, subsequent tasks can't be admitted.90 # 1. start controller job1 which uses all the controller limit91 # 2. start controller job2, make sure it remains pending.92 # 3. kill job1, make sure job2 starts running.93 # job1 uses all the controller limit94 job1 = Job(95 client=peloton_client,96 job_file="test_controller_job.yaml",97 config=IntegrationTestConfig(98 pool_file="test_respool_controller_limit.yaml"99 ),100 )101 job1.create()102 job1.wait_for_state(goal_state="RUNNING")103 # job2 should remain pending as job1 used the controller limit104 job2 = Job(105 client=peloton_client,106 job_file="test_controller_job.yaml",107 config=IntegrationTestConfig(108 pool_file="test_respool_controller_limit.yaml"109 ),110 )111 job2.create()112 # sleep for 5 seconds to make sure job 2 has enough time113 time.sleep(5)114 # make sure job2 can't run115 job2.wait_for_state(goal_state="PENDING")116 # stop job1117 job1.stop()118 job1.wait_for_state(goal_state="KILLED")119 # make sure job2 starts running120 job2.wait_for_state(goal_state="RUNNING")121 kill_jobs([job2])122def test_controller_task_limit_executor_can_run(peloton_client):123 # This tests the controller limit isn't applied to non-controller jobs.124 # 1. start controller cjob1 which uses all the controller limit125 # 2. start controller cjob2, make sure it remains pending.126 # 3. start non-controller job, make sure it succeeds.127 # job1 uses all the controller limit128 cjob1 = Job(129 client=peloton_client,130 job_file="test_controller_job.yaml",131 config=IntegrationTestConfig(132 pool_file="test_respool_controller_limit.yaml"133 ),134 )135 cjob1.create()136 cjob1.wait_for_state(goal_state="RUNNING")137 # job2 should remain pending as job1 used the controller limit138 cjob2 = Job(139 client=peloton_client,140 job_file="test_controller_job.yaml",141 config=IntegrationTestConfig(142 pool_file="test_respool_controller_limit.yaml"143 ),144 )145 cjob2.create()146 # sleep for 5 seconds to make sure job 2 has enough time147 time.sleep(5)148 # make sure job2 can't run149 cjob2.wait_for_state(goal_state="PENDING")150 # start a normal executor job151 job = Job(152 client=peloton_client,153 job_file="test_job.yaml",154 config=IntegrationTestConfig(155 pool_file="test_respool_controller_limit.yaml"156 ),157 )158 job.create()159 # make sure job can run and finish160 job.wait_for_state(goal_state="SUCCEEDED")161 kill_jobs([cjob1, cjob2])162def test_job_succeeds_if_controller_task_succeeds(peloton_client):163 # only controller task in cjob would succeed.164 # other tasks would fail, but only controller task should determine165 # job terminal state166 cjob = Job(167 client=peloton_client,168 job_file="test_job_succecced_controller_task.yaml",169 )170 cjob.create()171 cjob.wait_for_state(goal_state="SUCCEEDED")172 kill_jobs([cjob])173def test_task_killed_in_ready_succeeds_when_re_enqueued(peloton_client, placement_engines):174 # Tests that a if task is deleted which is in READY state in resource175 # manager and if is re-enqueued succeeds.176 # stop the placement engines to keep the tasks in READY state177 placement_engines.stop()178 # decorate the client to add peloton private API stubs179 c = with_private_stubs(peloton_client)180 # create long running job with 2 instances181 long_running_job = Job(182 job_file="long_running_job.yaml",183 options=[with_instance_count(2)],184 client=c,185 )186 long_running_job.create()187 long_running_job.wait_for_state(goal_state="PENDING")188 task = long_running_job.get_task(0)189 # wait for task to reach READY190 task.wait_for_pending_state(goal_state="READY")191 # kill the task192 task.stop()193 # re-enqueue the task194 task.start()195 # gentlemen, start your (placement) engines196 placement_engines.start()197 def wait_for_instance_to_run():198 return long_running_job.get_task(0).state_str == "RUNNING"...

Full Screen

Full Screen

TestAttachResume.py

Source:TestAttachResume.py Github

copy

Full Screen

...27 event = lldb.SBEvent()28 while self.dbg.GetListener().GetNextEvent(event):29 self._state = lldb.SBProcess.GetStateFromEvent(event)30 # using process.GetState() does not work: llvm.org/pr1617231 def wait_for_state(s, timeout=5):32 t = 033 period = 0.134 while self._state != s:35 process_events()36 time.sleep(period)37 t += period38 if t > timeout:39 return False40 return True41 self.setAsync(True)42 self.runCmd("c")43 self.assertTrue(wait_for_state(lldb.eStateRunning),44 'Process not running after continue')45 self.runCmd("process interrupt")46 self.assertTrue(wait_for_state(lldb.eStateStopped),47 'Process not stopped after interrupt')48 # be sure to continue/interrupt/continue (r204504)49 self.runCmd("c")50 self.assertTrue(wait_for_state(lldb.eStateRunning),51 'Process not running after continue')52 self.runCmd("process interrupt")53 self.assertTrue(wait_for_state(lldb.eStateStopped),54 'Process not stopped after interrupt')55 # check that this breakpoint is auto-cleared on detach (r204752)56 self.runCmd("br set -f main.cpp -l 12")57 self.runCmd("c")58 self.assertTrue(wait_for_state(lldb.eStateRunning),59 'Process not running after continue')60 self.assertTrue(wait_for_state(lldb.eStateStopped),61 'Process not stopped after breakpoint')62 self.expect('br list', 'Breakpoint not hit',63 patterns = ['hit count = 1'])64 self.runCmd("c")65 self.assertTrue(wait_for_state(lldb.eStateRunning),66 'Process not running after continue')67 # make sure to detach while in running state (r204759)68 self.runCmd("detach")69 self.assertTrue(wait_for_state(lldb.eStateDetached),70 'Process not detached after detach')71if __name__ == '__main__':72 import atexit73 lldb.SBDebugger.Initialize()74 atexit.register(lambda: lldb.SBDebugger.Terminate())...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.

LambdaTest Learning Hubs:

YouTube

You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.

Run wpt automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

NotHelpful