Index: common/api/dm/service/v1/graph_data.proto |
diff --git a/common/api/dm/service/v1/graph_data.proto b/common/api/dm/service/v1/graph_data.proto |
index c0b7bb82d98d0400804e6294376098dd4b12ab2a..baff0bb974b8b01e8e5784d6ec9b9c71b4b923d7 100644 |
--- a/common/api/dm/service/v1/graph_data.proto |
+++ b/common/api/dm/service/v1/graph_data.proto |
@@ -5,6 +5,7 @@ |
syntax = "proto3"; |
import "google/protobuf/timestamp.proto"; |
+import "google/protobuf/duration.proto"; |
import "github.com/luci/luci-go/common/api/template/template.proto"; |
@@ -12,6 +13,81 @@ import "types.proto"; |
package dm; |
+message AbnormalFinish { |
iannucci
2016/06/08 02:54:24
Here are all the abnormal finish status types. The
|
+ enum Status { |
+ // This entity has a failed result. |
+ // |
+ // Executions: the distributor reported that the task executed and failed, OR |
+ // the distributor reports success while the Execution is in the RUNNING |
+ // state. |
+ // |
+ // Attempts: the last Execution had a FAILED Status. |
+ // |
+ // Retryable. |
+ FAILED = 0; |
iannucci
2016/06/08 02:54:24
I'm not super sure how to deal with this one: this
dnj (Google)
2016/06/09 18:00:57
I think things pre-recipe-engine can fail (e.g., b
iannucci
2016/06/15 00:46:02
Yeah I think this makes sense. For recipe quests w
|
+ |
+ // This entity failed in a bad way. |
+ // |
+ // Executions: The distributor told us that the job died violently while in |
+ // the SCHEDULING, RUNNING or STOPPING state. |
+ // |
+ // Attempts: the last Execution had a CRASHED Status. |
+ // |
+ // Retryable. |
+ CRASHED = 1; |
+ |
+ // Waited too long for the job to start. |
+ // |
+ // Executions: the distributor couldn't start the job in time, OR DM failed |
+ // to get a status update from the distributor in time (e.g. the state was |
+ // SCHEDULING for too long). |
+ // |
+ // Attempts: the last Execution had an EXPIRED Status. |
+ // |
+ // Retryable. |
+ EXPIRED = 2; |
+ |
+ // The job started, but took too long. |
+ // |
+ // Executions: the distributor started the job, but it couldn't complete in |
+ // time, OR DM failed to get a status update from the distributor in time |
+ // (e.g. the state was RUNNING for too long). |
+ // |
+ // Attempts: the last Execution had a TIMED_OUT Status. |
+ // |
+ // Retryable. |
+ TIMED_OUT = 3; |
+ |
+ // The job was cancelled by an external entity (human, automated system). |
+ // |
+ // Executions: the distributor informed DM that the job was preemptively |
+ // cancelled. |
+ // |
+ // Attempts: the last Execution had a CANCELLED Status, or this Attempt |
+ // was cancelled via DM. |
+ CANCELLED = 4; |
iannucci
2016/06/08 02:54:24
later when DM supports cancellation directly, this
dnj (Google)
2016/06/09 18:00:57
nit:
In American English, the verb cancel is usual
iannucci
2016/06/15 00:46:01
Who said protos were American? THEY LIVE IN CYBERS
dnj (Google)
2016/06/16 16:57:22
I'ma tell Obama.
|
+ |
+ // The job was prevented from running by the distributor (quota, permissions, |
+ // etc.). |
+ // |
+ // Executions: the distributor refused to run this job. |
+ // |
+ // Attempts: the last Execution had a REJECTED Status. |
dnj (Google)
2016/06/09 18:00:57
Is this not retryable? If we're out of quota, we m
iannucci
2016/06/15 00:46:02
I think this should definitely be retried at a hig
|
+ REJECTED = 5; |
+ |
+ // The job is unrecognized. |
+ // |
+ // Executions: the distributor doesn't know about this job, or has forgotten |
+ // about it. |
+ // |
+ // Attempts: the last Execution had a MISSING Status. |
dnj (Google)
2016/06/09 18:00:57
MISSING Status?
iannucci
2016/06/15 00:46:01
oops
|
+ MISSING = 6; |
+ } |
+ |
+ Status status = 1; |
+ string reason = 2; |
+} |
+ |
message Quest { |
message ID { |
string id = 1; |
@@ -23,8 +99,51 @@ message Quest { |
bool DNE = 2; |
message Desc { |
+ // TODO(iannucci): have a 'simple_idempotent' quest mode which: |
+ // * isn't allowed/expected to call any API methods (ActivateExecution, |
+ // EnsureGraphData, or WalkGraph) |
+ // * only provides data back through the distributor-specific 'state' |
+ // field. |
+ // |
+ // Examples of use for this would be: |
+ // * simple test binaries that run/output to an ISOLATED_OUTDIR |
+ // * testing / ad-hoc bash scripts |
+ |
string distributor_config_name = 1; |
string json_payload = 2; |
+ |
+ message Meta { |
+ // This names the user/service account for all Attempts on this quest. You |
+ // must have permission to use this account when creating the Quest and/or |
+ // Attempts. |
+ string as_account = 1; |
+ |
+ message Retry { |
+ // The number of times in a row to retry Executions which have an |
+ // ABNORMAL_FINISHED status of FAILED. |
+ uint32 failed = 1; |
+ |
+ // The number of times in a row to retry Executions which have an |
+ // ABNORMAL_FINISHED status of EXPIRED. |
+ uint32 expired = 2; |
+ |
+ // The number of times in a row to retry Executions which have an |
+ // ABNORMAL_FINISHED status of TIMED_OUT. |
+ uint32 timed_out = 3; |
+ |
+ // The number of times in a row to retry Executions which have an |
+ // ABNORMAL_FINISHED status of CRASHED. |
+ uint32 crashed = 4; |
+ } |
+ |
+ // This affects how DM will retry the job payload in various exceptional |
+ // circumstances. |
+ Retry retry = 2; |
+ } |
+ |
+ // This is metadata which doesn't affect the functionality of the payload, |
+ // but does affect how DM and/or the distributor run/schedule that payload. |
+ Meta meta = 3; |
} |
message TemplateSpec { |
@@ -61,12 +180,20 @@ message Attempt { |
bool DNE = 2; |
enum State { |
- NEEDS_EXECUTION = 0; |
+ // The Attempt is waiting to be Executed. |
dnj (Google)
2016/06/09 18:00:57
nit: period at end.
iannucci
2016/06/15 00:46:02
Done.
|
+ SCHEDULING = 0; |
+ |
+ // The Attempt is currently waiting for its current Execution to finish. |
EXECUTING = 1; |
dnj (Google)
2016/06/09 18:00:57
IMO: s/the current/its current/
iannucci
2016/06/15 00:46:02
Done.
|
- ADDING_DEPS = 2; |
- BLOCKED = 3; |
- AWAITING_EXECUTION_STATE = 4; |
- FINISHED = 5; |
+ |
+ // The Attempt is waiting for the Attempts it depends on to be resolved. |
+ WAITING = 2; |
+ |
+ // The Attempt is in its final state. |
+ FINISHED = 3; |
+ |
+ // The Attempt is in an abnormal final state. |
dnj (Google)
2016/06/09 18:00:57
nit: period at end.
iannucci
2016/06/15 00:46:02
Done.
|
+ ABNORMAL_FINISHED = 4; |
} |
message Data { |
@@ -74,35 +201,37 @@ message Attempt { |
google.protobuf.Timestamp modified = 2; |
uint32 num_executions = 3; |
- message NeedsExecution { |
- google.protobuf.Timestamp pending = 1; |
- } |
+ // This attempt is ready to be Executed, but hasn't been sent to the |
+ // distributor yet. |
+ message Scheduling {} |
+ // This attempt has a live Execution (with the specified ID). Check the |
+ // Execution state for more information. |
message Executing { |
uint32 cur_execution_id = 1; |
} |
- message AddingDeps { |
- uint32 num_adding = 1; |
- uint32 num_waiting = 2; |
- } |
- |
- message Blocked { |
+ // This attempt's last Execution stopped by adding dependencies. |
+ message Waiting { |
uint32 num_waiting = 1; |
} |
+ // This attempt is complete. |
message Finished { |
google.protobuf.Timestamp expiration = 1; |
uint32 json_result_size = 2; |
string json_result = 3; |
+ |
+ // This is the distributor-specific state of the final Execution. |
+ string persistent_state_result = 4; |
dnj (Google)
2016/06/09 18:00:57
Is "bytes" more appropriate? "string" implies UTF8
iannucci
2016/06/15 00:46:02
but string is the only map-key/immutable thing in
dnj (Google)
2016/06/16 16:57:22
Yeah I hear 'ya. But I think you did the right thi
|
} |
oneof attempt_type { |
- NeedsExecution needs_execution = 4; |
- Executing executing = 5; |
- AddingDeps adding_deps = 6; |
- Blocked blocked = 7; |
+ Scheduling scheduling = 5; |
+ Executing executing = 6; |
+ Waiting waiting = 7; |
Finished finished = 8; |
+ AbnormalFinish abnormal_finish = 9; |
} |
} |
Data data = 3; |
@@ -170,39 +299,53 @@ message Execution { |
ID id = 1; |
enum State { |
- // The execution has been accepted by the distributor, but is not running yet |
- SCHEDULED = 0; |
+ // The execution has been accepted by the distributor, but is not running |
+ // yet. |
+ SCHEDULING = 0; |
- // The execution is running |
+ // The execution is running (has activated with DM). |
RUNNING = 1; |
- // The execution was unable to be accepted by the distributor |
- REJECTED = 2; |
+ // The execution has been told to stop by DM, but we haven't heard from |
+ // the distributor yet. |
+ STOPPING = 2; |
- // The execution was accepted by the distributor, but couldn't run in time. |
- TIMED_OUT = 3; |
+ // The execution is in its final state. |
+ FINISHED = 3; |
- // The execution ran and completed |
- FINISHED = 4; |
+ // The execution is in an abnormal final state. |
+ ABNORMAL_FINISHED = 4; |
+ } |
+ |
+ message Data { |
+ google.protobuf.Timestamp created = 1; |
+ google.protobuf.Timestamp modified = 2; |
- // The execution ran, but the distributor claims it did not complete |
- FAILED = 5; |
+ message DistributorInfo { |
+ string config_name = 1; |
+ string config_version = 2; |
+ string token = 3; |
+ string url = 4; |
+ } |
+ DistributorInfo distributor_info = 3; |
- // The distributor claims to not know anything about this execution |
- MISSING = 6; |
+ message Scheduling {} |
- // Some entity (DM, Human, Distributor) requested that this execution not run. |
- CANCELLED = 7; |
- } |
+ message Running {} |
- message Data { |
- State state = 1; |
- string state_reason = 2; |
+ message Stopping {} |
- google.protobuf.Timestamp created = 3; |
+ message Finished { |
+ string persistent_state = 1; |
+ } |
- string distributor_token = 4; |
- string distributor_info_url = 5; |
+ oneof execution_type { |
+ Scheduling scheduling = 4; |
+ Running running = 5; |
+ Stopping stopping = 6; |
+ Finished finished = 7; |
+ AbnormalFinish abnormal_finish = 8; |
+ } |
} |
Data data = 2; |