|  | @@ -69,12 +69,14 @@ int census_supported(void);
 | 
	
		
			
				|  |  |  /** Return the census features currently enabled. */
 | 
	
		
			
				|  |  |  int census_enabled(void);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -/* Internally, Census relies on a context, which should be propagated across
 | 
	
		
			
				|  |  | - * RPC's. From the RPC subsystems viewpoint, this is an opaque data structure.
 | 
	
		
			
				|  |  | - * A context must be used as the first argument to all other census
 | 
	
		
			
				|  |  | - * functions. Conceptually, contexts should be thought of as specific to
 | 
	
		
			
				|  |  | - * single RPC/thread. The context can be serialized for passing across the
 | 
	
		
			
				|  |  | - * wire. */
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | +  Context is a handle used by census to represent the current tracing and
 | 
	
		
			
				|  |  | +  tagging information. Contexts should be propagated across RPC's. Contexts
 | 
	
		
			
				|  |  | +  are created by any of the census_start_*_op() functions. A context is
 | 
	
		
			
				|  |  | +  typically used as argument to most census functions. Conceptually, contexts
 | 
	
		
			
				|  |  | +  should be thought of as specific to single RPC/thread. The context can be
 | 
	
		
			
				|  |  | +  serialized for passing across the wire, via census_context_serialize().
 | 
	
		
			
				|  |  | +*/
 | 
	
		
			
				|  |  |  typedef struct census_context census_context;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /* This function is called by the RPC subsystem whenever it needs to get a
 | 
	
	
		
			
				|  | @@ -91,19 +93,6 @@ typedef struct census_context census_context;
 | 
	
		
			
				|  |  |  size_t census_context_serialize(const census_context *context, char *buffer,
 | 
	
		
			
				|  |  |                                  size_t buf_size);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -/* Create a new census context, possibly from a serialized buffer. If 'buffer'
 | 
	
		
			
				|  |  | - * is non-NULL, it is assumed that it is a buffer encoded by
 | 
	
		
			
				|  |  | - * census_context_serialize(). If `buffer` is NULL, a new, empty context is
 | 
	
		
			
				|  |  | - * created. The decoded/new contest is returned in 'context'.
 | 
	
		
			
				|  |  | - *
 | 
	
		
			
				|  |  | - * Returns 0 if no errors, non-zero if buffer is incorrectly formatted, in
 | 
	
		
			
				|  |  | - * which case a new empty context will be returned. */
 | 
	
		
			
				|  |  | -int census_context_deserialize(const char *buffer, census_context **context);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -/* The given context is destroyed. Once destroyed, using the context in
 | 
	
		
			
				|  |  | - * future census calls will result in undefined behavior. */
 | 
	
		
			
				|  |  | -void census_context_destroy(census_context *context);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  /* Distributed traces can have a number of options. */
 | 
	
		
			
				|  |  |  enum census_trace_mask_values {
 | 
	
		
			
				|  |  |    CENSUS_TRACE_MASK_NONE = 0,      /* Default, empty flags */
 | 
	
	
		
			
				|  | @@ -114,13 +103,15 @@ enum census_trace_mask_values {
 | 
	
		
			
				|  |  |      will be the logical or of census_trace_mask_values values. */
 | 
	
		
			
				|  |  |  int census_trace_mask(const census_context *context);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -/* The concept of "operation" is a fundamental concept for Census. An
 | 
	
		
			
				|  |  | -   operation is a logical representation of a action in a RPC-using system. It
 | 
	
		
			
				|  |  | -   is most typically used to represent a single RPC, or a significant sub-part
 | 
	
		
			
				|  |  | -   thereof (e.g. a single logical "read" RPC to a distributed storage system
 | 
	
		
			
				|  |  | -   might do several other actions in parallel, from looking up metadata
 | 
	
		
			
				|  |  | +/** Set the trace mask associated with a context. */
 | 
	
		
			
				|  |  | +void census_set_trace_mask(int trace_mask);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/* The concept of "operation" is a fundamental concept for Census. In an RPC
 | 
	
		
			
				|  |  | +   system, and operation typcially represents a single RPC, or a significant
 | 
	
		
			
				|  |  | +   sub-part thereof (e.g. a single logical "read" RPC to a distributed storage
 | 
	
		
			
				|  |  | +   system might do several other actions in parallel, from looking up metadata
 | 
	
		
			
				|  |  |     indices to making requests of other services - each of these could be a
 | 
	
		
			
				|  |  | -   sub-operation with the larger RPC operation. Census uses operations for the
 | 
	
		
			
				|  |  | +   sub-operation with the larger RPC operation). Census uses operations for the
 | 
	
		
			
				|  |  |     following:
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     CPU accounting: If enabled, census will measure the thread CPU time
 | 
	
	
		
			
				|  | @@ -131,123 +122,152 @@ int census_trace_mask(const census_context *context);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     Distributed tracing: Each operation serves as a logical trace span.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -   Stats collection: Stats are broken down operation (e.g. latency
 | 
	
		
			
				|  |  | -   breakdown for each service/method combination).
 | 
	
		
			
				|  |  | +   Stats collection: Stats are broken down by operation (e.g. latency
 | 
	
		
			
				|  |  | +   breakdown for each unique RPC path).
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     The following functions serve to delineate the start and stop points for
 | 
	
		
			
				|  |  |     each logical operation. */
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | +  This structure (opaquely) represents a timestamp as used by census to
 | 
	
		
			
				|  |  | +  record the time at which an RPC operation begins.
 | 
	
		
			
				|  |  | +*/
 | 
	
		
			
				|  |  | +typedef struct census_timestamp census_timestamp;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | +  Mark the beginning of an RPC operation. The information required to call the
 | 
	
		
			
				|  |  | +  functions to record the start of RPC operations (both client and server) may
 | 
	
		
			
				|  |  | +  not be callable at the true start time of the operation, due to information
 | 
	
		
			
				|  |  | +  not being available (e.g. the census context data will not be available in a
 | 
	
		
			
				|  |  | +  server RPC until at least initial metadata has been processed). To ensure
 | 
	
		
			
				|  |  | +  correct CPU accounting and latency recording, RPC systems can call this
 | 
	
		
			
				|  |  | +  function to get the timestamp of operation beginning. This can later be used
 | 
	
		
			
				|  |  | +  as an argument to census_start_{client,server}_rpc_op(). NB: for correct
 | 
	
		
			
				|  |  | +  CPU accounting, the system must guarantee that the same thread is used
 | 
	
		
			
				|  |  | +  for all request processing after this function is called.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  @return A timestamp representing the operation start time.
 | 
	
		
			
				|  |  | +*/
 | 
	
		
			
				|  |  | +census_timestamp *census_start_rpc_op_timestamp(void);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | +  Represent functions to map RPC name ID to service/method names. Census
 | 
	
		
			
				|  |  | +  breaks down all RPC stats by service and method names. We leave the
 | 
	
		
			
				|  |  | +  definition and format of these to the RPC system. For efficiency purposes,
 | 
	
		
			
				|  |  | +  we encode these as a single 64 bit identifier, and allow the RPC system to
 | 
	
		
			
				|  |  | +  provide a structure for functions that can convert these to service and
 | 
	
		
			
				|  |  | +  method strings.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  TODO(aveitch): Instead of providing this as an argument to the rpc_start_op()
 | 
	
		
			
				|  |  | +  functions, maybe it should be set once at census initialization.
 | 
	
		
			
				|  |  | +*/
 | 
	
		
			
				|  |  | +typedef struct {
 | 
	
		
			
				|  |  | +  const char *(*get_rpc_service_name)(gpr_int64 id);
 | 
	
		
			
				|  |  | +  const char *(*get_rpc_method_name)(gpr_int64 id);
 | 
	
		
			
				|  |  | +} census_rpc_name_info;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  /**
 | 
	
		
			
				|  |  | -   Start a client rpc operation. This function will create a new context. If
 | 
	
		
			
				|  |  | +   Start a client rpc operation. This function should be called as early in the
 | 
	
		
			
				|  |  | +   client RPC path as possible. This function will create a new context. If
 | 
	
		
			
				|  |  |     the context argument is non-null, then the new context will inherit all
 | 
	
		
			
				|  |  |     its properties, with the following changes:
 | 
	
		
			
				|  |  |     - create a new operation ID for the new context, marking it as a child of
 | 
	
		
			
				|  |  |       the previous operation.
 | 
	
		
			
				|  |  | -   - use the new RPC service/method/peer information for tracing and stats
 | 
	
		
			
				|  |  | +   - use the new RPC path and peer information for tracing and stats
 | 
	
		
			
				|  |  |       collection purposes, rather than those from the original context
 | 
	
		
			
				|  |  | -   - if trace_mask is non-zero, update the trace mask entries associated with
 | 
	
		
			
				|  |  | -     the original context.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -   If the context argument is NULL, then a new root context is created.  This
 | 
	
		
			
				|  |  | +   If the context argument is NULL, then a new root context is created. This
 | 
	
		
			
				|  |  |     is particularly important for tracing purposes (the trace spans generated
 | 
	
		
			
				|  |  |     will be unassociated with any other trace spans, except those
 | 
	
		
			
				|  |  | -   downstream). Whatever it's value, the trace_mask will be used for tracing
 | 
	
		
			
				|  |  | -   operations associated with the new context.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -   @param context The base context. Can be NULL.
 | 
	
		
			
				|  |  | -   @param service RPC service name. On some systems, may include other
 | 
	
		
			
				|  |  | -   parts of RPC identification (e.g. host on gRPC systems).
 | 
	
		
			
				|  |  | -   @param method RPC method name
 | 
	
		
			
				|  |  | -   @param peer RPC peer
 | 
	
		
			
				|  |  | -   @param trace_mask An or of census_trace_mask_values values
 | 
	
		
			
				|  |  | -   @param start_time If NULL, the time of function call is used as the
 | 
	
		
			
				|  |  | -   start time for the operation. If non-NULL, then the time should be in the
 | 
	
		
			
				|  |  | -   past, when the operation was deemed to have started. This is used when
 | 
	
		
			
				|  |  | -   the other information used as arguments is not yet available.
 | 
	
		
			
				|  |  | +   downstream). The trace_mask will be used for tracing operations associated
 | 
	
		
			
				|  |  | +   with the new context.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +   In some RPC systems (e.g. where load balancing is used), peer information
 | 
	
		
			
				|  |  | +   may not be available at the time the operation starts. In this case, use a
 | 
	
		
			
				|  |  | +   NULL value for peer, and set it later using the
 | 
	
		
			
				|  |  | +   census_set_rpc_client_peer() function.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +   @param context The parent context. Can be NULL.
 | 
	
		
			
				|  |  | +   @param rpc_name_id The rpc name identifier to be associated with this RPC.
 | 
	
		
			
				|  |  | +   @param rpc_name_info Used to decode rpc_name_id.
 | 
	
		
			
				|  |  | +   @param peer RPC peer. If not available at the time, NULL can be used,
 | 
	
		
			
				|  |  | +               and a later census_set_rpc_client_peer() call made.
 | 
	
		
			
				|  |  | +   @param trace_mask An OR of census_trace_mask_values values. Only used in
 | 
	
		
			
				|  |  | +                     the creation of a new root context (context == NULL).
 | 
	
		
			
				|  |  | +   @param start_time A timestamp returned from census_start_rpc_op_timestamp().
 | 
	
		
			
				|  |  | +                     Can be NULL. Used to set the true time the operation
 | 
	
		
			
				|  |  | +                     begins.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     @return A new census context.
 | 
	
		
			
				|  |  |   */
 | 
	
		
			
				|  |  | -census_context *census_start_client_rpc_op(census_context *context,
 | 
	
		
			
				|  |  | -                                           const char *service,
 | 
	
		
			
				|  |  | -                                           const char *method, const char *peer,
 | 
	
		
			
				|  |  | -                                           int trace_mask,
 | 
	
		
			
				|  |  | -                                           gpr_timespec *start_time);
 | 
	
		
			
				|  |  | +census_context *census_start_client_rpc_op(
 | 
	
		
			
				|  |  | +    const census_context *context, gpr_int64 rpc_name_id,
 | 
	
		
			
				|  |  | +    const census_rpc_name_info *rpc_name_info, const char *peer, int trace_mask,
 | 
	
		
			
				|  |  | +    const census_timestamp *start_time);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /**
 | 
	
		
			
				|  |  | -   Indicate the start of a server rpc operation, updating the current
 | 
	
		
			
				|  |  | -   context (which should have been created from census_context_deserialize()
 | 
	
		
			
				|  |  | -   (as passed from the client side of the RPC operation) or census_start_op().
 | 
	
		
			
				|  |  | -   - if trace_mask is non-zero, update the trace mask entries associated with
 | 
	
		
			
				|  |  | -     the original context.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -   @param context The base context. Cannot be NULL.
 | 
	
		
			
				|  |  | -   @param service RPC service name. On some systems, may include other
 | 
	
		
			
				|  |  | -   parts of RPC identification (e.g. host on gRPC systems).
 | 
	
		
			
				|  |  | -   @param method RPC method name
 | 
	
		
			
				|  |  | -   @param peer RPC peer
 | 
	
		
			
				|  |  | -   @param trace_mask An or of census_trace_mask_values values
 | 
	
		
			
				|  |  | -   @param start_time If NULL, the time of function call is used as the
 | 
	
		
			
				|  |  | -   start time for the operation. If non-NULL, then the time should be in the
 | 
	
		
			
				|  |  | -   past, when the operation was deemed to have started. This is used when
 | 
	
		
			
				|  |  | -   the other information used as arguments is not yet available.
 | 
	
		
			
				|  |  | +  Add peer information to a context representing a client RPC operation.
 | 
	
		
			
				|  |  | +*/
 | 
	
		
			
				|  |  | +void census_set_rpc_client_peer(census_context *context, const char *peer);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | +   Start a server RPC operation. Returns a new context to be used in future
 | 
	
		
			
				|  |  | +   census calls. If buffer is non-NULL, then the buffer contents should
 | 
	
		
			
				|  |  | +   represent the client context, as generated by census_context_serialize().
 | 
	
		
			
				|  |  | +   If buffer is NULL, a new root context is created.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +   @param buffer Buffer containing bytes output from census_context_serialize().
 | 
	
		
			
				|  |  | +   @param rpc_name_id The rpc name identifier to be associated with this RPC.
 | 
	
		
			
				|  |  | +   @param rpc_name_info Used to decode rpc_name_id.
 | 
	
		
			
				|  |  | +   @param peer RPC peer.
 | 
	
		
			
				|  |  | +   @param trace_mask An OR of census_trace_mask_values values. Only used in
 | 
	
		
			
				|  |  | +                     the creation of a new root context (buffer == NULL).
 | 
	
		
			
				|  |  | +   @param start_time A timestamp returned from census_start_rpc_op_timestamp().
 | 
	
		
			
				|  |  | +                     Can be NULL. Used to set the true time the operation
 | 
	
		
			
				|  |  | +                     begins.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +   @return A new census context.
 | 
	
		
			
				|  |  |   */
 | 
	
		
			
				|  |  | -void census_start_server_rpc_op(census_context *context, const char *service,
 | 
	
		
			
				|  |  | -                                const char *method, const char *peer,
 | 
	
		
			
				|  |  | -                                int trace_mask, gpr_timespec *start_time);
 | 
	
		
			
				|  |  | +census_context *census_start_server_rpc_op(
 | 
	
		
			
				|  |  | +    const char *buffer, gpr_int64 rpc_name_id,
 | 
	
		
			
				|  |  | +    const census_rpc_name_info *rpc_name_info, const char *peer, int trace_mask,
 | 
	
		
			
				|  |  | +    census_timestamp *start_time);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /**
 | 
	
		
			
				|  |  | -   Start a new, non-RPC census operation. In general, this function works very
 | 
	
		
			
				|  |  | -   similarly to census_start_client_rpc_op, with the primary differennce being
 | 
	
		
			
				|  |  | -   the abscence of peer information, and the replacement of service and method
 | 
	
		
			
				|  |  | -   names with the more general family/name. If the context argument is
 | 
	
		
			
				|  |  | -   non-null, then the new context will inherit all its properties, with the
 | 
	
		
			
				|  |  | -   following changes:
 | 
	
		
			
				|  |  | +   Start a new, non-RPC operation. In general, this function works very
 | 
	
		
			
				|  |  | +   similarly to census_start_client_rpc_op, with the primary difference being
 | 
	
		
			
				|  |  | +   the replacement of host/path information with the more generic family/name
 | 
	
		
			
				|  |  | +   tags. If the context argument is non-null, then the new context will
 | 
	
		
			
				|  |  | +   inherit all its properties, with the following changes:
 | 
	
		
			
				|  |  |     - create a new operation ID for the new context, marking it as a child of
 | 
	
		
			
				|  |  |       the previous operation.
 | 
	
		
			
				|  |  |     - use the family and name information for tracing and stats collection
 | 
	
		
			
				|  |  |       purposes, rather than those from the original context
 | 
	
		
			
				|  |  | -   - if trace_mask is non-zero, update the trace mask entries associated with
 | 
	
		
			
				|  |  | -     the original context.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -   If the context argument is NULL, then a new root context is created.  This
 | 
	
		
			
				|  |  | +   If the context argument is NULL, then a new root context is created. This
 | 
	
		
			
				|  |  |     is particularly important for tracing purposes (the trace spans generated
 | 
	
		
			
				|  |  |     will be unassociated with any other trace spans, except those
 | 
	
		
			
				|  |  | -   downstream). Whatever it's value, the trace_mask will be used for tracing
 | 
	
		
			
				|  |  | +   downstream). The trace_mask will be used for tracing
 | 
	
		
			
				|  |  |     operations associated with the new context.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     @param context The base context. Can be NULL.
 | 
	
		
			
				|  |  |     @param family Family name to associate with the trace
 | 
	
		
			
				|  |  |     @param name Name within family to associated with traces/stats
 | 
	
		
			
				|  |  | -   @param trace_mask An or of census_trace_mask_values values
 | 
	
		
			
				|  |  | -   @param start_time If NULL, the time of function call is used as the
 | 
	
		
			
				|  |  | -   start time for the operation. If non-NULL, then the time should be in the
 | 
	
		
			
				|  |  | -   past, when the operation was deemed to have started. This is used when
 | 
	
		
			
				|  |  | -   the other information used as arguments is not yet available.
 | 
	
		
			
				|  |  | +   @param trace_mask An OR of census_trace_mask_values values. Only used if
 | 
	
		
			
				|  |  | +                     context is NULL.
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |     @return A new census context.
 | 
	
		
			
				|  |  |   */
 | 
	
		
			
				|  |  |  census_context *census_start_op(census_context *context, const char *family,
 | 
	
		
			
				|  |  | -                                const char *name, int trace_mask,
 | 
	
		
			
				|  |  | -                                gpr_timespec *start_time);
 | 
	
		
			
				|  |  | +                                const char *name, int trace_mask);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -/** End a tracing operation. Must be matched with an earlier
 | 
	
		
			
				|  |  | - * census_start_*_op*() call. */
 | 
	
		
			
				|  |  | -void census_trace_end_op(census_context *context, int status);
 | 
	
		
			
				|  |  | +/** End an operation started by any of the census_start_*_op*() calls. */
 | 
	
		
			
				|  |  | +void census_end_op(census_context *context, int status);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  /** Insert a trace record into the trace stream. The record consists of an
 | 
	
		
			
				|  |  |   * arbitrary size buffer, the size of which is provided in 'n'. */
 | 
	
		
			
				|  |  |  void census_trace_print(census_context *context, const char *buffer, size_t n);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -/** Retrieve active ops as a proto. Note that since we don't have proto
 | 
	
		
			
				|  |  | -    manipulation available in the grpc core yet, arguments etc. are left
 | 
	
		
			
				|  |  | -    unspecified for now. */
 | 
	
		
			
				|  |  | -void census_get_active_ops_as_proto(/* pointer to proto */);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -/** Retrieve all active trace records as a proto. Note that since we don't
 | 
	
		
			
				|  |  | -    have proto manipulation available in the grpc core yet, arguments etc. are
 | 
	
		
			
				|  |  | -    left unspecified for now. This function will clear existing trace
 | 
	
		
			
				|  |  | -    records. */
 | 
	
		
			
				|  |  | -void census_get_trace_as_proto(/* pointer to proto */);
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  /* A census statistic to be recorded comprises two parts: an ID for the
 | 
	
		
			
				|  |  |   * particular statistic and the value to be recorded against it. */
 | 
	
		
			
				|  |  |  typedef struct {
 |