bug: nonce on all operation to prevent id collision

Michael Muré created

Change summary

bug/op_create.go        | 15 ---------------
bug/operation.go        | 26 ++++++++++++++++++++++++++
entity/dag/operation.go | 16 +++++++++++++---
entity/id.go            |  2 +-
identity/version.go     |  2 +-
5 files changed, 41 insertions(+), 20 deletions(-)

Detailed changes

bug/op_create.go 🔗

@@ -1,7 +1,6 @@
 package bug
 
 import (
-	"crypto/rand"
 	"encoding/json"
 	"fmt"
 	"strings"
@@ -18,10 +17,6 @@ var _ Operation = &CreateOperation{}
 // CreateOperation define the initial creation of a bug
 type CreateOperation struct {
 	OpBase
-	// mandatory random bytes to ensure a better randomness of the data of the first
-	// operation of a bug, used to later generate the ID
-	// len(Nonce) should be > 20 and < 64 bytes
-	Nonce   []byte            `json:"nonce"`
 	Title   string            `json:"title"`
 	Message string            `json:"message"`
 	Files   []repository.Hash `json:"files"`
@@ -147,19 +142,9 @@ func (op *CreateOperation) UnmarshalJSON(data []byte) error {
 // Sign post method for gqlgen
 func (op *CreateOperation) IsAuthored() {}
 
-func makeNonce(len int) []byte {
-	result := make([]byte, len)
-	_, err := rand.Read(result)
-	if err != nil {
-		panic(err)
-	}
-	return result
-}
-
 func NewCreateOp(author identity.Interface, unixTime int64, title, message string, files []repository.Hash) *CreateOperation {
 	return &CreateOperation{
 		OpBase:  newOpBase(CreateOp, author, unixTime),
-		Nonce:   makeNonce(20),
 		Title:   title,
 		Message: message,
 		Files:   files,

bug/operation.go 🔗

@@ -1,6 +1,7 @@
 package bug
 
 import (
+	"crypto/rand"
 	"encoding/json"
 	"fmt"
 	"time"
@@ -138,6 +139,12 @@ type OpBase struct {
 	// TODO: part of the data model upgrade, this should eventually be a timestamp + lamport
 	UnixTime int64             `json:"timestamp"`
 	Metadata map[string]string `json:"metadata,omitempty"`
+
+	// mandatory random bytes to ensure a better randomness of the data used to later generate the ID
+	// len(Nonce) should be > 20 and < 64 bytes
+	// It has no functional purpose and should be ignored.
+	Nonce []byte `json:"nonce"`
+
 	// Not serialized. Store the op's id in memory.
 	id entity.Id
 	// Not serialized. Store the extra metadata in memory,
@@ -151,10 +158,20 @@ func newOpBase(opType OperationType, author identity.Interface, unixTime int64)
 		OperationType: opType,
 		Author_:       author,
 		UnixTime:      unixTime,
+		Nonce:         makeNonce(20),
 		id:            entity.UnsetId,
 	}
 }
 
+func makeNonce(len int) []byte {
+	result := make([]byte, len)
+	_, err := rand.Read(result)
+	if err != nil {
+		panic(err)
+	}
+	return result
+}
+
 func (base *OpBase) UnmarshalJSON(data []byte) error {
 	// Compute the Id when loading the op from disk.
 	base.id = entity.DeriveId(data)
@@ -164,6 +181,7 @@ func (base *OpBase) UnmarshalJSON(data []byte) error {
 		Author        json.RawMessage   `json:"author"`
 		UnixTime      int64             `json:"timestamp"`
 		Metadata      map[string]string `json:"metadata,omitempty"`
+		Nonce         []byte            `json:"nonce"`
 	}{}
 
 	if err := json.Unmarshal(data, &aux); err != nil {
@@ -180,6 +198,7 @@ func (base *OpBase) UnmarshalJSON(data []byte) error {
 	base.Author_ = author
 	base.UnixTime = aux.UnixTime
 	base.Metadata = aux.Metadata
+	base.Nonce = aux.Nonce
 
 	return nil
 }
@@ -222,6 +241,13 @@ func (base *OpBase) Validate(op Operation, opType OperationType) error {
 		}
 	}
 
+	if len(base.Nonce) > 64 {
+		return fmt.Errorf("nonce is too big")
+	}
+	if len(base.Nonce) < 20 {
+		return fmt.Errorf("nonce is too small")
+	}
+
 	return nil
 }
 

entity/dag/operation.go 🔗

@@ -10,13 +10,23 @@ import (
 // data structure and storage.
 type Operation interface {
 	// Id return the Operation identifier
+	//
 	// Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid
 	// collisions. Notably:
-	// - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities of the same type
-	//   (example: no collision within the "bug" namespace).
+	// - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities
+	//   of the same type (example: no collision within the "bug" namespace).
 	// - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough
 	//   entropy to yield unique Ids (example: two "close" operation within the same second, same author).
-	// A common way to derive an Id will be to use the DeriveId function on the serialized operation data.
+	//   If this is a concern, it is recommended to include a piece of random data in the operation's data, to guarantee
+	//   a minimal amount of entropy and avoid collision.
+	//
+	//   Author's note: I tried to find a clever way around that inelegance (stuffing random useless data into the stored
+	//   structure is not exactly elegant) but I failed to find a proper way. Essentially, anything that would reuse some
+	//   other data (parent operation's Id, lamport clock) or the graph structure (depth) impose that the Id would only
+	//   make sense in the context of the graph and yield some deep coupling between Entity and Operation. This in turn
+	//   make the whole thing even less elegant.
+	//
+	// A common way to derive an Id will be to use the entity.DeriveId() function on the serialized operation data.
 	Id() entity.Id
 	// Validate check if the Operation data is valid
 	Validate() error

entity/id.go 🔗

@@ -18,7 +18,7 @@ const UnsetId = Id("unset")
 // Id is an identifier for an entity or part of an entity
 type Id string
 
-// DeriveId generate an Id from some data, taken from a root part of the entity.
+// DeriveId generate an Id from the serialization of the object or part of the object.
 func DeriveId(data []byte) Id {
 	// My understanding is that sha256 is enough to prevent collision (git use that, so ...?)
 	// If you read this code, I'd be happy to be schooled.

identity/version.go 🔗

@@ -37,7 +37,7 @@ type version struct {
 	keys []*Key
 
 	// mandatory random bytes to ensure a better randomness of the data of the first
-	// version of a bug, used to later generate the ID
+	// version of an identity, used to later generate the ID
 	// len(Nonce) should be > 20 and < 64 bytes
 	// It has no functional purpose and should be ignored.
 	// TODO: optional after first version?