entity: generalize the combined Ids, use 64 length

Michael Muré created

Change summary

bug/comment.go                | 46 -------------------------
bug/comment_test.go           | 27 --------------
bug/op_add_comment.go         |  2 
bug/op_create.go              |  2 
bug/op_create_test.go         |  3 +
cache/repo_cache_bug.go       |  2 
entity/id.go                  |  2 
entity/id_interleaved.go      | 68 +++++++++++++++++++++++++++++++++++++
entity/id_interleaved_test.go | 36 +++++++++++++++++++
9 files changed, 110 insertions(+), 78 deletions(-)

Detailed changes

bug/comment.go 🔗

@@ -1,8 +1,6 @@
 package bug
 
 import (
-	"strings"
-
 	"github.com/dustin/go-humanize"
 
 	"github.com/MichaelMure/git-bug/entity"
@@ -33,50 +31,6 @@ func (c Comment) Id() entity.Id {
 	return c.id
 }
 
-const compiledCommentIdFormat = "BCBCBCBBBCBBBBCBBBBCBBBBCBBBBCBBBBCBBBBC"
-
-// DeriveCommentId compute a merged Id for a comment holding information from
-// both the Bug's Id and the Comment's Id. This allow to later find efficiently
-// a Comment because we can access the bug directly instead of searching for a
-// Bug that has a Comment matching the Id.
-//
-// To allow the use of an arbitrary length prefix of this merged Id, Ids from Bug
-// and Comment are interleaved with this irregular pattern to give the best chance
-// to find the Comment even with a 7 character prefix.
-//
-// A complete merged Id hold 30 characters for the Bug and 10 for the Comment,
-// which give a key space of 36^30 for the Bug (~5 * 10^46) and 36^10 for the
-// Comment (~3 * 10^15). This asymmetry assume a reasonable number of Comment
-// within a Bug, while still allowing for a vast key space for Bug (that is, a
-// globally merged bug database) with a low risk of collision.
-func DeriveCommentId(bugId entity.Id, commentId entity.Id) entity.Id {
-	var id strings.Builder
-	for _, char := range compiledCommentIdFormat {
-		if char == 'B' {
-			id.WriteByte(bugId[0])
-			bugId = bugId[1:]
-		} else {
-			id.WriteByte(commentId[0])
-			commentId = commentId[1:]
-		}
-	}
-	return entity.Id(id.String())
-}
-
-func SplitCommentId(prefix string) (bugPrefix string, commentPrefix string) {
-	var bugIdPrefix strings.Builder
-	var commentIdPrefix strings.Builder
-
-	for i, char := range prefix {
-		if compiledCommentIdFormat[i] == 'B' {
-			bugIdPrefix.WriteRune(char)
-		} else {
-			commentIdPrefix.WriteRune(char)
-		}
-	}
-	return bugIdPrefix.String(), commentIdPrefix.String()
-}
-
 // FormatTimeRel format the UnixTime of the comment for human consumption
 func (c Comment) FormatTimeRel() string {
 	return humanize.Time(c.UnixTime.Time())

bug/comment_test.go 🔗

@@ -1,27 +0,0 @@
-package bug
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/require"
-
-	"github.com/MichaelMure/git-bug/entity"
-)
-
-func TestCommentId(t *testing.T) {
-	bugId := entity.Id("abcdefghijklmnopqrstuvwxyz1234__________")
-	opId := entity.Id("ABCDEFGHIJ______________________________")
-	expectedId := entity.Id("aAbBcCdefDghijEklmnFopqrGstuvHwxyzI1234J")
-
-	mergedId := DeriveCommentId(bugId, opId)
-	require.Equal(t, expectedId, mergedId)
-
-	// full length
-	splitBugId, splitCommentId := SplitCommentId(mergedId.String())
-	require.Equal(t, string(bugId[:30]), splitBugId)
-	require.Equal(t, string(opId[:10]), splitCommentId)
-
-	splitBugId, splitCommentId = SplitCommentId(string(expectedId[:6]))
-	require.Equal(t, string(bugId[:3]), splitBugId)
-	require.Equal(t, string(opId[:3]), splitCommentId)
-}

bug/op_add_comment.go 🔗

@@ -36,7 +36,7 @@ func (op *AddCommentOperation) Apply(snapshot *Snapshot) {
 	snapshot.addActor(op.Author)
 	snapshot.addParticipant(op.Author)
 
-	commentId := DeriveCommentId(snapshot.Id(), op.Id())
+	commentId := entity.CombineIds(snapshot.Id(), op.Id())
 	comment := Comment{
 		id:       commentId,
 		Message:  op.Message,

bug/op_create.go 🔗

@@ -66,7 +66,7 @@ func (op *CreateOperation) Apply(snapshot *Snapshot) {
 
 	snapshot.Title = op.Title
 
-	commentId := DeriveCommentId(snapshot.Id(), op.Id())
+	commentId := entity.CombineIds(snapshot.Id(), op.Id())
 	comment := Comment{
 		id:       commentId,
 		Message:  op.Message,

bug/op_create_test.go 🔗

@@ -7,6 +7,7 @@ import (
 
 	"github.com/stretchr/testify/require"
 
+	"github.com/MichaelMure/git-bug/entity"
 	"github.com/MichaelMure/git-bug/identity"
 	"github.com/MichaelMure/git-bug/repository"
 	"github.com/MichaelMure/git-bug/util/timestamp"
@@ -29,7 +30,7 @@ func TestCreate(t *testing.T) {
 	id := create.Id()
 	require.NoError(t, id.Validate())
 
-	commentId := DeriveCommentId(create.Id(), create.Id())
+	commentId := entity.CombineIds(create.Id(), create.Id())
 
 	comment := Comment{
 		id:       commentId,

cache/repo_cache_bug.go 🔗

@@ -265,7 +265,7 @@ func (c *RepoCache) resolveBugMatcher(f func(*BugExcerpt) bool) (entity.Id, erro
 // bug/comment Id prefix. Returns the Bug containing the Comment and the Comment's
 // Id.
 func (c *RepoCache) ResolveComment(prefix string) (*BugCache, entity.Id, error) {
-	bugPrefix, _ := bug.SplitCommentId(prefix)
+	bugPrefix, _ := entity.SeparateIds(prefix)
 	bugCandidate := make([]entity.Id, 0, 5)
 
 	// build a list of possible matching bugs

entity/id.go 🔗

@@ -65,7 +65,7 @@ func (i Id) MarshalGQL(w io.Writer) {
 
 // IsValid tell if the Id is valid
 func (i Id) Validate() error {
-	// Special case to
+	// Special case to detect outdated repo
 	if len(i) == 40 {
 		return fmt.Errorf("outdated repository format, please use https://github.com/MichaelMure/git-bug-migration to upgrade")
 	}

entity/id_interleaved.go 🔗

@@ -0,0 +1,68 @@
+package entity
+
+import (
+	"strings"
+)
+
+// CombineIds compute a merged Id holding information from both the primary Id
+// and the secondary Id.
+//
+// This allow to later find efficiently a secondary element because we can access
+// the primary one directly instead of searching for a primary that has a
+// secondary matching the Id.
+//
+// An example usage is Comment in a Bug. The interleaved Id will hold part of the
+// Bug Id and part of the Comment Id.
+//
+// To allow the use of an arbitrary length prefix of this Id, Ids from primary
+// and secondary are interleaved with this irregular pattern to give the
+// best chance to find the secondary even with a 7 character prefix.
+//
+// Format is: PSPSPSPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPP
+//
+// A complete interleaved Id hold 50 characters for the primary and 14 for the
+// secondary, which give a key space of 36^50 for the primary (~6 * 10^77) and
+// 36^14 for the secondary (~6 * 10^21). This asymmetry assume a reasonable number
+// of secondary within a primary Entity, while still allowing for a vast key space
+// for the primary (that is, a globally merged database) with a low risk of collision.
+//
+// Here is the breakdown of several common prefix length:
+//
+// 5:    3P, 2S
+// 7:    4P, 3S
+// 10:   6P, 4S
+// 16:  11P, 5S
+func CombineIds(primary Id, secondary Id) Id {
+	var id strings.Builder
+
+	for i := 0; i < idLength; i++ {
+		switch {
+		default:
+			id.WriteByte(primary[0])
+			primary = primary[1:]
+		case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4:
+			id.WriteByte(secondary[0])
+			secondary = secondary[1:]
+		}
+	}
+
+	return Id(id.String())
+}
+
+// SeparateIds extract primary and secondary prefix from an arbitrary length prefix
+// of an Id created with CombineIds.
+func SeparateIds(prefix string) (primaryPrefix string, secondaryPrefix string) {
+	var primary strings.Builder
+	var secondary strings.Builder
+
+	for i, r := range prefix {
+		switch {
+		default:
+			primary.WriteRune(r)
+		case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4:
+			secondary.WriteRune(r)
+		}
+	}
+
+	return primary.String(), secondary.String()
+}

entity/id_interleaved_test.go 🔗

@@ -0,0 +1,36 @@
+package entity
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestInterleaved(t *testing.T) {
+	primary := Id("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWX______________")
+	secondary := Id("YZ0123456789+/________________________________________________")
+	expectedId := Id("aYbZc0def1ghij2klmn3opqr4stuv5wxyz6ABCD7EFGH8IJKL9MNOP+QRST/UVWX")
+
+	interleaved := CombineIds(primary, secondary)
+	require.Equal(t, expectedId, interleaved)
+
+	// full length
+	splitPrimary, splitSecondary := SeparateIds(interleaved.String())
+	require.Equal(t, string(primary[:50]), splitPrimary)
+	require.Equal(t, string(secondary[:14]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:7]))
+	require.Equal(t, string(primary[:4]), splitPrimary)
+	require.Equal(t, string(secondary[:3]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:10]))
+	require.Equal(t, string(primary[:6]), splitPrimary)
+	require.Equal(t, string(secondary[:4]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:16]))
+	require.Equal(t, string(primary[:11]), splitPrimary)
+	require.Equal(t, string(secondary[:5]), splitSecondary)
+}