Merge remote-tracking branch 'origin/master'
This commit is contained in:
@@ -1179,6 +1179,15 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
|||||||
var id int64
|
var id int64
|
||||||
err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id)
|
err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
// mk_dim_token_map can become polluted by heuristic inference/persistence (especially for dimval1).
|
||||||
|
// If we can cross-check against item-scoped images, prefer the item-specific inference to avoid
|
||||||
|
// conflating different tokens onto the same dim_id.
|
||||||
|
if column == "dimval1" && mmitemID > 0 {
|
||||||
|
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 && inferred != id {
|
||||||
|
log.Printf("[DimTokenConflict] column=dimval1 token=%s mmitem_id=%d mapped=%d inferred=%d", tok, mmitemID, id, inferred)
|
||||||
|
return inferred, true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
return id, id > 0, nil
|
return id, id > 0, nil
|
||||||
}
|
}
|
||||||
if err != sql.ErrNoRows {
|
if err != sql.ErrNoRows {
|
||||||
@@ -1186,12 +1195,20 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: infer from dfblob filenames.
|
// Fallback: infer from dfblob filenames.
|
||||||
// For dimval3, prefer token map as the source of truth when present; use image inference only when missing.
|
// Prefer item-scoped inference when we have mmitem_id. We intentionally avoid persisting these
|
||||||
if column == "dimval3" && mmitemID > 0 {
|
// inferred rows into mk_dim_token_map to prevent global pollution.
|
||||||
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok {
|
if mmitemID > 0 {
|
||||||
|
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 {
|
||||||
return inferred, true, nil
|
return inferred, true, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// As a last resort, do a global inference. This is intentionally disabled for dimval1 because it is
|
||||||
|
// too easy to mis-infer and conflate tokens across the whole catalog.
|
||||||
|
if column == "dimval1" {
|
||||||
|
return 0, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0)
|
v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0)
|
||||||
if v == "" {
|
if v == "" {
|
||||||
return 0, false, nil
|
return 0, false, nil
|
||||||
@@ -1200,15 +1217,6 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
|||||||
if perr != nil || parsed <= 0 {
|
if perr != nil || parsed <= 0 {
|
||||||
return 0, false, nil
|
return 0, false, nil
|
||||||
}
|
}
|
||||||
if column == "dimval1" {
|
|
||||||
// Persist only for dimval1 where tokens are globally stable.
|
|
||||||
_, _ = pg.ExecContext(ctx, `
|
|
||||||
INSERT INTO mk_dim_token_map (dim_column, token, dim_id, updated_at)
|
|
||||||
VALUES ($1,$2,$3,now())
|
|
||||||
ON CONFLICT (dim_column, token)
|
|
||||||
DO UPDATE SET dim_id = EXCLUDED.dim_id, updated_at = EXCLUDED.updated_at
|
|
||||||
`, column, tok, parsed)
|
|
||||||
}
|
|
||||||
return parsed, true, nil
|
return parsed, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user