Merge remote-tracking branch 'origin/master'
This commit is contained in:
@@ -1179,6 +1179,15 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
||||
var id int64
|
||||
err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id)
|
||||
if err == nil {
|
||||
// mk_dim_token_map can become polluted by heuristic inference/persistence (especially for dimval1).
|
||||
// If we can cross-check against item-scoped images, prefer the item-specific inference to avoid
|
||||
// conflating different tokens onto the same dim_id.
|
||||
if column == "dimval1" && mmitemID > 0 {
|
||||
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 && inferred != id {
|
||||
log.Printf("[DimTokenConflict] column=dimval1 token=%s mmitem_id=%d mapped=%d inferred=%d", tok, mmitemID, id, inferred)
|
||||
return inferred, true, nil
|
||||
}
|
||||
}
|
||||
return id, id > 0, nil
|
||||
}
|
||||
if err != sql.ErrNoRows {
|
||||
@@ -1186,12 +1195,20 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
||||
}
|
||||
|
||||
// Fallback: infer from dfblob filenames.
|
||||
// For dimval3, prefer token map as the source of truth when present; use image inference only when missing.
|
||||
if column == "dimval3" && mmitemID > 0 {
|
||||
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok {
|
||||
// Prefer item-scoped inference when we have mmitem_id. We intentionally avoid persisting these
|
||||
// inferred rows into mk_dim_token_map to prevent global pollution.
|
||||
if mmitemID > 0 {
|
||||
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 {
|
||||
return inferred, true, nil
|
||||
}
|
||||
}
|
||||
|
||||
// As a last resort, do a global inference. This is intentionally disabled for dimval1 because it is
|
||||
// too easy to mis-infer and conflate tokens across the whole catalog.
|
||||
if column == "dimval1" {
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0)
|
||||
if v == "" {
|
||||
return 0, false, nil
|
||||
@@ -1200,15 +1217,6 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
|
||||
if perr != nil || parsed <= 0 {
|
||||
return 0, false, nil
|
||||
}
|
||||
if column == "dimval1" {
|
||||
// Persist only for dimval1 where tokens are globally stable.
|
||||
_, _ = pg.ExecContext(ctx, `
|
||||
INSERT INTO mk_dim_token_map (dim_column, token, dim_id, updated_at)
|
||||
VALUES ($1,$2,$3,now())
|
||||
ON CONFLICT (dim_column, token)
|
||||
DO UPDATE SET dim_id = EXCLUDED.dim_id, updated_at = EXCLUDED.updated_at
|
||||
`, column, tok, parsed)
|
||||
}
|
||||
return parsed, true, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user