Merge remote-tracking branch 'origin/master'

This commit is contained in:
M_Kececi
2026-06-24 21:49:39 +03:00
parent 215b7fd321
commit e67fad9f90

View File

@@ -1179,6 +1179,15 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
var id int64 var id int64
err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id) err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id)
if err == nil { if err == nil {
// mk_dim_token_map can become polluted by heuristic inference/persistence (especially for dimval1).
// If we can cross-check against item-scoped images, prefer the item-specific inference to avoid
// conflating different tokens onto the same dim_id.
if column == "dimval1" && mmitemID > 0 {
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 && inferred != id {
log.Printf("[DimTokenConflict] column=dimval1 token=%s mmitem_id=%d mapped=%d inferred=%d", tok, mmitemID, id, inferred)
return inferred, true, nil
}
}
return id, id > 0, nil return id, id > 0, nil
} }
if err != sql.ErrNoRows { if err != sql.ErrNoRows {
@@ -1186,12 +1195,20 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
} }
// Fallback: infer from dfblob filenames. // Fallback: infer from dfblob filenames.
// For dimval3, prefer token map as the source of truth when present; use image inference only when missing. // Prefer item-scoped inference when we have mmitem_id. We intentionally avoid persisting these
if column == "dimval3" && mmitemID > 0 { // inferred rows into mk_dim_token_map to prevent global pollution.
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok { if mmitemID > 0 {
if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 {
return inferred, true, nil return inferred, true, nil
} }
} }
// As a last resort, do a global inference. This is intentionally disabled for dimval1 because it is
// too easy to mis-infer and conflate tokens across the whole catalog.
if column == "dimval1" {
return 0, false, nil
}
v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0) v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0)
if v == "" { if v == "" {
return 0, false, nil return 0, false, nil
@@ -1200,15 +1217,6 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri
if perr != nil || parsed <= 0 { if perr != nil || parsed <= 0 {
return 0, false, nil return 0, false, nil
} }
if column == "dimval1" {
// Persist only for dimval1 where tokens are globally stable.
_, _ = pg.ExecContext(ctx, `
INSERT INTO mk_dim_token_map (dim_column, token, dim_id, updated_at)
VALUES ($1,$2,$3,now())
ON CONFLICT (dim_column, token)
DO UPDATE SET dim_id = EXCLUDED.dim_id, updated_at = EXCLUDED.updated_at
`, column, tok, parsed)
}
return parsed, true, nil return parsed, true, nil
} }