Saturday, February 25, 2012

Removing duplicate entries

Hi,

I would lke to know what is the best way to remove duplicate entries from a GIANT table ?

I'm using this but I think that too slow..

IF EXISTS (SELECT * FROM DBO.SYSOBJECTS WHERE ID = OBJECT_ID(N'[DBO].[#PABX_TEMP]') AND OBJECTPROPERTY(ID, N'ISUSERTABLE') = 1)
DROP TABLE [DBO].[#PABX_TEMP]

CREATE TABLE [#PABX_TEMP] (
[CHAVE] [int],
[COD_CLIENTE] [varchar] (50) COLLATE Latin1_General_CI_AS NULL ,
[DATA_HORA] [datetime] NULL ,
[NRTELEFONE] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[RAMAL] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[WCOS] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[TEMPO_SEGUNDOS] [real] NULL ,
[TIPO] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[TIPO_ORIGINAL] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[TRONCO] [varchar] (50) COLLATE Latin1_General_CI_AS NULL ,
[TEMPO_ATENDIMENTO] [real] NULL ,
[JAPROCESSADO] [int] NULL ,
[VALOR] [money] NULL ,
[VALOR_CONC] [money] NULL ,
[VALOR_TARIFA] [money] NULL ,
[VALOR_TARIFA_CONC] [money] NULL ,
[CLASSIFICA] [varchar] (50) COLLATE Latin1_General_CI_AS NULL ,
[LOCALIDADE] [varchar] (255) COLLATE Latin1_General_CI_AS NULL ,
[VALOR_TEMPO] [money] NULL ,
[NUMERO_E1] [varchar] (50) COLLATE Latin1_General_CI_AS NULL ,
[BLOQUEADO] [bit] NULL ,
[DATA_BLOQUEIO] [datetime] NULL ,
[TRANSFERIDO] [varchar] (255) COLLATE Latin1_General_CI_AS NULL
)

INSERT INTO #PABX_TEMP
SELECT MIN(CHAVE) as 'CHAVE', [COD_CLIENTE], [DATA_HORA], [NRTELEFONE],
[RAMAL], [WCOS], [TEMPO_SEGUNDOS], [TIPO], [TIPO_ORIGINAL],
[TRONCO], [TEMPO_ATENDIMENTO], [JAPROCESSADO], [VALOR], [VALOR_CONC],
[VALOR_TARIFA], [VALOR_TARIFA_CONC], [CLASSIFICA], [LOCALIDADE],
[VALOR_TEMPO], [NUMERO_E1], [BLOQUEADO], [DATA_BLOQUEIO], [TRANSFERIDO]
FROM PABX WHERE (BLOQUEADO = 0 OR BLOQUEADO IS NULL)
GROUP BY [COD_CLIENTE], [DATA_HORA], [NRTELEFONE],
[RAMAL], [WCOS], [TEMPO_SEGUNDOS], [TIPO], [TIPO_ORIGINAL],
[TRONCO], [TEMPO_ATENDIMENTO], [JAPROCESSADO], [VALOR], [VALOR_CONC],
[VALOR_TARIFA], [VALOR_TARIFA_CONC], [CLASSIFICA], [LOCALIDADE],
[VALOR_TEMPO], [NUMERO_E1], [BLOQUEADO], [DATA_BLOQUEIO], [TRANSFERIDO]

DELETE FROM PABX WHERE (BLOQUEADO = 0 OR BLOQUEADO IS NULL) AND CHAVE NOT IN (SELECT CHAVE FROM #PABX_TEMP)

DROP TABLE #PABX_TEMP

Thanks

Try replacing your NOT IN with NOT EXIST; NOT EXIST tends to be more efficient. Here are some posts to reference:

http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=299702&SiteID=1
http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=532892&SiteID=1
http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=607796&SiteID=1

|||

OK, I'm gonna test

Thanks

No comments:

Post a Comment