एसक्यूएल इंसर्ट लेकिन डुप्लिकेट से बचें

मैं तालिका में कुछ तेज़ इंसर्ट करना चाहता हूँ, लेकिन डुप्लिकेट से बचना चाहता हूँ। उदाहरण के लिए इस तालिका को MarketPrices कह लेते हैं। मैं इसे करने के दो तरीकों पर प्रयोग कर रहा हूँ, लेकिन मुझे यकीन नहीं है कि यह बेंचमार्क कैसे करूँ कि कौन-सा तरीका तेज़ होगा।

-- Insert the candidate row only when no stored row has the same
-- (SecurityCode, BuyPrice, SellPrice). EXCEPT compares whole rows, so each
-- stored row is cross-joined with both flag values (0 and 1): for a 0/1
-- @IsMarketOpen, a stored row with matching code and prices always cancels the
-- candidate, which takes IsMarketOpen out of the duplicate check.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen 
EXCEPT 
SELECT SecurityCode, BuyPrice, SellPrice, j.bool AS IsMarketOpen FROM MarketPrices 
-- 0 and 1 are distinct literals, so UNION ALL yields the same two rows
-- without the implicit DISTINCT step that plain UNION adds.
CROSS JOIN (SELECT 0 AS bool UNION ALL SELECT 1 AS bool) AS j

या

-- Look up an existing row with the same security code and buy/sell prices,
-- and insert the new quote only when none is found. IsMarketOpen is not part
-- of the duplicate check.
-- NOTE(review): the scalar subquery raises an error if more than one row
-- matches, and the lookup and the INSERT are separate statements, so
-- concurrent callers can both pass the check unless this runs inside a
-- suitably isolated transaction.
DECLARE @MktId int 
SET @MktId = (SELECT SecurityId FROM MarketPrices 
       where SecurityCode = @SecurityCode 
       and BuyPrice = @BuyPrice 
       and SellPrice = @SellPrice) 

-- NULL means the lookup found no matching row.
IF (@MktId is NULL) 
BEGIN 
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
    VALUES 
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen) 
END

मान लें कि @whatever संग्रहीत प्रक्रिया में एक इनपुट पैरामीटर है।

मैं प्रत्येक सुरक्षा कोड के लिए एक नया रिकॉर्ड डालने में सक्षम होना चाहता हूं जब BuyPrice या SellPrice या दोनों अन्य सभी पिछली घटनाओं से अलग होते हैं। मुझे IsMarketOpen के बारे में परवाह नहीं है।

क्या उपर्युक्त दृष्टिकोणों में से किसी के बारे में कुछ भी स्पष्ट रूप से बेवकूफ है? क्या एक दूसरे की तुलना में तेज़ है?

स्रोत

2009-11-06 Ravi

याद रखें कि दूसरे दृष्टिकोण को एक लेनदेन (transaction) में लपेटा जाना चाहिए, अन्यथा आपको समवर्तीता (concurrency) की समस्याएँ हो सकती हैं। –

क्या आप बस एक अद्वितीय इंडेक्स नहीं बना सकते? मुझे एमएस एसक्यूएल का कोई अनुभव नहीं है, लेकिन मुझे लगता है कि ऐसे इंडेक्स होने चाहिए। –

@valya: मज़ेदार है कि लोग कैसे संदेह करते हैं कि SQL सर्वर सबसे सरल चीज़ें भी कर सकता है या नहीं। मुझे तो यह भी यकीन नहीं है कि अद्वितीय इंडेक्स का समर्थन किए बिना *एक रिलेशनल डेटाबेस इंजन* को कार्यान्वित किया जा सकता है। – Tomalak

संपादित: समवर्ती वातावरण में race conditions रोकने के लिए सहसंबद्ध सबक्वेरी में, या EXCEPT वाले SELECT में, WITH (UPDLOCK) का उपयोग करें। नीचे लिखी गई टेस्ट स्क्रिप्ट के लिए इसकी आवश्यकता नहीं है, क्योंकि यह अस्थायी तालिकाओं का उपयोग करती है जो केवल मौजूदा कनेक्शन को दिखाई देती हैं; लेकिन वास्तविक वातावरण में, जहाँ उपयोगकर्ता तालिकाओं पर काम होता है, यह आवश्यक होगा।

MERGE को UPDLOCK की आवश्यकता नहीं है।

mcl के जवाब (अद्वितीय इंडेक्स बनाएँ और डेटाबेस को त्रुटि फेंकने दें) से प्रेरित होकर, मैंने conditional inserts बनाम try/catch को बेंचमार्क करने का फैसला किया।

परिणाम try/catch की तुलना में सशर्त इंसर्ट (conditional insert) के पक्ष में दिखते हैं, लेकिन आपके नतीजे अलग हो सकते हैं (YMMV)। यह एक बहुत ही सरल परिदृश्य है (एक कॉलम, छोटी तालिका, आदि), जिसे केवल एक मशीन पर चलाया गया है।

यहाँ के परिणाम हैं (एसक्यूएल सर्वर 2008, 10.0.1600.2 निर्माण):

duplicates (short table)  
    try/catch:    14440 milliseconds/100000 inserts 
    conditional insert:  2983 milliseconds/100000 inserts 
    except:     2966 milliseconds/100000 inserts 
    merge:      2983 milliseconds/100000 inserts 

uniques 
    try/catch:     3920 milliseconds/100000 inserts 
    conditional insert:  3860 milliseconds/100000 inserts 
    except:     3873 milliseconds/100000 inserts 
    merge:      3890 milliseconds/100000 inserts 

    straight insert:   3173 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:    14436 milliseconds/100000 inserts 
    conditional insert:  3063 milliseconds/100000 inserts 
    except:     3063 milliseconds/100000 inserts 
    merge:      3030 milliseconds/100000 inserts

ध्यान दें कि अनूठे (unique) इंसर्ट पर भी try/catch में सशर्त इंसर्ट की तुलना में थोड़ा अधिक ओवरहेड है। मुझे जानने की उत्सुकता है कि क्या यह संस्करण, सीपीयू, कोर की संख्या इत्यादि के अनुसार बदलता है।

मैंने IF वाले सशर्त इंसर्ट को बेंचमार्क नहीं किया, केवल WHERE वाले को। मुझे लगता है कि IF वाला रूप अधिक ओवरहेड दिखाएगा, क्योंकि (क) आपके पास दो कथन होंगे, और (ख) आपको दोनों कथनों को एक लेनदेन में लपेटना होगा और isolation level को SERIALIZABLE पर सेट करना होगा (!)। यदि कोई इसका परीक्षण करना चाहे, तो उसे अस्थायी तालिका को नियमित उपयोगकर्ता तालिका में बदलना होगा (SERIALIZABLE स्थानीय temp तालिकाओं पर लागू नहीं होता)।

-- Benchmark harness setup.
-- Tested on SQL 2008; to run on SQL 2005, comment out the statements using MERGE.
SET NOCOUNT ON

-- Start from a fresh single-column temp table. The primary key on col1 is
-- what rejects duplicate values in the tests that follow.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
CREATE TABLE #temp (col1 int PRIMARY KEY)
GO

-------------------------------------------------------
-- Duplicate-insert benchmark against a table holding 1 record.
-- Every batch tries 100000 times to insert the value 1, which the
-- seed statement below has already stored.
-------------------------------------------------------

-- Seed the single row that the batches below collide with.
INSERT #temp VALUES (1)
GO

-- Variant: insert blindly and discard whatever error the insert raises.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp SELECT @val
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms,@iter) WITH NOWAIT
GO

-- Variant: conditional insert guarded by a NOT EXISTS probe.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), conditional insert: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

-- Variant: EXCEPT removes the candidate value when the table already holds it.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), except: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

-- comment this batch out for SQL 2005
-- Variant: MERGE that inserts only when the target has no matching row.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    MERGE #temp AS tgt
    USING (SELECT @val) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (col1);
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), merge: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

-------------------------------------------------------
-- Unique-insert benchmark against an initially empty table.
-- Each batch empties #temp, then inserts the values 1..100000 once each,
-- so no insert ever meets an existing row.
-------------------------------------------------------

-- Baseline: plain insert with no duplicate protection at all.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    INSERT #temp SELECT @val
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, straight insert: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

-- Variant: insert wrapped in TRY/CATCH with an empty handler.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    BEGIN TRY
        INSERT #temp SELECT @val
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

-- Variant: conditional insert guarded by a NOT EXISTS probe.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, conditional insert: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

-- Variant: EXCEPT against the current table contents.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    INSERT #temp
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, except: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

-- comment this batch out for SQL 2005
-- Unique-insert benchmark, MERGE variant: empties #temp, then merges in the
-- values 1..100000, inserting each one because the target has no match.
truncate table #temp 
declare @x int, @now datetime, @duration int 
-- Start at 0 so the loop performs exactly 100000 inserts (1..100000). That
-- matches the count printed below and leaves the value 1 in the table.
select @x = 0, @now = getdate() 
while @x < 100000 begin 
    set @x = @x+1 
    merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1); 
end 
set @duration = datediff(ms,@now,getdate()) 
raiserror('uniques, merge: %i milliseconds/%i inserts',-1,-1,@duration, @x) with nowait 
go 

-------------------------------------------------------
-- Duplicate-insert benchmark against a table w/ 100000 records.
-- Nothing here truncates or reseeds #temp: these batches rely on the rows
-- left behind by the preceding unique-insert batches.
-------------------------------------------------------

-- Variant: insert blindly and discard whatever error the insert raises.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp SELECT @val
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms,@iter) WITH NOWAIT
GO

-- Variant: conditional insert guarded by a NOT EXISTS probe.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), conditional insert: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

-- Variant: EXCEPT against the full table contents.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), except: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

-- comment this batch out for SQL 2005
-- Variant: MERGE that inserts only when the target has no matching row.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    MERGE #temp AS tgt
    USING (SELECT @val) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (col1);
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), merge: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

स्रोत

2009-11-06 17:31:51

यहां एक अद्वितीय इंडेक्स का उपयोग करने का मुख्य कारण डेटा अखंडता की गारंटी देना है। मुझे संदेह है कि एक कोशिश/पकड़ ब्लॉक में एक असफल डालने के लिए अधिकांश अनुप्रयोगों में एक बाधा बनने वाला नहीं है, खासकर उस परिदृश्य में जहां डुप्लिकेट डालने के बहुत सारे प्रयास नहीं हैं (क्योंकि आपका बेंचमार्क उसमें समान प्रदर्शन दिखाता है मामला)। लेकिन मुझे संदेह होगा कि एक अप्रत्याशित डेटा मॉडल होने पर किसी समस्या पर समस्या आ रही है। साथ ही, SQL सर्वर 2008 पर, मैं इन अन्य रणनीतियों में से किसी एक पर मेर्ज के उपयोग की खोज करने का सुझाव दूंगा। – mlibby

@ एमसीएल पुन: अद्वितीय सूचकांक, मैं पूरी तरह से सहमत हूं, उसके पास डेटा अखंडता के लिए एक सूचकांक होना चाहिए, और यदि वह उचित प्रदर्शन चाहता है तो उसे एक की आवश्यकता होगी। पुन: मेर्ज, मैंने अभी इसका परीक्षण किया है, और यह सभी परिदृश्यों में एक सशर्त सम्मिलन के समान * बहुत * करता है। –

धन्यवाद दोस्तों, काश मैं आपके उत्तरों को स्वीकार कर सकता हूं। मैं डेटा अखंडता के लिए एक अनूठी अनुक्रमणिका डालने जा रहा हूं और फिर सशर्त डालने का उपयोग करता हूं क्योंकि यह प्रदर्शन और पठनीयता के मामले में सबसे अच्छा लगता है। – Ravi

संपादित: समवर्ती वातावरण में race conditions रोकने के लिए सहसंबद्ध सबक्वेरी में WITH (UPDLOCK) का उपयोग करें।

मुझे लगता है कि यह मानक तरीका होगा:

-- Insert the quote only when no stored row has the same security code and
-- buy/sell prices; IsMarketOpen is not part of the duplicate check.
-- UPDLOCK on its own has nothing to lock when no matching row exists, so two
-- concurrent sessions could both pass the probe and both insert. HOLDLOCK
-- (serializable semantics) keeps the probed key range locked until the
-- statement's transaction ends, which closes that gap.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen 
WHERE NOT EXISTS (
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK) 
    WHERE SecurityCode = @SecurityCode 
    AND BuyPrice = @BuyPrice 
    AND SellPrice = @SellPrice 
)

यदि आपका कोई कॉलम NULL हो सकता है, तो आपको उसकी शर्त भी जोड़नी होगी।

आपकी पहली विधि दिलचस्प है, लेकिन EXCEPT की आवश्यकताओं को पूरा करने के लिए आपको बेवजह मशक्कत करनी पड़ रही है। यह विधि अनिवार्य रूप से वही है, लेकिन यह आपको कॉलम मिलान की समस्या से बचा लेती है।

वैकल्पिक रूप से:

-- Insert the quote only when (SecurityCode, BuyPrice, SellPrice) is not yet
-- stored. EXCEPT runs on the three key columns alone, and @IsMarketOpen is
-- attached afterwards, so it never takes part in the duplicate check.
-- EXCEPT treats NULLs as equal, so nullable columns need no extra predicates.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen 
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice 
    EXCEPT 
    -- HOLDLOCK is added to UPDLOCK so that the absence of a matching row
    -- stays protected against concurrent inserts until the transaction ends.
    SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK) 
) a (SecurityCode, BuyPrice, SellPrice)

इस उदाहरण में EXCEPT की अच्छी बात यह है कि यह आपकी ओर से किसी अतिरिक्त कोडिंग के बिना NULL को संभाल लेता है। पहले उदाहरण में यही हासिल करने के लिए, आपको प्रत्येक जोड़ी को समानता के साथ-साथ NULL के लिए भी विस्तार से जाँचना होगा।

आपकी दूसरी विधि ठीक है, लेकिन आपको चर की आवश्यकता नहीं है। टॉमलाक के समाधान को देखें, उसने इसे अच्छी तरह साफ कर दिया। साथ ही, यदि आप चिंता करते हैं, तो आपको समवर्ती आवेषण की संभावना को स्पष्ट रूप से संभालना होगा।

स्रोत

2009-11-06 16:33:15

मैं तो हमेशा ऐसा समाधान चुनूँगा जिसका अर्थ पढ़ते ही स्पष्ट हो जाए। आपके दोनों प्रस्ताव मुझे काफी अस्पष्ट लगते हैं (हालांकि दूसरा पहले की तुलना में बेहतर है)।

-- Insert the quote only when no stored row has the same security code and
-- buy/sell prices; IsMarketOpen plays no part in the check.
IF NOT EXISTS (
    SELECT 1
    FROM MarketPrices AS mp
    WHERE mp.SecurityCode = @SecurityCode
      AND mp.BuyPrice = @BuyPrice
      AND mp.SellPrice = @SellPrice
)
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)

SecurityCode, BuyPrice, SellPrice पर एक clustered index होने पर EXISTS क्वेरी पर्याप्त तेज़ी से चलनी चाहिए।

मेरे हिसाब से इसे बेंचमार्क करना बस एक WHILE लूप का समय मापने की बात है। इसका परीक्षण करें और खुद देख लें।

स्रोत

2009-11-06 16:33:58 Tomalak

एक और विकल्प: फ़ील्ड (सुरक्षा कोड, BuyPrice, SellPrice) पर एक अनन्य अनुक्रमणिका बनाएं, एक साधारण डालने जारी करें, और डेटाबेस को यह तय करने दें कि रिकॉर्ड डुप्लीकेट हैं या नहीं। डुप्लिकेट डालने के प्रयास पर सम्मिलन विफल हो जाएगा।

विशिष्टता की गारंटी के लिए कोड (चाहे बाहरी भाषा हो या SQL प्रोसीजर) पर निर्भर रहना पर्याप्त सख्त नहीं है, और अंततः यह आपको उन्हीं डुप्लिकेट की ओर ले जाएगा जिन्हें आप रोकना चाहते हैं।

स्रोत

2009-11-06 16:50:15 mlibby

मुझे लगता है कि आप सही हो सकते हैं, खासकर जब समवर्ती प्रविष्टियों की बात आती है – Ravi

मुझे यह बेंचमार्क देखने में दिलचस्पी होगी। एक अद्वितीय इंडेक्स मानते हुए, जिसमें अधिक ओवरहेड है: एक सशर्त डालने का WHERE क्लॉज, या एक TRY/CATCH ब्लॉक का अपवाद हैंडलिंग? यदि आप उम्मीद करते हैं कि 99% आपके आवेषण डुप्लीकेट नहीं हैं, तो मुझे लगता है कि TRY/CATCH ब्लॉक अधिक कुशल हो सकता है। –

मैं ठीक से करने जा रहा हूं कि जब मैं घर जाऊंगा - परिणाम यहां पोस्ट करेंगे – Ravi

यदि आपको डुप्लिकेट को पकड़ने (trap करने) की ज़रूरत नहीं है, तो आप हमेशा "ignore duplicates" विकल्प (IGNORE_DUP_KEY = ON) के साथ एक अद्वितीय इंडेक्स बना सकते हैं:

यहाँ स्क्रिप्ट है। एसक्यूएल सर्वर आपके लिए इसका ख्याल रखेगा।

स्रोत

2010-12-11 08:26:51 IamIC

नीचे मैंने पीटर रेडोकिया के उत्कृष्ट उत्तर में Only inserting a row if it's not already there से शीर्ष उत्तरों जोड़े हैं।

निष्कर्ष यह है कि race safe with try/catch तकनीक, race safe with updlock, holdlock तकनीक की तुलना में मामूली रूप से (~1%) तेज़ है जब कोई वास्तविक टकराव नहीं हो रहा हो (यानी आप उम्मीद करते हैं कि टकराव बहुत दुर्लभ होंगे — यह uniques परिदृश्य है), और थोड़ी धीमी (~20%) है जब हमेशा टकराव होता है (यह duplicates परिदृश्य है)। इसमें लॉक एस्केलेशन जैसे जटिल मुद्दों को ध्यान में नहीं लिया गया है।

यहां परिणाम हैं (SQL सर्वर 2014, बिल्ड 12.0.2000.8):

duplicates (short table)  
    try/catch:      15546 milliseconds/100000 inserts 
    conditional insert:    1460 milliseconds/100000 inserts 
    except:       1490 milliseconds/100000 inserts 
    merge:       1420 milliseconds/100000 inserts 
    race safe with try/catch:   1650 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1330 milliseconds/100000 inserts 

uniques 
    try/catch:      2266 milliseconds/100000 inserts 
    conditional insert:    2156 milliseconds/100000 inserts 
    except:       2273 milliseconds/100000 inserts 
    merge:       2136 milliseconds/100000 inserts 
    race safe with try/catch:   2400 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 2430 milliseconds/100000 inserts 

    straight insert:     1686 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:      15826 milliseconds/100000 inserts 
    conditional insert:    1530 milliseconds/100000 inserts 
    except:       1506 milliseconds/100000 inserts 
    merge:       1443 milliseconds/100000 inserts 
    race safe with try/catch:   1636 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1426 milliseconds/100000 inserts

डुप्लिकेट (कम तालिका) अनुभाग:

-- Race-safe variants for the duplicates (short table) part of the benchmark.
-- Both loops try 100000 times to insert the value 1 into #temp.

-- Variant: NOT EXISTS probe plus TRY/CATCH. Error 2627 (primary key / unique
-- constraint violation) is swallowed; any other error is re-thrown.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms,@iter) WITH NOWAIT
GO

-- Variant: NOT EXISTS probe taken with UPDLOCK + HOLDLOCK hints.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with updlock, holdlock: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

Uniques अनुभाग

-- Race-safe variants for the uniques part of the benchmark.
-- Each batch empties #temp and then inserts the values 1..100000 once each.

-- Variant: NOT EXISTS probe plus TRY/CATCH. Error 2627 (primary key / unique
-- constraint violation) is swallowed; any other error is re-thrown.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    BEGIN TRY
        INSERT #temp
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, race safe with try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

-- Variant: NOT EXISTS probe taken with UPDLOCK + HOLDLOCK hints.
TRUNCATE TABLE #temp
DECLARE @val int, @started datetime, @elapsed_ms int
SELECT @val = 0, @started = GETDATE()
WHILE @val < 100000
BEGIN
    SET @val = @val + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, race safe with updlock, holdlock: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @val) WITH NOWAIT
GO

डुप्लिकेट (लंबा तालिका) अनुभाग

-- Race-safe variants for the duplicates (tall table) part of the benchmark.
-- Nothing here truncates #temp; presumably it still holds the rows left by
-- the unique-insert batches. Confirm this against the run order of the script.

-- Variant: NOT EXISTS probe plus TRY/CATCH. Error 2627 (primary key / unique
-- constraint violation) is swallowed; any other error is re-thrown.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    BEGIN TRY
        INSERT #temp
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val)
    END TRY
    BEGIN CATCH
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with try/catch: %i milliseconds/%i inserts',-1,-1,@elapsed_ms,@iter) WITH NOWAIT
GO

-- Variant: NOT EXISTS probe taken with UPDLOCK + HOLDLOCK hints.
DECLARE @val int, @iter int, @started datetime, @elapsed_ms int
SELECT @val = 1, @iter = 0, @started = GETDATE()
WHILE @iter < 100000
BEGIN
    SET @iter = @iter + 1
    INSERT #temp
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @val)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds/%i inserts',-1,-1,@elapsed_ms, @iter) WITH NOWAIT
GO

स्रोत

2015-05-18 20:35:29

एसक्यूएल इंसर्ट लेकिन डुप्लिकेट से बचें

उत्तर

संबंधित मुद्दे