2017-01-19 5 views
5

संसाधन सीमा पार हो गई (Resources exceeded) — BigQuery

मैं एनालिटिक्स के लिए अपने कुछ बड़े कोड को BigQuery पर चलाने का प्रयास कर रहा हूँ, लेकिन कई राज्यों के लिए और मौजूद डेटा की मात्रा के कारण मुझे बार-बार समस्याएँ आ रही हैं। हम कई वर्षों के डेटा की बात कर रहे हैं। हो सकता है कि मेरी क्वेरी उतनी अच्छी न हो, लेकिन मुझे विशिष्ट समूहों के आधार पर योग (sums) निकालने की आवश्यकता है।

"Resources exceeded" त्रुटि से बचने के लिए मुझे क्वेरी में क्या बदलना चाहिए?

-- Competitiveness metrics per company / rating class for the TX quote log,
-- reported at ZIP3 and ZIP5 granularity (BigQuery legacy SQL).
-- The inner query computes every metric with window functions over the
-- filtered rows; the outer query collapses the result to one row per distinct
-- combination of the selected columns and turns raw counts into ratios.
SELECT
    COMPANY_NAME,
    RATING_CLASS,
    COMPANY_KEY,
    -- State Info & Calculations
    -- OVER() with an empty window sums across every row of the final result.
    -- NOTE(review): a ZIP5 count is added once per output row, so a ZIP5 that
    -- appears for several company / rating-class rows contributes more than
    -- once -- confirm this is the intended state total.
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3 Info & Calculations
    ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK,
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 Info & Calculations
    ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK,
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    SELECT
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        -- ZIP3
        ZIP3,
        COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
        COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
        -- The undiscounted averages read the undiscounted columns; they
        -- previously read the discounted_* columns and therefore duplicated
        -- the ZIP3_DISCOUNTED_* metrics below.
        AVG(ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
        AVG(cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
        -- ZIP5
        ZIP5,
        COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
        COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
        -- Same correction as for ZIP3: undiscounted columns for the
        -- undiscounted averages.
        AVG(ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
        AVG(cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        -- Half-open upper bound: a row stamped exactly at the boundary belongs
        -- to the next period only.
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        LOWEST,
        RATIO_TO_MIN,
        RATE_ORDER,
        TOP5,
        TOP10,
        CENT_DIFF,
        DISCOUNTED_LOWEST,
        DISCOUNTED_RATIO_TO_MIN,
        DISCOUNTED_RATE_ORDER,
        DISCOUNTED_TOP5,
        DISCOUNTED_TOP10,
        DISCOUNTED_CENT_DIFF,
        LOGGING_KEY)
-- Grouping by every output column de-duplicates the per-row window results.
GROUP BY
    COMPANY_NAME,
    COMPANY_KEY,
    RATING_CLASS,
    -- ZIP3 General
    ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK,
    ZIP3_TOP5,
    ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5 General
    ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK,
    ZIP5_TOP5,
    ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF

सुझाए गए सुधारों के साथ अपडेट की गई क्वेरी:

-- Join-based rewrite of the ZIP3 / ZIP5 competitiveness report (BigQuery
-- legacy SQL). `main` lists the distinct company / rating class / ZIP / state
-- combinations; each LEFT JOIN attaches one independently aggregated metric
-- set so no single stage has to hold every window partition at once.
-- Legacy SQL has no WITH clause, so the row filter is repeated verbatim in
-- every subquery; all copies must stay identical or the aggregates will cover
-- rows that `main` excludes.
SELECT
    main.COMPANY_NAME AS COMPANY_NAME,
    main.COMPANY_KEY AS COMPANY_KEY,
    main.RATING_CLASS AS RATING_CLASS,
    state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3
    main.ZIP3 AS ZIP3,
    ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
    ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5
    main.ZIP5 AS ZIP5,
    ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
    ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    -- Distinct reporting keys; GROUP BY is used purely for de-duplication.
    SELECT
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        STATE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        -- NOTE(review): the window starts at 06:00Z but ends at 05:00Z; if both
        -- bounds are meant to be local midnight in the same UTC offset the end
        -- should presumably also be 06:00Z -- confirm.
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        -- Half-open upper bound so the boundary instant is not counted twice
        -- across adjacent reporting periods.
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        COMPANY_NAME,
        COMPANY_KEY,
        RATING_CLASS,
        ZIP3,
        ZIP5,
        STATE
    ) AS main
LEFT JOIN (
    -- ZIP3-level metrics per company / rating class.
    -- Grouped on exactly the join keys (COMPANY_NAME removed) so each join key
    -- matches at most one row and counts are not split or fanned out.
    SELECT
        ZIP3,
        COMPANY_KEY,
        RATING_CLASS,
        COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
        -- Undiscounted averages read the undiscounted columns; they previously
        -- read discounted_* and duplicated the ZIP3_DISCOUNTED_* values.
        AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
        SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
        SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
        AVG(rate_order) AS ZIP3_AVG_RANK,
        AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
        AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
        SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
        AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        -- Same exclusions as `main`, so excluded users / portals do not leak
        -- into the aggregates.
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP3,
        COMPANY_KEY,
        RATING_CLASS
    ) AS ZIP3_SUB
ON
    main.ZIP3 = ZIP3_SUB.ZIP3
    AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
    AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
    -- Exact number of distinct quotes (logging_key) per ZIP3.
    SELECT
        ZIP3,
        EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP3) AS ZIP3_COUNT
ON
    main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
    -- ZIP5-level metrics per company / rating class (mirrors ZIP3_SUB).
    SELECT
        ZIP5,
        COMPANY_KEY,
        RATING_CLASS,
        COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
        AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
        SUM(CASE WHEN LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
        SUM(CASE WHEN TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
        AVG(rate_order) AS ZIP5_AVG_RANK,
        AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
        AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        SUM(CASE WHEN DISCOUNTED_TOP5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_LOWEST = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
        SUM(CASE WHEN DISCOUNTED_TOP10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
        AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP5,
        COMPANY_KEY,
        RATING_CLASS
    ) AS ZIP5_SUB
ON
    main.ZIP5 = ZIP5_SUB.ZIP5
    AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
    AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
    -- Exact number of distinct quotes (logging_key) per ZIP5.
    SELECT
        ZIP5,
        EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        ZIP5) AS ZIP5_COUNT
ON
    main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
    -- Exact number of distinct quotes (logging_key) per state.
    SELECT
        STATE,
        EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
    FROM
        [csgapi:qh_med_supp_tool.v2_TX]
    WHERE
        SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T05:00:00.000Z')
        AND user_key NOT IN ("6522869941010432",
            "6277136540237824",
            "4872666167115776",
            "6396348765044736",
            "6139303562313728",
            "4988973881491456")
        AND portal_key NOT IN ("5878607637381120")
    GROUP BY
        STATE) AS STATE_COUNT
ON
    main.STATE = STATE_COUNT.STATE

स्पष्टीकरण: चूँकि BigQuery एक साझा संसाधन है, इसलिए वह अनुमान लगाता है कि क्वेरी को कितने संसाधनों की आवश्यकता होगी और उसी के अनुसार संसाधन आवंटित करता है। क्वेरी को एक ही तालिका पर कई जॉइन में विभाजित करने से गणना मूल रूप से आवंटित संसाधनों से अधिक संसाधनों पर की जा सकती है। क्वेरी पर लगने वाली सीमाओं के अधिक तकनीकी स्पष्टीकरण के लिए, कृपया स्टैक ओवरफ़्लो पर जॉर्डन टिगानी का उत्तर here देखें।

+0

का उपयोग करने के लिए बेहतर है, यह सोचकर, यह क्वेरी कितनी गीगाबाइट मोटे तौर पर संसाधित करती है? –

+0

मेरे पास कुछ विचार हैं कि आपकी क्वेरी इतनी महँगी क्यों है। एक संभावित कारण यह है कि यह ज़रूरत से ज़्यादा जटिल (over-engineered) है। लेकिन इस बात की स्पष्ट तस्वीर के बिना कि आप यहाँ (तार्किक रूप से) क्या हासिल करना चाहते हैं, अंतिम निष्कर्ष निकालना और निर्णय में गलती न करना मुश्किल है। मेरा सुझाव है कि आप विस्तार से बताएँ कि इस क्वेरी से आप क्या हासिल करना चाहते हैं, ताकि हम अभी की तरह अँधेरे में रहे बिना आपकी मदद कर सकें :o) –

उत्तर

2

मुझे लगता है कि नीचे दी गई क्वेरी वही है जो आपको चाहिए। मैं गलत हो सकता हूँ, क्योंकि यह आपके तर्क को रिवर्स-इंजीनियर करने का लगभग अंधा प्रयास है, इसलिए यदि मैं यहाँ गलत हूँ तो कृपया बहुत कठोरता से न आँकें।
मैं इसका परीक्षण नहीं कर सका, लेकिन मुझे लगता है कि यह क्वेरी

-- Answer's join-based version of the ZIP3 / ZIP5 competitiveness report
-- (BigQuery legacy SQL). `main` holds the distinct company / rating class /
-- ZIP combinations; each LEFT JOIN attaches one pre-aggregated metric set
-- instead of computing every metric as a window function over the raw rows.
SELECT
    main.COMPANY_NAME AS COMPANY_NAME,
    main.COMPANY_KEY AS COMPANY_KEY,
    main.RATING_CLASS AS RATING_CLASS,
    -- OVER() with an empty window sums across every row of the final result.
    -- NOTE(review): a ZIP5 count is added once per joined row, so a ZIP5 that
    -- appears for several company / rating-class rows contributes more than
    -- once -- confirm this is the intended state total.
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
    -- ZIP3
    main.ZIP3 AS ZIP3,
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP3_TOTAL_RESULT_APPEARANCE,
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
    -- ZIP5
    main.ZIP5 AS ZIP5,
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
    ZIP5_TOTAL_RESULT_APPEARANCE,
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
    -- Distinct reporting keys; GROUP BY is used purely for de-duplication.
    SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        -- Half-open upper bound: a row stamped exactly at the boundary belongs
        -- to the next period only. The same filter is repeated in every
        -- subquery below and must stay identical.
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
    -- ZIP3-level metrics, one row per (ZIP3, company_key, rating_class).
    SELECT
        ZIP3, company_key, rating_class,
        COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
        -- Undiscounted averages read the undiscounted columns; they previously
        -- read discounted_* and duplicated the ZIP3_DISCOUNTED_* values.
        AVG(ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) AS ZIP3_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
        AVG(cent_diff) AS ZIP3_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
    -- ZIP5-level metrics, one row per (ZIP5, company_key, rating_class).
    SELECT
        ZIP5, company_key, rating_class,
        COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
        SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
        -- Same correction as for ZIP3: undiscounted columns for the
        -- undiscounted averages.
        AVG(ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
        AVG(rate_order) AS ZIP5_AVG_RANK,
        SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
        SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
        AVG(cent_diff) AS ZIP5_AVG_CENT_DIFF,
        SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
        AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
        AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
        SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
        SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
        AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
    -- Distinct quotes (logging_key) per ZIP3. In legacy SQL COUNT(DISTINCT)
    -- may return an approximation; EXACT_COUNT_DISTINCT() is the exact variant.
    SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
    -- Distinct quotes (logging_key) per ZIP5; same approximation caveat.
    SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
    FROM [csgapi:qh_med_supp_tool.v2_TX]
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
        AND SEARCH_TIMESTAMP < TIMESTAMP('2017-01-01T00:00:00.000Z')
        AND user_key NOT IN ("6522869941010432", "6277136540237824")
    GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5
सफलतापूर्वक चलने के लिए पर्याप्त सस्ती होगी — ऐसा मुझे लगता है।

इसके अलावा, कृपया ध्यान दें: BigQuery लेगेसी SQL में COUNT(DISTINCT) फ़ंक्शन संभाव्य (probabilistic) है — यह एक सांख्यिकीय अनुमान देता है और इसके सटीक होने की गारंटी नहीं है।
इसके बजाय आप EXACT_COUNT_DISTINCT() फ़ंक्शन का उपयोग कर सकते हैं — यह सटीक संख्या देता है, लेकिन बैक-एंड पर थोड़ा अधिक महँगा पड़ता है।

और निश्चित रूप से, पूरी क्वेरी को BigQuery Standard SQL में फिर से लिखा जा सकता है, जहाँ COUNT(DISTINCT) सटीक गणना देता है; BigQuery टीम भी Standard SQL के उपयोग की ही सिफ़ारिश करती है।

+0

वाह। तुमने मेरा दिमाग उड़ा दिया। मुझे लगता है कि BigQuery के संदर्भ में और वे संसाधन कैसे आवंटित करते हैं, यह सही अर्थ बनाता है। हमसे यह साझा करने के लिए धन्यवाद। मैंने आपके द्वारा सुझाए गए परिवर्तनों के लिए एक अपडेटेड क्वेरी पोस्ट की है। यह काम कर रहा है। औसत रन टाइम 86 सेकंड से 30 सेकंड तक चला गया। – SmittySmee

संबंधित मुद्दे