Google Big Query
Connecting the Scope3 measurement to your Big Query datasets
Enabling Scope3 measurement directly from your Big Query data via a remote function that uses a cloud function to bridge between Big Query and the Scope3 calculate/daily API.
Setup
Follow the Google Cloud instructions for creating a remote connection here. Follow the create connection instructions note that you need to copy the service account ID for the setup access step.
Next you will need to enable and create a cloud function. Instructions are here.When you are ready to deploy your cloud function paste the following as the function 'bq_scope3_measured.
Cloud Function
const functions = require('@google-cloud/functions-framework');
const scope3_api_fetch = async (body,udc,sessionUser) => {
const data = {}
data.method = "POST"
data.body = JSON.stringify(body)
data.headers = {
"Content-Type": "application/json",
"AccessClientId": udc.AccessClientId,
"AccessClientSecret": udc.AccessClientSecret,
SessionUser: sessionUser,
}
return await fetch("https://api.scope3.com/v1/calculate/daily?includeRows=true", data);
};
const transform = async (body) => {
if (!body.calls || !body.calls[0]) {
const errmsg = "no calls object found";
throw new Error(errmsg);
}
if (!body.userDefinedContext) {
const errmsg = "No UserDefined Context object , Credentials Missing";
throw new Error(errmsg);
}
const request_body = {}
request_body.rows = []
body.calls.forEach((r) => {
request_body.rows.push(r[0])
});
const res = await scope3_api_fetch(request_body,body.userDefinedContext,body.sessionUser);
const json = await res.json()
const final = {
"replies": json.rows
}
return final
}
functions.http('bq_scope3_measure', async (req, res) => {
try {
const api_response = await transform(req.body)
res.send(api_response)
} catch (e) {
console.error(e);
res.status(404).send(e);
}
});
Big Query Remote Function
Navigate to a SQL workspace in the Big Query Google Cloud console open a SQL tab and insert the follow replacing
The fields YourProjectFunctions
, ExternalConnection
, YourClientID
, YourAccessSecret
, YourEndPoint
.
YourProjectFunctions
is the project name and dataset where you store your remote functions in Big Query. An example Acme.functions
ExternalConnection
is the connection name of the remote connection you created in Big Query. It may for example take the form Acme.us-central1.remote_functions
YourClientID
is the Scope3 API client ID provisioned for your account with Scope3.
YourAccessSecret
is the Scope3 API access secreted provisioned for your account with scope3.
YourEndPoint
is the cloud function published endpoint. It may take the form of https://function-1-scope3_calculate_daily-abcdefg.a.run.app
CREATE OR REPLACE FUNCTION `YourProjectFunctions`.BQAPIBridgeCalculateDaily(
PARAM JSON)
RETURNS JSON
REMOTE WITH CONNECTION `ExternalConnection`
OPTIONS (
endpoint = "YourEndPoint",
max_batching_rows=4000,
user_defined_context = [
("AccessClientId","YourClientID"),
("AccessClientSecret", "YourAccessSecret")]
)
Usage of the function
We have added the ability to calculate emissions directly from Big Query SQL . This document serves as a guide to using the function.
The main function that allows calculating emissions is
YourProjectFunctions
.BQAPIBridgeCalculateDaily()
The function takes as its only argument, a JSON structure parameter, that is directly passed as a row in the rows
array of the /calculate_daily
API function. The function returns the JSON object returned from the API.
The schema of the JSON object passed to the SQL function must be identical to the schema defined in the 'row' field of calculate/daily API.
A ‘hello emissions’ example
SELECT `YourProjectFunctions`.BQAPIBridgeCalculateDaily(
TO_JSON(STRUCT (
'2023-04-23' as date,
'cnn.com' as inventoryId,
1000 as impressions,
'NY' as region,
'US' as country
)))
or in a JSON view
[{
"f0_": "{\"adSelectionEmissions\":122.37120132272172,\"baseEmissions\":42.60415998211429,\"consumerDeviceEmissions\":128.6876481436838,\"domainCoverage\":\"modeled\",\"gmpEligible\":true,\"matchedChannel\":\"display-web\",\"matchedProperty\":\"cnn.com\",\"mediaDistributionEmissions\":42.60415998211429,\"supplyDirectness\":6,\"supplyPathEmissions\":122.37120132272172,\"totalEmissions\":164.975361304836}"
}]
A more complex example demonstrating type conversion of the returned JSON struct.
select request_domain, sum(impressions) as total_impressions,
sum(cast (JSON_VALUE(J['mediaDistributionEmissions']) as FLOAT64)) as mediaDistributionEmissions ,
sum(cast (JSON_VALUE(J['adSelectionEmissions']) as FLOAT64)) as adSelectionEmissions,
sum(cast (JSON_VALUE(J['consumerDeviceEmissions']) as FLOAT64)) as consumerDeviceEmissions
from (
SELECT request_domain, impressions, `YourProjectFunctions`.BQAPIBridgeCalculateDaily(
TO_JSON(STRUCT (
X.ymd as date,
X.domain as inventoryId,
X.impressions as impressions,
X.state as region,
X.country as country
))) as J FROM `YourProject.YourTable` as X WHERE LIMIT 1000) as Y WHERE
JSON_VALUE(J['domainCoverage']) != 'missing' group by 1 order by 2 desc limit 10
The JSON view of the results
[{
"request_domain": "sfgate.com",
"total_impressions": "5465",
"mediaDistributionEmissions": "268.58009034010973",
"adSelectionEmissions": "402.39636365102729",
"consumerDeviceEmissions": "696.56211343994119"
}, {
"request_domain": "cafedelites.com",
"total_impressions": "4750",
"mediaDistributionEmissions": "233.85431805484646",
"adSelectionEmissions": "225.40558594639117",
"consumerDeviceEmissions": "605.42910134304122"
}, {
"request_domain": "ultimate-guitar.com",
"total_impressions": "2272",
"mediaDistributionEmissions": "111.60855657567809",
"adSelectionEmissions": "1159.2657538000708",
"consumerDeviceEmissions": "289.58629857923995"
}, {
"request_domain": "scenenews.com",
"total_impressions": "1637",
"mediaDistributionEmissions": "80.314327197357088",
"adSelectionEmissions": "910.01004621853974",
"consumerDeviceEmissions": "208.64998713653864"
}, {
"request_domain": "aubtu.biz",
"total_impressions": "1054",
"mediaDistributionEmissions": "51.763256095067",
"adSelectionEmissions": "1000.2566047386345",
"consumerDeviceEmissions": "134.34153111906642"
}, {
"request_domain": "bhg.com",
"total_impressions": "785",
"mediaDistributionEmissions": "38.612318658419596",
"adSelectionEmissions": "39.9889004833201",
"consumerDeviceEmissions": "100.05512516932366"
}, {
"request_domain": "webmd.com",
"total_impressions": "696",
"mediaDistributionEmissions": "34.23890124325515",
"adSelectionEmissions": "220.20756019338262",
"consumerDeviceEmissions": "88.711295691527724"
}, {
"request_domain": "heavy.com",
"total_impressions": "603",
"mediaDistributionEmissions": "29.636227883246864",
"adSelectionEmissions": "183.8381701755043",
"consumerDeviceEmissions": "76.857631181021873"
}, {
"request_domain": "thetruthaboutcars.com",
"total_impressions": "598",
"mediaDistributionEmissions": "29.384942729233543",
"adSelectionEmissions": "125.65922293682448",
"consumerDeviceEmissions": "76.220337390134461"
}, {
"request_domain": "news.yahoo.com",
"total_impressions": "572",
"mediaDistributionEmissions": "28.352404185614727",
"adSelectionEmissions": "149.81229114243214",
"consumerDeviceEmissions": "72.906409677519918"
}]
Note the need to convert types from the return values.
The condition below extracts the ‘domainCoverage’ field converts it to a string and allows for comparison with the string ‘missing’.
JSON_VALUE(J['domainCoverage']) != 'missing'
The condition below converts the emission value to a numeric value such that it can summed.
cast (JSON_VALUE(J['mediaDistributionEmissions']) as FLOAT64)
Updated about 1 year ago