2017-08-05 18:17:15 +00:00
|
|
|
// Copyright 2016 Google Inc. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package bigquery
|
|
|
|
|
|
|
|
import (
|
2017-12-03 20:01:25 +00:00
|
|
|
"io"
|
|
|
|
|
2017-08-05 18:17:15 +00:00
|
|
|
"golang.org/x/net/context"
|
|
|
|
bq "google.golang.org/api/bigquery/v2"
|
|
|
|
)
|
|
|
|
|
|
|
|
// LoadConfig holds the configuration for a load job.
|
|
|
|
type LoadConfig struct {
|
|
|
|
// Src is the source from which data will be loaded.
|
|
|
|
Src LoadSource
|
|
|
|
|
|
|
|
// Dst is the table into which the data will be loaded.
|
|
|
|
Dst *Table
|
|
|
|
|
|
|
|
// CreateDisposition specifies the circumstances under which the destination table will be created.
|
|
|
|
// The default is CreateIfNeeded.
|
|
|
|
CreateDisposition TableCreateDisposition
|
|
|
|
|
|
|
|
// WriteDisposition specifies how existing data in the destination table is treated.
|
|
|
|
// The default is WriteAppend.
|
|
|
|
WriteDisposition TableWriteDisposition
|
2017-12-03 20:01:25 +00:00
|
|
|
|
|
|
|
// The labels associated with this job.
|
|
|
|
Labels map[string]string
|
|
|
|
|
|
|
|
// If non-nil, the destination table is partitioned by time.
|
|
|
|
TimePartitioning *TimePartitioning
|
2018-03-30 09:41:12 +00:00
|
|
|
|
|
|
|
// Custom encryption configuration (e.g., Cloud KMS keys).
|
|
|
|
DestinationEncryptionConfig *EncryptionConfig
|
|
|
|
|
|
|
|
// SchemaUpdateOptions allows the schema of the destination table to be
|
|
|
|
// updated as a side effect of the load job.
|
|
|
|
SchemaUpdateOptions []string
|
2017-12-03 20:01:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
|
|
|
|
config := &bq.JobConfiguration{
|
|
|
|
Labels: l.Labels,
|
|
|
|
Load: &bq.JobConfigurationLoad{
|
2018-03-30 09:41:12 +00:00
|
|
|
CreateDisposition: string(l.CreateDisposition),
|
|
|
|
WriteDisposition: string(l.WriteDisposition),
|
|
|
|
DestinationTable: l.Dst.toBQ(),
|
|
|
|
TimePartitioning: l.TimePartitioning.toBQ(),
|
|
|
|
DestinationEncryptionConfiguration: l.DestinationEncryptionConfig.toBQ(),
|
|
|
|
SchemaUpdateOptions: l.SchemaUpdateOptions,
|
2017-12-03 20:01:25 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
media := l.Src.populateLoadConfig(config.Load)
|
|
|
|
return config, media
|
|
|
|
}
|
|
|
|
|
|
|
|
func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
|
|
|
|
lc := &LoadConfig{
|
2018-03-30 09:41:12 +00:00
|
|
|
Labels: q.Labels,
|
|
|
|
CreateDisposition: TableCreateDisposition(q.Load.CreateDisposition),
|
|
|
|
WriteDisposition: TableWriteDisposition(q.Load.WriteDisposition),
|
|
|
|
Dst: bqToTable(q.Load.DestinationTable, c),
|
|
|
|
TimePartitioning: bqToTimePartitioning(q.Load.TimePartitioning),
|
|
|
|
DestinationEncryptionConfig: bqToEncryptionConfig(q.Load.DestinationEncryptionConfiguration),
|
|
|
|
SchemaUpdateOptions: q.Load.SchemaUpdateOptions,
|
2017-12-03 20:01:25 +00:00
|
|
|
}
|
|
|
|
var fc *FileConfig
|
|
|
|
if len(q.Load.SourceUris) == 0 {
|
|
|
|
s := NewReaderSource(nil)
|
|
|
|
fc = &s.FileConfig
|
|
|
|
lc.Src = s
|
|
|
|
} else {
|
|
|
|
s := NewGCSReference(q.Load.SourceUris...)
|
|
|
|
fc = &s.FileConfig
|
|
|
|
lc.Src = s
|
|
|
|
}
|
|
|
|
bqPopulateFileConfig(q.Load, fc)
|
|
|
|
return lc
|
2017-08-05 18:17:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// A Loader loads data from Google Cloud Storage into a BigQuery table.
|
|
|
|
type Loader struct {
|
2017-12-03 20:01:25 +00:00
|
|
|
JobIDConfig
|
2017-08-05 18:17:15 +00:00
|
|
|
LoadConfig
|
|
|
|
c *Client
|
|
|
|
}
|
|
|
|
|
|
|
|
// A LoadSource represents a source of data that can be loaded into
|
|
|
|
// a BigQuery table.
|
|
|
|
//
|
|
|
|
// This package defines two LoadSources: GCSReference, for Google Cloud Storage
|
|
|
|
// objects, and ReaderSource, for data read from an io.Reader.
|
|
|
|
type LoadSource interface {
|
2017-12-03 20:01:25 +00:00
|
|
|
// populates config, returns media
|
|
|
|
populateLoadConfig(*bq.JobConfigurationLoad) io.Reader
|
2017-08-05 18:17:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// LoaderFrom returns a Loader which can be used to load data into a BigQuery table.
|
|
|
|
// The returned Loader may optionally be further configured before its Run method is called.
|
2017-09-13 12:09:48 +00:00
|
|
|
// See GCSReference and ReaderSource for additional configuration options that
|
|
|
|
// affect loading.
|
2017-08-05 18:17:15 +00:00
|
|
|
func (t *Table) LoaderFrom(src LoadSource) *Loader {
|
|
|
|
return &Loader{
|
|
|
|
c: t.c,
|
|
|
|
LoadConfig: LoadConfig{
|
|
|
|
Src: src,
|
|
|
|
Dst: t,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run initiates a load job.
|
|
|
|
func (l *Loader) Run(ctx context.Context) (*Job, error) {
|
2017-12-03 20:01:25 +00:00
|
|
|
job, media := l.newJob()
|
|
|
|
return l.c.insertJob(ctx, job, media)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *Loader) newJob() (*bq.Job, io.Reader) {
|
|
|
|
config, media := l.LoadConfig.toBQ()
|
|
|
|
return &bq.Job{
|
2018-03-30 09:41:12 +00:00
|
|
|
JobReference: l.JobIDConfig.createJobRef(l.c),
|
2017-12-03 20:01:25 +00:00
|
|
|
Configuration: config,
|
|
|
|
}, media
|
2017-08-05 18:17:15 +00:00
|
|
|
}
|