-
Notifications
You must be signed in to change notification settings - Fork 62
Gcs remote data #121
Gcs remote data #121
Changes from all commits
e41ea37
c08ced5
c787862
d485760
32cd3e5
dc6dfce
ab7a74b
371e463
bd81d84
41cd0e5
efc0c1e
2e5bd3d
07c3c1c
b3ca5ff
93d8794
5ae7ee4
9d62d5e
02c9eae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ type RemoteDataHandlerConfig struct { | |
Retries int // Number of times to attempt to initialize a new config on failure. | ||
Region string | ||
SignedURLDurationMinutes int | ||
SigningPrincipal string | ||
RemoteDataStoreClient *storage.DataStore | ||
} | ||
|
||
|
@@ -45,6 +46,12 @@ func GetRemoteDataHandler(cfg RemoteDataHandlerConfig) RemoteDataHandler { | |
return &remoteDataHandler{ | ||
remoteURL: implementations.NewAWSRemoteURL(awsConfig, presignedURLDuration), | ||
} | ||
case common.GCP: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @katrogan / @honnix with the new change to return the entire data as part of the getData API, we should probably think of making signing optional (we have to keep it around to ensure that very large datasets can still be served). By optional I mean, that if it is not specified, the signing should just be skipped. I do not think my comment needs to be added as part of this commit, but as a follow up? @katrogan what do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah that was what i had in the original idl pr - adding a mode to specify what data we want returned and making only the signed url a non-default option. returning the signed url data optionally sounds good to me |
||
signedURLDuration := time.Minute * time.Duration(cfg.SignedURLDurationMinutes) | ||
return &remoteDataHandler{ | ||
remoteURL: implementations.NewGCPRemoteURL(cfg.SigningPrincipal, signedURLDuration), | ||
} | ||
|
||
case common.Local: | ||
logger.Infof(context.TODO(), "setting up local signer ----- ") | ||
// Since minio = aws s3, we are creating the same client but using the config primitives from aws | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
package implementations | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
gax "github.com/googleapis/gax-go/v2" | ||
"github.com/lyft/flyteidl/gen/pb-go/flyteidl/admin" | ||
"golang.org/x/oauth2" | ||
|
||
credentials "cloud.google.com/go/iam/credentials/apiv1" | ||
gcs "cloud.google.com/go/storage" | ||
"github.com/golang/protobuf/ptypes/timestamp" | ||
"github.com/lyft/flyteadmin/pkg/data/interfaces" | ||
"github.com/lyft/flyteadmin/pkg/errors" | ||
"github.com/lyft/flytestdlib/logger" | ||
"github.com/lyft/flytestdlib/storage" | ||
"google.golang.org/api/option" | ||
credentialspb "google.golang.org/genproto/googleapis/iam/credentials/v1" | ||
"google.golang.org/grpc/codes" | ||
) | ||
|
||
const gcsScheme = "gs" | ||
|
||
type iamCredentialsInterface interface { | ||
SignBlob(ctx context.Context, req *credentialspb.SignBlobRequest, opts ...gax.CallOption) (*credentialspb.SignBlobResponse, error) | ||
GenerateAccessToken(ctx context.Context, req *credentialspb.GenerateAccessTokenRequest, opts ...gax.CallOption) (*credentialspb.GenerateAccessTokenResponse, error) | ||
} | ||
|
||
type gcsClientWrapper struct { | ||
delegate *gcs.Client | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hey @honnix I'm not super familiar with the delegate pattern, what's the reason for using it here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this so you can mock out the bucket like you commented below? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. That is the only way I could figure out how to test this fluid API. |
||
} | ||
|
||
type bucketHandleWrapper struct { | ||
delegate *gcs.BucketHandle | ||
} | ||
|
||
type objectHandleWrapper struct { | ||
delegate *gcs.ObjectHandle | ||
} | ||
|
||
type gcsInterface interface { | ||
Bucket(name string) bucketHandleInterface | ||
} | ||
|
||
type bucketHandleInterface interface { | ||
Object(name string) objectHandleInterface | ||
} | ||
|
||
type objectHandleInterface interface { | ||
Attrs(ctx context.Context) (attrs *gcs.ObjectAttrs, err error) | ||
} | ||
|
||
// GCP-specific implementation of RemoteURLInterface | ||
type GCPRemoteURL struct { | ||
iamCredentialsClient iamCredentialsInterface | ||
gcsClient gcsInterface | ||
signDuration time.Duration | ||
signingPrincipal string | ||
} | ||
|
||
type GCPGCSObject struct { | ||
bucket string | ||
object string | ||
} | ||
|
||
type impersonationTokenSource struct { | ||
iamCredentialsClient iamCredentialsInterface | ||
signingPrincipal string | ||
} | ||
|
||
func (c *gcsClientWrapper) Bucket(name string) bucketHandleInterface { | ||
return &bucketHandleWrapper{delegate: c.delegate.Bucket(name)} | ||
} | ||
|
||
func (b *bucketHandleWrapper) Object(name string) objectHandleInterface { | ||
return &objectHandleWrapper{delegate: b.delegate.Object(name)} | ||
} | ||
|
||
func (o *objectHandleWrapper) Attrs(ctx context.Context) (attrs *gcs.ObjectAttrs, err error) { | ||
return o.delegate.Attrs(ctx) | ||
} | ||
|
||
func (g *GCPRemoteURL) splitURI(ctx context.Context, uri string) (GCPGCSObject, error) { | ||
scheme, container, key, err := storage.DataReference(uri).Split() | ||
if err != nil { | ||
return GCPGCSObject{}, err | ||
} | ||
if scheme != gcsScheme { | ||
logger.Debugf(ctx, "encountered unexpected scheme: %s for GCS URI: %s", scheme, uri) | ||
return GCPGCSObject{}, errors.NewFlyteAdminErrorf(codes.InvalidArgument, | ||
"unexpected scheme %s for GCS URI", scheme) | ||
} | ||
return GCPGCSObject{ | ||
bucket: container, | ||
object: key, | ||
}, nil | ||
} | ||
|
||
func (g *GCPRemoteURL) signURL(ctx context.Context, gcsURI GCPGCSObject) (string, error) { | ||
opts := &gcs.SignedURLOptions{ | ||
Method: "GET", | ||
GoogleAccessID: g.signingPrincipal, | ||
SignBytes: func(b []byte) ([]byte, error) { | ||
req := &credentialspb.SignBlobRequest{ | ||
Payload: b, | ||
Name: "projects/-/serviceAccounts/" + g.signingPrincipal, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what's the meaning of this string prefix? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a GCP resource name: https://cloud.google.com/iam/docs/reference/rest/v1/projects.serviceAccounts/signBlob Basically, it means the |
||
} | ||
resp, err := g.iamCredentialsClient.SignBlob(ctx, req) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return resp.SignedBlob, nil | ||
}, | ||
Expires: time.Now().Add(g.signDuration), | ||
} | ||
|
||
return gcs.SignedURL(gcsURI.bucket, gcsURI.object, opts) | ||
} | ||
|
||
func (g *GCPRemoteURL) Get(ctx context.Context, uri string) (admin.UrlBlob, error) { | ||
logger.Debugf(ctx, "Getting signed url for - %s", uri) | ||
gcsURI, err := g.splitURI(ctx, uri) | ||
if err != nil { | ||
logger.Debugf(ctx, "failed to extract gcs bucket and object from uri: %s", uri) | ||
return admin.UrlBlob{}, errors.NewFlyteAdminErrorf(codes.InvalidArgument, "invalid uri: %s", uri) | ||
} | ||
|
||
// First, get the size of the url blob. | ||
attrs, err := g.gcsClient.Bucket(gcsURI.bucket).Object(gcsURI.object).Attrs(ctx) | ||
honnix marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
logger.Debugf(ctx, "failed to get object size for %s with %v", uri, err) | ||
return admin.UrlBlob{}, errors.NewFlyteAdminErrorf( | ||
codes.Internal, "failed to get object size for %s with %v", uri, err) | ||
} | ||
|
||
urlStr, err := g.signURL(ctx, gcsURI) | ||
if err != nil { | ||
logger.Warning(ctx, | ||
"failed to presign url for uri [%s] for %v with err %v", uri, g.signDuration, err) | ||
return admin.UrlBlob{}, errors.NewFlyteAdminErrorf(codes.Internal, | ||
"failed to presign url for uri [%s] for %v with err %v", uri, g.signDuration, err) | ||
} | ||
return admin.UrlBlob{ | ||
Url: urlStr, | ||
Bytes: attrs.Size, | ||
}, nil | ||
} | ||
|
||
func (ts impersonationTokenSource) Token() (*oauth2.Token, error) { | ||
req := credentialspb.GenerateAccessTokenRequest{ | ||
Name: "projects/-/serviceAccounts/" + ts.signingPrincipal, | ||
Scope: []string{"https://www.googleapis.com/auth/devstorage.read_only"}, | ||
} | ||
|
||
resp, err := ts.iamCredentialsClient.GenerateAccessToken(context.Background(), &req) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return &oauth2.Token{ | ||
AccessToken: resp.AccessToken, | ||
Expiry: asTime(resp.ExpireTime), | ||
}, nil | ||
} | ||
|
||
func asTime(t *timestamp.Timestamp) time.Time { | ||
return time.Unix(t.GetSeconds(), int64(t.GetNanos())).UTC() | ||
} | ||
|
||
func NewGCPRemoteURL(signingPrincipal string, signDuration time.Duration) interfaces.RemoteURLInterface { | ||
iamCredentialsClient, err := credentials.NewIamCredentialsClient(context.Background()) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
gcsClient, err := gcs.NewClient(context.Background(), | ||
option.WithScopes(gcs.ScopeReadOnly), | ||
option.WithTokenSource(impersonationTokenSource{ | ||
iamCredentialsClient: iamCredentialsClient, | ||
signingPrincipal: signingPrincipal, | ||
})) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
return &GCPRemoteURL{ | ||
iamCredentialsClient: iamCredentialsClient, | ||
gcsClient: &gcsClientWrapper{delegate: gcsClient}, | ||
signDuration: signDuration, | ||
signingPrincipal: signingPrincipal, | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the last version staying on protobuf v1.3.5. Since v1.4.0,
github.com/golang/protobuf
started to depend ongoogle.golang.org/protobuf
: https://github.com/golang/protobuf/blob/v1.4.0/ptypes/timestamp/timestamp.pb.go#L9So sooner or later we will need to face this issue again.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh this is awesome, thank you for finding it