> ## Documentation Index
> Fetch the complete documentation index at: https://docs.permutive.com/llms.txt
> Use this file to discover all available pages before exploring further.

# AWS S3

> Export first-party event data to AWS S3 via streaming or batch routing and import audience data for cohort building and activation

export const NoBadge = () => {
  // Small pink pill that always renders the literal text "No".
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.5rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background: '#F7D0E2',
    color: '#1A1A1A',
    fontWeight: '500'
  };
  return <span style={pillStyle}>No</span>;
};

export const YesBadge = () => {
  // Small light-blue pill that always renders the literal text "Yes".
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.5rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background: '#C7E8F9',
    color: '#1A1A1A',
    fontWeight: '500'
  };
  return <span style={pillStyle}>Yes</span>;
};

export const BadgeRowCenter = ({label, children}) => {
  // Flex row: an uppercase grey `label` first, then `children`, pushed apart
  // with space-between and centered on the cross axis.
  const rowStyle = {
    display: 'flex',
    justifyContent: 'space-between',
    alignItems: 'center',
    marginBottom: '0.5rem'
  };
  const labelStyle = {
    fontSize: '0.625rem',
    color: '#6b7280',
    textTransform: 'uppercase',
    fontWeight: '500',
    letterSpacing: '0.05em'
  };
  return (
    <div style={rowStyle}>
      <span style={labelStyle}>{label}</span>
      {children}
    </div>
  );
};

export const BadgeRow = ({label, children}) => {
  // Flex row: an uppercase grey `label` first, then `children`, pushed apart
  // with space-between and aligned to the start of the cross axis.
  const rowStyle = {
    display: 'flex',
    justifyContent: 'space-between',
    alignItems: 'flex-start',
    marginBottom: '0.5rem'
  };
  const labelStyle = {
    fontSize: '0.625rem',
    color: '#6b7280',
    textTransform: 'uppercase',
    fontWeight: '500',
    letterSpacing: '0.05em'
  };
  return (
    <div style={rowStyle}>
      <span style={labelStyle}>{label}</span>
      {children}
    </div>
  );
};

export const BadgeContainer = ({children}) => {
  // Wrapping flex container that packs its children toward the end of the
  // row with a small gap between them.
  const containerStyle = {
    display: 'flex',
    gap: '0.25rem',
    flexWrap: 'wrap',
    justifyContent: 'flex-end',
    minWidth: '0',
    flex: '1'
  };
  return <div style={containerStyle}>{children}</div>;
};

export const ProductRequiredBadge = ({product}) => {
  // Renders `product` as a small pill. The three known product names get the
  // purple background; any other value (including undefined) gets haze.
  // The original followed every `return` with an unreachable `--purple;` /
  // `--haze;` statement (a mangled colour-name comment); those are now real
  // comments and the identical switch cases are collapsed into one lookup.
  const purpleProducts = ['Core Platform', 'Routing', 'Contextual'];
  const background = purpleProducts.includes(product)
    ? '#CB88FC' // purple
    : '#A7B3D9'; // haze
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.375rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background,
    color: '#1A1A1A',
    fontWeight: '500'
  };
  return <span style={pillStyle}>{product}</span>;
};

export const SdkRequiredBadge = ({required}) => {
  // Renders `required` as a small pill: 'Yes' is blue, 'No' is pink, and any
  // other value (including undefined) falls back to haze.
  // The original followed every `return` with an unreachable `--blue;` /
  // `--pink;` / `--haze;` statement (a mangled colour-name comment); those
  // are now real comments and the shadowing inner helper is gone.
  let background = '#A7B3D9'; // haze
  if (required === 'Yes') {
    background = '#C7E8F9'; // blue
  } else if (required === 'No') {
    background = '#F7D0E2'; // pink
  }
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.375rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background,
    color: '#1A1A1A',
    fontWeight: '500'
  };
  return <span style={pillStyle}>{required}</span>;
};

export const CapabilityBadge = ({capability}) => {
  // Renders `capability` as a small non-wrapping pill. Every known capability
  // name gets the clay background; any other value (including undefined)
  // gets haze.
  // The original followed every `return` with an unreachable `--clay;` /
  // `--haze;` statement (a mangled colour-name comment); those are now real
  // comments and the eight identical switch cases are collapsed into one lookup.
  const clayCapabilities = [
    'Event Collection',
    'Cohort Activation',
    'Campaign Optimization',
    'Identity Signal',
    'Contextual Signal',
    'Connectivity',
    'Routing',
    'Data Collaboration'
  ];
  const background = clayCapabilities.includes(capability)
    ? '#EFDFC8' // clay
    : '#A7B3D9'; // haze
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.375rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background,
    color: '#1A1A1A',
    fontWeight: '500',
    whiteSpace: 'nowrap'
  };
  return <span style={pillStyle}>{capability}</span>;
};

export const EnvironmentBadge = ({environment}) => {
  // Renders `environment` as a small non-wrapping pill. Every known
  // environment name gets the peach background; any other value (including
  // undefined) gets haze.
  // The original followed every `return` with an unreachable `--peach;` /
  // `--haze;` statement (a mangled colour-name comment); those are now real
  // comments and the five identical switch cases are collapsed into one lookup.
  const peachEnvironments = ['Web', 'iOS', 'Android', 'CTV', 'API Direct'];
  const background = peachEnvironments.includes(environment)
    ? '#F9C1A8' // peach
    : '#A7B3D9'; // haze
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.375rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background,
    color: '#1A1A1A',
    fontWeight: '500',
    whiteSpace: 'nowrap'
  };
  return <span style={pillStyle}>{environment}</span>;
};

export const DirectionBadge = ({direction}) => {
  // Renders `direction` as a small pill. The three known direction names get
  // the tomato background; any other value (including undefined) gets haze.
  // The original followed every `return` with an unreachable `--tomato;` /
  // `--haze;` statement (a mangled colour-name comment); those are now real
  // comments and the identical switch cases are collapsed into one lookup.
  const tomatoDirections = ['Bidirectional', 'Destination', 'Source'];
  const background = tomatoDirections.includes(direction)
    ? '#FA8784' // tomato
    : '#A7B3D9'; // haze
  const pillStyle = {
    display: 'inline-block',
    padding: '0.125rem 0.375rem',
    borderRadius: '0.25rem',
    fontSize: '0.625rem',
    background,
    color: '#1A1A1A',
    fontWeight: '500'
  };
  return <span style={pillStyle}>{direction}</span>;
};

<Card title="">
  <div style={{ display: 'flex', alignItems: 'center', marginBottom: '1rem' }}>
    <div style={{ width: '32px', height: '32px', marginRight: '0.75rem', display: 'flex', alignItems: 'center', justifyContent: 'center', flexShrink: 0 }}>
      <img src="https://mintcdn.com/permutive/pNhz39ducTVcQczh/images/integrations/logos/aws-s3.svg?fit=max&auto=format&n=pNhz39ducTVcQczh&q=85&s=17628a54653992dbf21ba4b93ced919b" alt="AWS S3" style={{ maxWidth: '32px', maxHeight: '32px', display: 'block' }} width="32" height="32" data-path="images/integrations/logos/aws-s3.svg" />
    </div>

    <h3 style={{ margin: 0, fontSize: '1.125rem', fontWeight: '600' }}>AWS S3</h3>
  </div>

  <div style={{ marginBottom: '1rem' }}>
    <BadgeRowCenter label="Direction">
      <DirectionBadge direction="Bidirectional" />
    </BadgeRowCenter>

    <BadgeRowCenter label="Environment">
      <BadgeContainer>
        <EnvironmentBadge environment="Web" />

        <EnvironmentBadge environment="iOS" />

        <EnvironmentBadge environment="Android" />

        <EnvironmentBadge environment="CTV" />

        <EnvironmentBadge environment="API Direct" />
      </BadgeContainer>
    </BadgeRowCenter>

    <BadgeRowCenter label="Capability">
      <BadgeContainer>
        <CapabilityBadge capability="Connectivity" />

        <CapabilityBadge capability="Routing" />
      </BadgeContainer>
    </BadgeRowCenter>

    <BadgeRowCenter label="SDK Required">
      <SdkRequiredBadge required="No" />
    </BadgeRowCenter>

    <BadgeRowCenter label="Product(s) Required">
      <BadgeContainer>
        <ProductRequiredBadge product="Core Platform" />

        <ProductRequiredBadge product="Routing" />
      </BadgeContainer>
    </BadgeRowCenter>
  </div>

  <p style={{ margin: 0, fontSize: '0.875rem', color: '#6b7280', lineHeight: '1.5' }}>
    AWS S3 allows publishers to securely store and manage large volumes of advertising and audience data in the cloud.
  </p>
</Card>

<CardGroup cols={2}>
  <Card title="Setup" href="#setup" icon="gear" />

  <Card title="Troubleshooting" href="#troubleshooting" icon="wrench" />
</CardGroup>

## Overview

The AWS S3 integration enables publishers to leverage Permutive's bidirectional data capabilities with their S3 storage. This integration operates in two modes:

**Routing (Destination)**: Export first-party event data from Permutive to S3 buckets. Permutive offers two distinct routing modes:

* **S3 Streaming**: Near real-time streaming, ideal for low-latency data pipelines and analytics
* **S3 Batch**: Daily scheduled exports, suitable for data warehouse ingestion and batch processing workflows

Read more in the [Routing](/products/connectivity/routing) documentation.

<Note>
  Routing capability requires the Routing package in addition to Core Platform. Contact your Customer Success Manager to enable Routing.
</Note>

**Connectivity (Source)**: Import audience data from your S3 storage into Permutive for cohort building and activation across your publisher inventory.

Both Routing modes support exporting event data, identity data (aliases), and segment metadata to customer-owned S3 buckets with Hive-style partitioning and compression.

## Environment Compatibility

| Environment    | Supported    | Notes |
| -------------- | ------------ | ----- |
| **Web**        | <YesBadge /> | --    |
| **iOS**        | <YesBadge /> | --    |
| **Android**    | <YesBadge /> | --    |
| **CTV**        | <YesBadge /> | --    |
| **API Direct** | <YesBadge /> | --    |

## Prerequisites

For **Routing (exporting data to S3)**:

* AWS account with permissions to create S3 buckets and IAM users/policies
* S3 bucket created in the appropriate AWS region
* IAM user with programmatic access credentials (Access Key ID and Secret Access Key)
* Ability to configure S3 bucket policies with specific permissions
* Secure method to share AWS credentials with Permutive (1Password or GPG encryption recommended)

## Setup

<Tabs>
  <Tab title="Routing Streaming Setup">
    S3 Streaming routing exports your first-party event data to an S3 bucket in near real-time as GZIP-compressed JSONL files with Hive-style partitioning. Data arrives with approximately 5-minute latency, making it ideal for low-latency data pipelines and ingestion into AWS services such as Athena, Redshift, or EMR.

    Setup requires coordination with Permutive support. You will need to prepare your AWS environment (S3 bucket, IAM user with programmatic access) and then share your bucket details and credentials with the Permutive team.

    ### Prerequisites

    * AWS account with permissions to create S3 buckets and IAM users/policies
    * An S3 bucket in a region-specific location (e.g., `us-east-1`, `eu-west-1`) with public access blocked
    * A dedicated IAM user with `s3:List*`, `s3:Get*`, `s3:Delete*`, and `s3:Put*` permissions on the bucket
    * A secure method to share AWS credentials with Permutive (1Password or GPG encryption recommended)

    For complete setup steps, see [Setting up S3 Streaming Routing](/guides/connectivity/routing/setting-up-s3-streaming-routing).

    ### What Happens After Setup

    Once routing is active:

    1. **Files stream to S3** in near real-time with approximately 5-minute latency
    2. **Hive-style partitions** are created automatically by hour
    3. **Event data** is written as GZIP-compressed JSONL files
    4. **File naming** follows the pattern `{timestamp}-{hash}-{worker_id}.jsonl.gz`

    See the [Streaming Schema](#streaming-schema) section below for detailed schema information.
  </Tab>

  <Tab title="Routing Batch Setup">
    S3 Batch routing exports your first-party event data to an S3 bucket on a scheduled 24-hour cycle. You can choose between JSON (GZIP compressed) or Parquet (Snappy compressed) format based on your data processing needs. Batch exports are suitable for data warehouse ingestion and batch processing workflows.

    Setup requires coordination with Permutive support. You will need to prepare an S3 bucket, choose your preferred data format, and apply a Permutive-provided bucket policy.

    ### Prerequisites

    * AWS account with permissions to create S3 buckets
    * An S3 bucket with public access blocked
    * Understanding of your data format requirements (JSON vs Parquet)

    <Warning>
      **Organization-Level Scope:** S3 Batch routing operates at the organization level, exporting data for all workspaces within your organization, unlike streaming routing which is workspace-specific.
    </Warning>

    For complete setup steps, see [Setting up S3 Batch Routing](/guides/connectivity/routing/setting-up-s3-batch-routing).

    ### What Happens After Setup

    Once batch routing is active:

    1. **Daily exports run automatically** on the configured 24-hour schedule
    2. **Event data is partitioned** by event type and date
    3. **Snapshot tables are replaced** each export cycle
    4. **Files are written** in your chosen format (JSON or Parquet)

    See the [Batch Schema](#batch-schema) section below for detailed schema information.
  </Tab>

  <Tab title="Connectivity Setup">
    ### Prerequisites

    * An AWS account with access to the S3 bucket you want to connect
    * Permission to modify the S3 bucket policy
    * Your data organized in the required directory structure

    ### Step 1: Set Up Your Bucket Structure

    Permutive uses the concept of a **Schema** (containing multiple **Tables**) to organize your data. Structure your bucket so that each table is a directory under your schema prefix:

    ```
    s3://<bucket_name>/<prefix>/<table_1>/
    s3://<bucket_name>/<prefix>/<table_2>/
    ```

    **Partitioned tables (recommended):**

    ```
    s3://<bucket_name>/<prefix>/<table_n>/<partition_name>=<value>/<data_file>.parquet
    ```

    **Non-partitioned tables:**

    ```
    s3://<bucket_name>/<prefix>/<table_n>/<data_file>.parquet
    ```

    <Note>
      We recommend using Parquet format with ZSTD compression for optimal performance. CSV (including gzipped) is also supported.
    </Note>

    ### Step 2: Configure Bucket Permissions

    <Steps>
      <Step title="Start Creating the Connection">
        In the Permutive dashboard, go to **Connectivity > Catalog** and select **Amazon S3**. Begin entering your connection details. Once you enter your bucket name, Permutive will generate a bucket policy for you.
      </Step>

      <Step title="Apply the Bucket Policy">
        Copy the generated S3 Bucket Policy from the Permutive dashboard:

        1. Open the AWS S3 Console and navigate to your bucket
        2. Go to the **Permissions** tab
        3. Under **Bucket policy**, click **Edit**
        4. Paste the policy provided by Permutive
        5. Click **Save changes**
      </Step>
    </Steps>

    ### Step 3: Create the Connection in Permutive

    <Steps>
      <Step title="Enter Connection Details">
        Fill in the following fields:

        | Field                 | Description                                           |
        | :-------------------- | :---------------------------------------------------- |
        | **Name**              | A descriptive name for your connection                |
        | **S3 Bucket Name**    | The bucket name (without `s3://` prefix)              |
        | **S3 Bucket Region**  | The AWS region where your bucket is located           |
        | **Schema Prefix**     | The path within your bucket that contains your tables |
        | **Data Format**       | Choose Parquet (recommended) or CSV                   |
        | **Data Partitioning** | Select whether tables are partitioned or not          |
      </Step>

      <Step title="Save the Connection">
        Click **Save** to create the connection. It will appear on your **Connections** page with a "Processing" status while Permutive validates access.
      </Step>
    </Steps>

    ### Step 4: Create an Import

    Once your connection is active, go to **Connectivity > Imports** and click **Create Import**, then select your S3 connection.

    For the complete setup guide with detailed instructions, see [Connecting to Amazon S3](/guides/connectivity/sources/connecting-to-amazon-s3).
  </Tab>
</Tabs>

## Data Types

### Streaming Schema

<AccordionGroup>
  <Accordion title="Event Data (events)">
    Events in S3 Streaming are exported in newline-delimited JSON format with the following structure:

    <ResponseField name="time" type="string" required>
      Unix timestamp in milliseconds as a string
    </ResponseField>

    <ResponseField name="event_id" type="string" required>
      Unique identifier for this event
    </ResponseField>

    <ResponseField name="user_id" type="string" required>
      Permutive user ID
    </ResponseField>

    <ResponseField name="event_name" type="string" required>
      Name of the event (e.g., `Pageview`, `slotclicked`)
    </ResponseField>

    <ResponseField name="organization_id" type="string" required>
      Organization identifier
    </ResponseField>

    <ResponseField name="project_id" type="string" required>
      Workspace/project identifier
    </ResponseField>

    <ResponseField name="session_id" type="string">
      Session identifier (optional)
    </ResponseField>

    <ResponseField name="view_id" type="string">
      Page view identifier (optional)
    </ResponseField>

    <ResponseField name="source_url" type="string">
      Source URL (optional)
    </ResponseField>

    <ResponseField name="segments" type="array[integer]">
      Array of segment IDs the user belongs to
    </ResponseField>

    <ResponseField name="properties" type="object">
      Custom event properties as key-value pairs
    </ResponseField>

    ### Example Event

    ```json theme={"dark"}
    {
      "time": "1665851625945",
      "event_id": "c0b8266d-3c4d-43d6-8855-6f42d657adda",
      "user_id": "87bcd76b-5eb6-4c46-afa8-017d1e7148ca",
      "event_name": "slotclicked",
      "organization_id": "be668577-07f5-444d-98e0-222b990951b1",
      "project_id": "72f6d4b5-1e85-4c79-b4f9-da2dd1f3be6d",
      "session_id": "4a96de87-f8b1-4240-a1a8-7b9c6cff569a",
      "view_id": "16f2af62-f38d-44d1-bcea-ba5b4da39be2",
      "source_url": null,
      "segments": [],
      "properties": {
        "campaign_id": 2387641642,
        "line_item_id": 4792767025
      }
    }
    ```
  </Accordion>

  <Accordion title="Identity Sync Data (sync_aliases)">
    Identity synchronization events contain cross-device and identity mapping data.

    <ResponseField name="time" type="string" required>
      Unix timestamp in milliseconds as a string
    </ResponseField>

    <ResponseField name="user_id" type="string" required>
      Permutive user ID
    </ResponseField>

    <ResponseField name="organization_id" type="string" required>
      Organization identifier
    </ResponseField>

    <ResponseField name="project_id" type="string" required>
      Workspace/project identifier
    </ResponseField>

    <ResponseField name="aliases" type="array" required>
      Array of alias objects, each containing:

      * `id`: The alias identifier value
      * `tag`: The alias type (e.g., `email_sha256`, `device_id`)
    </ResponseField>

    ### Example Sync Alias

    ```json theme={"dark"}
    {
      "time": "1665663771749",
      "user_id": "b5653712-26ee-41a8-8b30-c128092df93b",
      "organization_id": "be668577-07f5-444d-98e0-222b990951b1",
      "project_id": "be668577-07f5-444d-98e0-222b990951b1",
      "aliases": [
        {"id": "a1b2c3d4e5f6...", "tag": "email_sha256"},
        {"id": "device_12345", "tag": "device_id"}
      ]
    }
    ```
  </Accordion>

  <Accordion title="Segment Metadata (segment)">
    Segment metadata snapshots contain segment definitions. These files are NOT date-partitioned.

    <ResponseField name="id" type="string" required>
      Segment UUID
    </ResponseField>

    <ResponseField name="code" type="integer" required>
      Segment number/ID used in the segments array of events
    </ResponseField>

    <ResponseField name="name" type="string" required>
      Human-readable segment name
    </ResponseField>

    <ResponseField name="tags" type="array[string]">
      Array of tags associated with the segment
    </ResponseField>

    <ResponseField name="metadata" type="object">
      Additional segment metadata
    </ResponseField>

    <ResponseField name="workspace" type="string" required>
      Workspace identifier
    </ResponseField>

    <ResponseField name="ancestors" type="array[string]">
      Array of ancestor workspace/organization IDs
    </ResponseField>

    <ResponseField name="workspaceState" type="string">
      State of the workspace (e.g., "Active", "Deleted")
    </ResponseField>

    <ResponseField name="deleted" type="boolean">
      Whether the segment has been deleted
    </ResponseField>

    ### Example Segment

    ```json theme={"dark"}
    {
      "id": "5289b895-4ee7-44f8-81a6-1899142ed2d2",
      "code": 1057,
      "name": "High Value Users",
      "tags": [],
      "metadata": {},
      "workspace": "45582cb9-bb5c-4eb4-9c0d-7a2cebf4eeb1",
      "ancestors": ["45582cb9-bb5c-4eb4-9c0d-7a2cebf4eeb1", "be668577-07f5-444d-98e0-222b990951b1"],
      "workspaceState": "Active",
      "deleted": false
    }
    ```
  </Accordion>
</AccordionGroup>

### Batch Schema

<AccordionGroup>
  <Accordion title="Event Tables (e.g., pageview_events)">
    Batch exports create separate tables for each event type (e.g., `pageview_events`, `videoview_events`). All event tables share a common structure:

    <ResponseField name="time" type="timestamp">
      Timestamp for when the event was received by Permutive (in UTC)
    </ResponseField>

    <ResponseField name="event_id" type="string">
      Unique identifier for each individual event
    </ResponseField>

    <ResponseField name="user_id" type="string">
      Identifier unique to a particular user
    </ResponseField>

    <ResponseField name="session_id" type="string">
      Identifier unique to a user's session. A session expires after 30 minutes unless the user stays on site.
    </ResponseField>

    <ResponseField name="view_id" type="string">
      Identifier unique to a particular page or screen view
    </ResponseField>

    <ResponseField name="workspace_id" type="string">
      Identifier for the workspace which the event originated from
    </ResponseField>

    <ResponseField name="segments" type="array[integer]">
      A list of all segment IDs the user was in when the event fired
    </ResponseField>

    <ResponseField name="cohorts" type="array[string]">
      A list of all cohort codes the user was in when the event fired
    </ResponseField>

    <ResponseField name="properties" type="object">
      Event-specific properties as a nested object. Structure varies by event type.
    </ResponseField>

    ### Example Pageview Event

    ```json theme={"dark"}
    {
      "time": "2026-01-15T14:30:00Z",
      "event_id": "c0b8266d-3c4d-43d6-8855-6f42d657adda",
      "user_id": "87bcd76b-5eb6-4c46-afa8-017d1e7148ca",
      "session_id": "4a96de87-f8b1-4240-a1a8-7b9c6cff569a",
      "view_id": "16f2af62-f38d-44d1-bcea-ba5b4da39be2",
      "workspace_id": "72f6d4b5-1e85-4c79-b4f9-da2dd1f3be6d",
      "segments": [123, 456],
      "cohorts": ["abc123", "def456"],
      "properties": {
        "client": {
          "domain": "example.com",
          "type": "web",
          "url": "https://example.com/article",
          "referrer": "https://google.com",
          "title": "Example Article",
          "user_agent": "Mozilla/5.0..."
        }
      }
    }
    ```
  </Accordion>

  <Accordion title="Aliases Table (aliases)">
    Identity data and alias mappings for cross-device tracking.

    <ResponseField name="time" type="timestamp" required>
      Timestamp when the alias was captured
    </ResponseField>

    <ResponseField name="event_type" type="string">
      Type of alias event
    </ResponseField>

    <ResponseField name="permutive_id" type="string" required>
      Permutive user identifier
    </ResponseField>

    <ResponseField name="id" type="string" required>
      External identity value (e.g., hashed email, device ID)
    </ResponseField>

    <ResponseField name="tag" type="string" required>
      Identity tag or namespace (e.g., `email_sha256`, `device_id`)
    </ResponseField>

    <ResponseField name="workspace_id" type="string">
      Workspace identifier
    </ResponseField>

    ### Example Alias

    ```json theme={"dark"}
    {
      "time": "2026-01-15T14:30:00Z",
      "event_type": "alias_sync",
      "permutive_id": "87bcd76b-5eb6-4c46-afa8-017d1e7148ca",
      "id": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2",
      "tag": "email_sha256",
      "workspace_id": "72f6d4b5-1e85-4c79-b4f9-da2dd1f3be6d"
    }
    ```
  </Accordion>

  <Accordion title="Domains Table (domains)">
    Domain-level metadata. This is a snapshot table that is fully replaced with each export.

    <ResponseField name="name" type="string" required>
      Domain name
    </ResponseField>

    <ResponseField name="workspace_id" type="string">
      Workspace identifier
    </ResponseField>

    ### Example Domain

    ```json theme={"dark"}
    {
      "name": "example.com",
      "workspace_id": "72f6d4b5-1e85-4c79-b4f9-da2dd1f3be6d"
    }
    ```
  </Accordion>

  <Accordion title="Segment Metadata Table (segment_metadata)">
    Segment definitions and metadata. This is a snapshot table that is fully replaced with each export.

    <ResponseField name="number" type="integer" required>
      Segment ID number
    </ResponseField>

    <ResponseField name="name" type="string" required>
      Segment name
    </ResponseField>

    <ResponseField name="tags" type="array[string]">
      Array of tags associated with the segment
    </ResponseField>

    <ResponseField name="metadata" type="string">
      JSON string containing additional segment metadata
    </ResponseField>

    <ResponseField name="workspace_id" type="string">
      Workspace identifier
    </ResponseField>

    ### Example Segment Metadata

    ```json theme={"dark"}
    {
      "number": 123,
      "name": "High Value Users",
      "tags": ["advertising", "premium"],
      "metadata": "{\"description\": \"Users with high engagement\"}",
      "workspace_id": "72f6d4b5-1e85-4c79-b4f9-da2dd1f3be6d"
    }
    ```
  </Accordion>
</AccordionGroup>

### File Formats and Compression

<AccordionGroup>
  <Accordion title="Streaming Format">
    * **Format**: Newline-delimited JSON (`.jsonl`)
    * **Compression**: GZIP (`.gz`)
    * **File Extension**: `.jsonl.gz`
    * **Character Encoding**: UTF-8
  </Accordion>

  <Accordion title="Batch Formats">
    #### JSON Format

    * **Format**: Newline-delimited JSON
    * **Compression**: GZIP
    * **File Extension**: `.json.gz`
    * **Character Encoding**: UTF-8

    #### Parquet Format

    * **Format**: Apache Parquet columnar format
    * **Compression**: Snappy
    * **File Extension**: `.snappy.parquet`
    * **Schema**: Derived from BigQuery table structure

    <Info>
      Parquet format is recommended for data warehouse ingestion and analytics workloads due to better compression and query performance.
    </Info>
  </Accordion>
</AccordionGroup>

## Troubleshooting

<AccordionGroup>
  <Accordion title="Permission Denied Errors">
    **Symptom**: Files are not appearing in S3 bucket, or logs show permission errors.

    **Solution**:

    1. Verify the IAM user has all required permissions:
       * `s3:PutObject`
       * `s3:GetObject`
       * `s3:DeleteObject`
       * `s3:ListBucket`

    2. Check that the bucket policy includes the correct bucket ARN:
       ```json theme={"dark"}
       "Resource": [
         "arn:aws:s3:::YOUR_BUCKET_NAME/*",
         "arn:aws:s3:::YOUR_BUCKET_NAME"
       ]
       ```

    3. Verify the `bucket-owner-full-control` ACL condition is correctly configured

    4. Ensure the IAM user credentials (Access Key ID and Secret Access Key) are current and not expired

    <Warning>
      If you recently rotated AWS credentials, contact Permutive support at [technical-services@permutive.com](mailto:technical-services@permutive.com) to update the stored credentials.
    </Warning>
  </Accordion>

  <Accordion title="Data Not Appearing in S3 (Streaming)">
    **Symptom**: No files appearing in S3 bucket after setup, or files stopped appearing.

    **Solution**:

    1. Verify the Permutive SDK is properly deployed and events are being collected (check Event Inspector in the Dashboard)

    2. Low-traffic sites may see longer delays between files due to batch size thresholds

    3. Verify the bucket region matches the configured region:
       * Region must be specific (e.g., `eu-central-1`, not just `EU`)

    4. Verify bucket path structure is correct:
       ```
       s3://{bucket}/{prefix}/type=events/year=YYYY/month=MM/day=DD/hour=HH/
       ```

    5. If issues persist, contact Permutive support at [technical-services@permutive.com](mailto:technical-services@permutive.com) with your integration details
  </Accordion>

  <Accordion title="Data Not Appearing in S3 (Batch)">
    **Symptom**: Daily batch exports are missing or delayed.

    **Solution**:

    1. Batch exports run on 24-hour cycles. Check if sufficient time has passed since the last export window.

    2. Verify the Permutive SDK is properly deployed and events are being collected

    3. Contact Permutive support at [technical-services@permutive.com](mailto:technical-services@permutive.com) to check batch export job logs and status
  </Accordion>

  <Accordion title="Incorrect Bucket Path Structure">
    **Symptom**: Files appearing in unexpected locations or wrong folder structure.

    **Solution**:

    1. Verify the `bucketPrefix` configuration:
       * Should NOT include leading `/` unless intentional
       * Should NOT include bucket name
       * Example: `permutive/` not `/permutive/` or `s3://bucket/permutive/`

    2. For Streaming, data uses Hive-style partitioning:
       * `type=events/year=2026/month=01/day=15/hour=14/`
       * This is expected behavior and cannot be customized

    3. For Batch, data is organized by table name:
       * `data/{table_name}/year=2026/month=1/day=15/`
       * This is expected behavior and cannot be customized
  </Accordion>

  <Accordion title="Bucket Policy Validation Errors">
    **Symptom**: AWS returns validation errors when applying bucket policy.

    **Solution**:

    1. Ensure the bucket policy JSON is valid:
       * Check for missing commas, brackets, or quotes
       * Use AWS Policy Generator or an online JSON validator

    2. Verify ARN format is correct:
       * Bucket ARN: `arn:aws:s3:::BUCKET_NAME`
       * Object ARN: `arn:aws:s3:::BUCKET_NAME/*`
       * Note the three colons `:::` before bucket name

    3. Confirm the `StringEquals` condition is correctly formatted:
       ```json theme={"dark"}
       "Condition": {
         "StringEquals": {"s3:x-amz-acl": "bucket-owner-full-control"}
       }
       ```
  </Accordion>

  <Accordion title="Missing Event Types or Fields">
    **Symptom**: Some event types or fields are not appearing in exported data.

    **Solution**:

    1. Verify events are being collected in Permutive:
       * Check Event Inspector in the Dashboard to confirm events are tracked
       * Use browser developer console to verify SDK is firing events

    2. Check event schema matches expected structure:
       * Events must include required fields: `event_id`, `user_id`, `event_name`, etc.
       * Custom properties are in the `properties` object

    3. Schema changes may require integration reconfiguration:
       * Contact Permutive support if you've made significant schema changes
  </Accordion>

  <Accordion title="KMS Encryption Issues">
    **Symptom**: Errors related to KMS encryption when writing to S3.

    **Solution**:

    1. If using customer-managed KMS keys, verify the Permutive IAM user has KMS permissions:
       ```json theme={"dark"}
       {
         "Effect": "Allow",
         "Action": [
           "kms:Decrypt",
           "kms:Encrypt",
           "kms:GenerateDataKey"
         ],
         "Resource": "arn:aws:kms:REGION:ACCOUNT_ID:key/KEY_ID"
       }
       ```

    2. Confirm the KMS key policy allows the Permutive IAM user to use the key

    3. Verify the S3 bucket's default encryption settings are compatible

    <Note>
      AWS-managed S3 encryption (SSE-S3) is supported by default. Customer-managed KMS keys require additional configuration. Contact Permutive support for KMS requirements.
    </Note>
  </Accordion>
</AccordionGroup>

## Changelog

<Info>
  No changes listed yet. For detailed changelog information, visit our [Changelog](https://changelog.permutive.com/).
</Info>
