Skip to content

Fix the site canonical URL to include the visitor token if necessary #3126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/dry-knives-hide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"gitbook-v2": patch
---

Fix the site canonical URL to include the visitor token if necessary
74 changes: 73 additions & 1 deletion packages/gitbook-v2/src/lib/data/urls.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, it } from 'bun:test';

import { getURLLookupAlternatives, normalizeURL } from './urls';
import { getSiteCanonicalURL, getURLLookupAlternatives, normalizeURL } from './urls';

describe('getURLLookupAlternatives', () => {
it('should return all URLs up to the root', () => {
Expand Down Expand Up @@ -364,6 +364,78 @@ describe('getURLLookupAlternatives', () => {
});
});

describe('getSiteCanonicalURL', () => {
    /**
     * Resolve the canonical URL (as a string) for one fixed site fixture,
     * varying only the visitor token lookup passed to `getSiteCanonicalURL`.
     */
    const canonicalFor = (visitorToken: Parameters<typeof getSiteCanonicalURL>[1]): string =>
        getSiteCanonicalURL(
            {
                site: 'site_foo',
                siteSpace: 'sitesp_foo',
                basePath: '/foo/',
                siteBasePath: '/foo/',
                organization: 'org_foo',
                space: 'space_foo',
                pathname: '/hello/world',
                complete: false,
                apiToken: 'api_token_foo',
                canonicalUrl: 'https://example.com/docs/foo/hello/world',
            },
            visitorToken
        ).toString();

    it('should have the jwt token in canonical url if token was from the source url', () => {
        const canonical = canonicalFor({
            source: 'url',
            token: 'jwt_foo',
        });

        expect(canonical).toEqual('https://example.com/docs/foo/hello/world?jwt_token=jwt_foo');
    });

    it('should not have the jwt token in canonical url if token was NOT from the source url', () => {
        // Token read from the visitor-auth cookie.
        const fromVisitorAuthCookie = canonicalFor({
            source: 'visitor-auth-cookie',
            basePath: '/foo/',
            token: 'jwt_foo',
        });
        expect(fromVisitorAuthCookie).toEqual('https://example.com/docs/foo/hello/world');

        // Token read from the GitBook visitor cookie.
        const fromGitBookVisitorCookie = canonicalFor({
            source: 'gitbook-visitor-cookie',
            token: 'jwt_foo',
        });
        expect(fromGitBookVisitorCookie).toEqual('https://example.com/docs/foo/hello/world');
    });
});

describe('normalizeURL', () => {
it('should remove trailing slashes', () => {
expect(normalizeURL(new URL('https://docs.mycompany.com/hello/'))).toEqual(
Expand Down
19 changes: 19 additions & 0 deletions packages/gitbook-v2/src/lib/data/urls.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import { VISITOR_AUTH_PARAM, type VisitorTokenLookup } from '@/lib/visitor-token';
import type { PublishedSiteContent } from '@gitbook/api';

/**
* For a given GitBook URL, return a list of alternative URLs that could be matched against to lookup the content.
* The approach is optimized to aim at reusing cached lookup results as much as possible.
Expand Down Expand Up @@ -110,6 +113,22 @@ export function getURLLookupAlternatives(input: URL) {
return { urls: alternatives, basePath, changeRequest, revision };
}

/**
 * Build the canonical URL of a resolved site from `siteURLData.canonicalUrl`.
 *
 * The visitor token is added as the `VISITOR_AUTH_PARAM` query parameter only
 * when the lookup reports that it came from the URL (`source === 'url'`).
 * For any other source, or when no lookup is provided, the canonical URL is
 * returned without the token.
 *
 * @param siteURLData - Resolved site content; only `canonicalUrl` is read here.
 * @param visitorToken - Result of the visitor token lookup.
 * @returns A new `URL` instance for the site.
 */
export function getSiteCanonicalURL(
    siteURLData: PublishedSiteContent,
    visitorToken: VisitorTokenLookup
): URL {
    const canonical = new URL(siteURLData.canonicalUrl);

    // Tokens that did not arrive through the URL are never written into the canonical URL.
    if (visitorToken?.source !== 'url') {
        return canonical;
    }

    canonical.searchParams.set(VISITOR_AUTH_PARAM, visitorToken.token);
    return canonical;
}

/**
* Normalize a URL to remove duplicate slashes and trailing slashes
* and transform the pathname to lowercase.
Expand Down
4 changes: 2 additions & 2 deletions packages/gitbook-v2/src/lib/data/visitor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ describe('getVisitorAuthBasePath', () => {
{
site: 'site_foo',
siteSpace: 'sitesp_foo',
basePath: '/foo',
siteBasePath: '/foo',
basePath: '/foo/',
siteBasePath: '/foo/',
organization: 'org_foo',
space: 'space_foo',
pathname: '/hello/world',
Expand Down
12 changes: 5 additions & 7 deletions packages/gitbook-v2/src/middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { serveResizedImage } from '@/routes/image';
import {
DataFetcherError,
getPublishedContentByURL,
getSiteCanonicalURL,
getVisitorAuthBasePath,
normalizeURL,
throwIfDataError,
Expand Down Expand Up @@ -146,20 +147,17 @@ async function serveSiteRoutes(requestURL: URL, request: NextRequest) {

// We use the host/origin from the canonical URL to ensure the links are
// correctly generated when the site is proxied. e.g. https://proxy.gitbook.com/site/siteId/...
const siteCanonicalURL = new URL(siteURLData.canonicalUrl);
const siteCanonicalURL = getSiteCanonicalURL(siteURLData, visitorToken);

//
// Make sure the URL is clean of any va token after a successful lookup
// The token is stored in a cookie that is set on the redirect response
//
const incomingURL = mode === 'url' ? requestURL : siteCanonicalURL;
const requestURLWithoutToken = normalizeVisitorAuthURL(incomingURL);
if (
requestURLWithoutToken !== incomingURL &&
requestURLWithoutToken.toString() !== incomingURL.toString()
) {
const incomingURLWithoutToken = normalizeVisitorAuthURL(incomingURL);
if (incomingURLWithoutToken.toString() !== incomingURL.toString()) {
Comment on lines 156 to +158
Copy link
Member Author

@taranvohra taranvohra Apr 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For proxied requests, our incoming URL is the siteCanonicalURL, but it does not include the VA token. So a VA site, when proxied, would not remove the token from the URL after a successful resolution. This PR fixes that.

return writeResponseCookies(
NextResponse.redirect(requestURLWithoutToken.toString()),
NextResponse.redirect(incomingURLWithoutToken.toString()),
cookies
);
}
Expand Down
2 changes: 1 addition & 1 deletion packages/gitbook/src/lib/visitor-token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { type JwtPayload, jwtDecode } from 'jwt-decode';
import type { NextRequest } from 'next/server';
import hash from 'object-hash';

const VISITOR_AUTH_PARAM = 'jwt_token';
export const VISITOR_AUTH_PARAM = 'jwt_token';
export const VISITOR_TOKEN_COOKIE = 'gitbook-visitor-token';

/**
Expand Down