Added files.
This commit is contained in:
parent
5d3a3908a6
commit
6a110561e3
69 changed files with 7370 additions and 1 deletions
15
.editorconfig
Normal file
15
.editorconfig
Normal file
|
@ -0,0 +1,15 @@
|
|||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
indent_size = 2
|
||||
charset = utf-8
|
||||
insert_final_newline = true
|
||||
end_of_line = lf
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.{yml,yaml}]
|
||||
indent_style = space
|
16
.eslintrc.cjs
Normal file
16
.eslintrc.cjs
Normal file
|
@ -0,0 +1,16 @@
|
|||
module.exports = {
|
||||
root: true,
|
||||
parser: '@typescript-eslint/parser',
|
||||
parserOptions: {
|
||||
tsconfigRootDir: __dirname,
|
||||
project: ['./tsconfig.json', './test/tsconfig.json'],
|
||||
},
|
||||
ignorePatterns: ['**/.eslintrc.cjs'],
|
||||
extends: [
|
||||
'plugin:@valkyriecoms/recommended',
|
||||
],
|
||||
rules: {
|
||||
'@typescript-eslint/prefer-nullish-coalescing': 'off',
|
||||
'import/no-default-export': 'off',
|
||||
},
|
||||
};
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
/node_modules
|
||||
/built
|
||||
npm-debug.log
|
9
.npmignore
Normal file
9
.npmignore
Normal file
|
@ -0,0 +1,9 @@
|
|||
/node_modules
|
||||
/src
|
||||
/test
|
||||
.gitignore
|
||||
npm-debug.log
|
||||
gulpfile.js
|
||||
tsconfig.json
|
||||
.eslintrc.cjs
|
||||
.editorconfig
|
24
.swcrc
Normal file
24
.swcrc
Normal file
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"$schema": "https://json.schemastore.org/swcrc",
|
||||
"jsc": {
|
||||
"parser": {
|
||||
"syntax": "typescript",
|
||||
"dynamicImport": true,
|
||||
"decorators": true
|
||||
},
|
||||
"transform": {
|
||||
"legacyDecorator": true,
|
||||
"decoratorMetadata": true
|
||||
},
|
||||
"experimental": {
|
||||
"keepImportAssertions": true
|
||||
},
|
||||
"baseUrl": ".",
|
||||
"paths": {},
|
||||
"target": "es2021"
|
||||
},
|
||||
"module": {
|
||||
"type": "es6"
|
||||
},
|
||||
"minify": true
|
||||
}
|
161
README.md
161
README.md
|
@ -1,2 +1,161 @@
|
|||
# summaly
|
||||
summaly
|
||||
================================================================
|
||||
|
||||
Installation
|
||||
----------------------------------------------------------------
|
||||
```
|
||||
npm install @valkyriecoms/summaly
|
||||
```
|
||||
|
||||
Usage
|
||||
----------------------------------------------------------------
|
||||
As a function:
|
||||
|
||||
```javascript
|
||||
import { summaly } from '@valkyriecoms/summaly';
|
||||
|
||||
summaly(url[, opts])
|
||||
```
|
||||
|
||||
As Fastify plugin:
|
||||
(handles `GET` requests on `/`)
|
||||
|
||||
```javascript
|
||||
import Summaly from '@valkyriecoms/summaly';
|
||||
|
||||
fastify.register(Summaly[, opts])
|
||||
```
|
||||
|
||||
Run the server:
|
||||
|
||||
```
|
||||
git clone https://toastielab.dev/Valkyriecoms/summaly.git
|
||||
cd summaly
|
||||
NODE_ENV=development npm install
|
||||
npm run build
|
||||
npm run serve
|
||||
```
|
||||
|
||||
#### opts (SummalyOptions)
|
||||
|
||||
| Property | Type | Description | Default |
|
||||
|:--------------------------|:-----------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------|
|
||||
| **lang** | *string* | Accept-Language for the request | `null` |
|
||||
| **followRedirects** | *boolean* | Whether to follow redirects | `true` |
|
||||
| **plugins** | *plugin[]* (see below) | Custom plugins | `[]` |
|
||||
| **agent** | *Got.Agents* | Custom HTTP agent (see below) | `null` |
|
||||
| **userAgent** | *string* | User-Agent for the request | `SummalyBot/[version]` |
|
||||
| **responseTimeout** | *number* | Set timeouts for each phase, such as host name resolution and socket communication. | `20000` |
|
||||
| **operationTimeout** | *number* | Set the timeout from the start to the end of the request. | `60000` |
|
||||
| **contentLengthLimit** | *number* | Maximum content length in bytes. An error will occur if the content-length value returned from the other server is larger than this parameter (or if the received body size exceeds this parameter). | `10485760` |
|
||||
| **contentLengthRequired** | *boolean* | If set to true, it will be an error if the other server does not return content-length. | `false` |
|
||||
|
||||
#### Plugin
|
||||
|
||||
``` typescript
|
||||
interface SummalyPlugin {
|
||||
test: (url: URL) => boolean;
|
||||
summarize: (url: URL, opts?: GeneralScrapingOptions) => Promise<Summary | null>;
|
||||
}
|
||||
```
|
||||
|
||||
URLs passed to plugins are WHATWG `URL` objects since v4.
|
||||
|
||||
#### Custom HTTP agent for proxy
|
||||
You can specify agents to be passed to Got for proxy use, etc.
|
||||
https://github.com/sindresorhus/got/blob/v12.6.0/documentation/tips.md#proxying
|
||||
|
||||
**⚠️If you set some agent, local IP rejecting will not work.⚠️**
|
||||
(Summaly usually rejects local IPs.)
|
||||
|
||||
(Summaly currently does not support http2.)
|
||||
|
||||
### Returns
|
||||
|
||||
A Promise of an Object that contains properties below:
|
||||
|
||||
※ Almost all values are nullable. player should not be null.
|
||||
|
||||
#### SummalyResult
|
||||
|
||||
| Property | Type | Description |
|
||||
|:----------------|:-------------------|:-----------------------------------------------------------|
|
||||
| **title** | *string* \| *null* | The title of the web page |
|
||||
| **icon** | *string* \| *null* | The url of the icon of the web page |
|
||||
| **description** | *string* \| *null* | The description of the web page |
|
||||
| **thumbnail** | *string* \| *null* | The url of the thumbnail of the web page |
|
||||
| **sitename** | *string* \| *null* | The name of the web site |
|
||||
| **player** | *Player* | The player of the web page |
|
||||
| **sensitive** | *boolean* | Whether the url is sensitive |
|
||||
| **activityPub** | *string* \| *null* | The url of the ActivityPub representation of that web page |
|
||||
| **url** | *string* | The url of the web page |
|
||||
|
||||
#### Summary
|
||||
|
||||
`Omit<SummalyResult, "url">`
|
||||
|
||||
#### Player
|
||||
|
||||
| Property | Type | Description |
|
||||
|:-----------|:-------------------|:------------------------------------------------|
|
||||
| **url** | *string* \| *null* | The url of the player |
|
||||
| **width** | *number* \| *null* | The width of the player |
|
||||
| **height** | *number* \| *null* | The height of the player |
|
||||
| **allow** | *string[]* | The names of the allowed permissions for iframe |
|
||||
|
||||
Currently the possible items in `allow` are:
|
||||
|
||||
* `autoplay`
|
||||
* `clipboard-write`
|
||||
* `fullscreen`
|
||||
* `encrypted-media`
|
||||
* `picture-in-picture`
|
||||
* `web-share`
|
||||
|
||||
See [Permissions Policy](https://developer.mozilla.org/en-US/docs/Web/HTTP/Permissions_Policy) in MDN for details of them.
|
||||
|
||||
### Example
|
||||
|
||||
```javascript
|
||||
import { summaly } from '@valkyriecoms/summaly';
|
||||
|
||||
const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');
|
||||
|
||||
console.log(summary);
|
||||
```
|
||||
|
||||
will be ... ↓
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)",
|
||||
"icon": "https://www.youtube.com/s/desktop/28b0985e/img/favicon.ico",
|
||||
"description": "Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...",
|
||||
"thumbnail": "https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg",
|
||||
"player": {
|
||||
"url": "https://www.youtube.com/embed/NMIEAhH_fTU?feature=oembed",
|
||||
"width": 200,
|
||||
"height": 113,
|
||||
"allow": [
|
||||
"autoplay",
|
||||
"clipboard-write",
|
||||
"encrypted-media",
|
||||
"picture-in-picture",
|
||||
"web-share",
|
||||
"fullscreen"
|
||||
]
|
||||
},
|
||||
"sitename": "YouTube",
|
||||
"sensitive": false,
|
||||
"activityPub": null,
|
||||
"url": "https://www.youtube.com/watch?v=NMIEAhH_fTU"
|
||||
}
|
||||
```
|
||||
|
||||
Testing
|
||||
----------------------------------------------------------------
|
||||
`npm run test`
|
||||
|
||||
License
|
||||
----------------------------------------------------------------
|
||||
[MIT](LICENSE)
|
||||
|
|
208
jest.config.js
Normal file
208
jest.config.js
Normal file
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
* For a detailed explanation regarding each configuration property and type check, visit:
|
||||
* https://jestjs.io/docs/en/configuration.html
|
||||
*/
|
||||
|
||||
export default {
|
||||
// All imported modules in your tests should be mocked automatically
|
||||
// automock: false,
|
||||
|
||||
// Stop running tests after `n` failures
|
||||
// bail: 0,
|
||||
|
||||
// The directory where Jest should store its cached dependency information
|
||||
// cacheDirectory: "C:\\Users\\ai\\AppData\\Local\\Temp\\jest",
|
||||
|
||||
// Automatically clear mock calls and instances between every test
|
||||
// clearMocks: false,
|
||||
|
||||
// Indicates whether the coverage information should be collected while executing the test
|
||||
// collectCoverage: false,
|
||||
|
||||
// An array of glob patterns indicating a set of files for which coverage information should be collected
|
||||
collectCoverageFrom: ['src/**/*.ts'],
|
||||
|
||||
// The directory where Jest should output its coverage files
|
||||
coverageDirectory: "coverage",
|
||||
|
||||
// An array of regexp pattern strings used to skip coverage collection
|
||||
// coveragePathIgnorePatterns: [
|
||||
// "\\\\node_modules\\\\"
|
||||
// ],
|
||||
|
||||
// Indicates which provider should be used to instrument code for coverage
|
||||
coverageProvider: "v8",
|
||||
|
||||
// A list of reporter names that Jest uses when writing coverage reports
|
||||
// coverageReporters: [
|
||||
// "json",
|
||||
// "text",
|
||||
// "lcov",
|
||||
// "clover"
|
||||
// ],
|
||||
|
||||
// An object that configures minimum threshold enforcement for coverage results
|
||||
// coverageThreshold: undefined,
|
||||
|
||||
// A path to a custom dependency extractor
|
||||
// dependencyExtractor: undefined,
|
||||
|
||||
// Make calling deprecated APIs throw helpful error messages
|
||||
// errorOnDeprecated: false,
|
||||
|
||||
// Force coverage collection from ignored files using an array of glob patterns
|
||||
// forceCoverageMatch: [],
|
||||
|
||||
// A path to a module which exports an async function that is triggered once before all test suites
|
||||
// globalSetup: undefined,
|
||||
|
||||
// A path to a module which exports an async function that is triggered once after all test suites
|
||||
// globalTeardown: undefined,
|
||||
|
||||
// A set of global variables that need to be available in all test environments
|
||||
globals: {
|
||||
},
|
||||
|
||||
// The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
|
||||
// maxWorkers: "50%",
|
||||
|
||||
// An array of directory names to be searched recursively up from the requiring module's location
|
||||
// moduleDirectories: [
|
||||
// "node_modules"
|
||||
// ],
|
||||
|
||||
// An array of file extensions your modules use
|
||||
// moduleFileExtensions: [
|
||||
// "js",
|
||||
// "json",
|
||||
// "jsx",
|
||||
// "ts",
|
||||
// "tsx",
|
||||
// "node"
|
||||
// ],
|
||||
|
||||
// A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
|
||||
moduleNameMapper: {
|
||||
// Do not resolve .wasm.js to .wasm by the rule below
|
||||
'^(.+)\\.wasm\\.js$': '$1.wasm.js',
|
||||
// SWC converts @/foo/bar.js to `../../src/foo/bar.js`, and then this rule
|
||||
// converts it again to `../../src/foo/bar` which then can be resolved to
|
||||
// `.ts` files.
|
||||
// See https://github.com/swc-project/jest/issues/64#issuecomment-1029753225
|
||||
// TODO: Use `--allowImportingTsExtensions` on TypeScript 5.0 so that we can
|
||||
// directly import `.ts` files without this hack.
|
||||
'^(\\.{1,2}/.*)\\.js$': '$1',
|
||||
},
|
||||
|
||||
// An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
|
||||
// modulePathIgnorePatterns: [],
|
||||
|
||||
// Activates notifications for test results
|
||||
// notify: false,
|
||||
|
||||
// An enum that specifies notification mode. Requires { notify: true }
|
||||
// notifyMode: "failure-change",
|
||||
|
||||
// A preset that is used as a base for Jest's configuration
|
||||
//preset: "ts-jest/presets/js-with-ts-esm",
|
||||
|
||||
// Run tests from one or more projects
|
||||
// projects: undefined,
|
||||
|
||||
// Use this configuration option to add custom reporters to Jest
|
||||
// reporters: undefined,
|
||||
|
||||
// Automatically reset mock state between every test
|
||||
// resetMocks: false,
|
||||
|
||||
// Reset the module registry before running each individual test
|
||||
// resetModules: false,
|
||||
|
||||
// A path to a custom resolver
|
||||
// resolver: './jest-resolver.cjs',
|
||||
|
||||
// Automatically restore mock state between every test
|
||||
restoreMocks: true,
|
||||
|
||||
// The root directory that Jest should scan for tests and modules within
|
||||
// rootDir: undefined,
|
||||
|
||||
// A list of paths to directories that Jest should use to search for files in
|
||||
roots: [
|
||||
"<rootDir>"
|
||||
],
|
||||
|
||||
// Allows you to use a custom runner instead of Jest's default test runner
|
||||
// runner: "jest-runner",
|
||||
|
||||
// The paths to modules that run some code to configure or set up the testing environment before each test
|
||||
// setupFiles: [],
|
||||
|
||||
// A list of paths to modules that run some code to configure or set up the testing framework before each test
|
||||
// setupFilesAfterEnv: [],
|
||||
|
||||
// The number of seconds after which a test is considered as slow and reported as such in the results.
|
||||
// slowTestThreshold: 5,
|
||||
|
||||
// A list of paths to snapshot serializer modules Jest should use for snapshot testing
|
||||
// snapshotSerializers: [],
|
||||
|
||||
// The test environment that will be used for testing
|
||||
testEnvironment: "node",
|
||||
|
||||
// Options that will be passed to the testEnvironment
|
||||
// testEnvironmentOptions: {},
|
||||
|
||||
// Adds a location field to test results
|
||||
// testLocationInResults: false,
|
||||
|
||||
// The glob patterns Jest uses to detect test files
|
||||
testMatch: [
|
||||
"<rootDir>/test/index.ts",
|
||||
],
|
||||
|
||||
// An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
|
||||
// testPathIgnorePatterns: [
|
||||
// "\\\\node_modules\\\\"
|
||||
// ],
|
||||
|
||||
// The regexp pattern or array of patterns that Jest uses to detect test files
|
||||
// testRegex: [],
|
||||
|
||||
// This option allows the use of a custom results processor
|
||||
// testResultsProcessor: undefined,
|
||||
|
||||
// This option allows use of a custom test runner
|
||||
// testRunner: "jasmine2",
|
||||
|
||||
// This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
|
||||
// testURL: "http://localhost",
|
||||
|
||||
// Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
|
||||
// timers: "real",
|
||||
|
||||
// A map from regular expressions to paths to transformers
|
||||
transform: {
|
||||
"^.+\\.(t|j)sx?$": ["@swc/jest"],
|
||||
},
|
||||
|
||||
// An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
|
||||
// transformIgnorePatterns: [
|
||||
// "\\\\node_modules\\\\",
|
||||
// "\\.pnp\\.[^\\\\]+$"
|
||||
// ],
|
||||
|
||||
// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
|
||||
// unmockedModulePathPatterns: undefined,
|
||||
|
||||
// Indicates whether each individual test should be reported during the run
|
||||
// verbose: undefined,
|
||||
|
||||
// An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
|
||||
// watchPathIgnorePatterns: [],
|
||||
|
||||
// Whether to use watchman for file crawling
|
||||
// watchman: true,
|
||||
|
||||
extensionsToTreatAsEsm: ['.ts'],
|
||||
};
|
52
package.json
Normal file
52
package.json
Normal file
|
@ -0,0 +1,52 @@
|
|||
{
|
||||
"name": "@valkyriecoms/summaly",
|
||||
"version": "5.1.0",
|
||||
"description": "Get web page's summary",
|
||||
"author": "Toastie <toastie@toastiet0ast.com>",
|
||||
"license": "MIT",
|
||||
"repository": "https://toastielab.dev/Valkyriecoms/summaly.git",
|
||||
"bugs": "https://toastielab.dev/Valkyriecoms/summaly/issues",
|
||||
"main": "./built/index.js",
|
||||
"type": "module",
|
||||
"types": "./built/index.d.ts",
|
||||
"packageManager": "pnpm@8.13.1",
|
||||
"files": [
|
||||
"built",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"eslint": "eslint src --ext .js,.jsx,.ts,.tsx",
|
||||
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --silent=false --verbose false",
|
||||
"serve": "fastify start ./built/index.js"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@swc/core": "^1.3.101",
|
||||
"@swc/jest": "^0.2.29",
|
||||
"@types/cheerio": "0.22.18",
|
||||
"@types/debug": "4.1.7",
|
||||
"@types/escape-regexp": "^0.0.1",
|
||||
"@types/node": "20.10.6",
|
||||
"@typescript-eslint/eslint-plugin": "^6.16.0",
|
||||
"@typescript-eslint/parser": "^6.16.0",
|
||||
"@valkyriecoms/eslint-plugin": "1.0.0",
|
||||
"debug": "^4.3.4",
|
||||
"eslint": "^8.56.0",
|
||||
"eslint-plugin-import": "^2.29.1",
|
||||
"fastify": "^4.25.2",
|
||||
"fastify-cli": "^5.9.0",
|
||||
"jest": "^29.7.0",
|
||||
"typescript": "5.3.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"cheerio": "1.0.0-rc.12",
|
||||
"escape-regexp": "0.0.1",
|
||||
"got": "^12.6.1",
|
||||
"html-entities": "2.3.2",
|
||||
"iconv-lite": "0.6.3",
|
||||
"jschardet": "3.0.0",
|
||||
"private-ip": "2.3.3",
|
||||
"trace-redirect": "1.0.6"
|
||||
}
|
||||
}
|
4789
pnpm-lock.yaml
Normal file
4789
pnpm-lock.yaml
Normal file
File diff suppressed because it is too large
Load diff
4
src/@types/package.json.d.ts
vendored
Normal file
4
src/@types/package.json.d.ts
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
declare module '*/package.json' {
|
||||
export const name: string;
|
||||
export const version: string;
|
||||
}
|
283
src/general.ts
Normal file
283
src/general.ts
Normal file
|
@ -0,0 +1,283 @@
|
|||
import { URL } from 'node:url';
|
||||
import { decode as decodeHtml } from 'html-entities';
|
||||
import * as cheerio from 'cheerio';
|
||||
import clip from './utils/clip.js';
|
||||
import cleanupTitle from './utils/cleanup-title.js';
|
||||
|
||||
import { get, head, scpaping } from './utils/got.js';
|
||||
import type { default as Summary, Player } from './summary.js';
|
||||
|
||||
/**
 * Builds a sanitized player description from the page's JSON oEmbed endpoint.
 *
 * Only the iframe of the oEmbed `html` snippet is used, as the thumbnail is
 * mostly covered by OpenGraph instead. Width should always be treated as 100%
 * by the embedder.
 *
 * @param $ - Parsed document of the page being summarized.
 * @param pageUrl - URL of the page; used as the base when the oEmbed link is relative.
 * @returns The player, or `null` when the page has no oEmbed link, the oEmbed
 * document cannot be fetched or parsed, or the embed fails any safety check below.
 */
async function getOEmbedPlayer($: cheerio.CheerioAPI, pageUrl: string): Promise<Player | null> {
	const href = $('link[type="application/json+oembed"]').attr('href');
	if (!href) {
		return null;
	}

	const oEmbedUrl = (() => {
		try {
			return new URL(href, pageUrl);
		} catch { return null; }
	})();
	if (!oEmbedUrl) {
		return null;
	}

	const oEmbed = await get(oEmbedUrl.href).catch(() => null);
	if (!oEmbed) {
		return null;
	}

	const body = (() => {
		try {
			return JSON.parse(oEmbed);
		} catch { return null; }
	})();

	if (!body || body.version !== '1.0' || !['rich', 'video'].includes(body.type)) {
		// Not a well formed rich oEmbed
		return null;
	}

	// `html` comes from a remote server: make sure it really is a string before
	// calling string methods on it, otherwise a malformed response would throw
	// and reject the whole summary.
	if (typeof body.html !== 'string') {
		return null;
	}

	if (!body.html.startsWith('<iframe ') || !body.html.endsWith('</iframe>')) {
		// It includes something else than an iframe
		return null;
	}

	const oEmbedHtml = cheerio.load(body.html);
	const iframe = oEmbedHtml('iframe');

	if (iframe.length !== 1) {
		// Somehow we either have multiple iframes or none
		return null;
	}

	if (iframe.parents().length !== 2) {
		// Should only have the body and html elements as the parents
		return null;
	}

	const url = iframe.attr('src');
	if (!url) {
		// No src?
		return null;
	}

	try {
		if ((new URL(url)).protocol !== 'https:') {
			// Allow only HTTPS for best security
			return null;
		}
	} catch {
		// Relative or otherwise unparsable src
		return null;
	}

	// Height is the most important, width is okay to be null. The implementer
	// should choose fixed height instead of fixed aspect ratio if width is null.
	//
	// For example, Spotify's embed page does not strictly follow aspect ratio
	// and thus keeping the height is better than keeping the aspect ratio.
	//
	// Spotify gives `width: 100%, height: 152px` for iframe while `width: 456,
	// height: 152` for oEmbed data, and we treat any percentages as null here.
	let width: number | null = Number(iframe.attr('width') ?? body.width);
	if (Number.isNaN(width)) {
		width = null;
	}
	// Math.min propagates NaN, so the check below still catches a missing height.
	const height = Math.min(Number(iframe.attr('height') ?? body.height), 1024);
	if (Number.isNaN(height)) {
		// No proper height info
		return null;
	}

	// TODO: This implementation only allows basic syntax of `allow`.
	// Might need to implement better later.
	const safeList = [
		'autoplay',
		'clipboard-write',
		'fullscreen',
		'encrypted-media',
		'picture-in-picture',
		'web-share',
	];
	// YouTube has these but they are almost never used.
	const ignoredList = [
		'gyroscope',
		'accelerometer',
	];
	const allowedPermissions =
		(iframe.attr('allow') ?? '').split(/\s*;\s*/g)
			// The separator regex leaves whitespace at the very start/end of the attribute
			.map(s => s.trim())
			.filter(s => s)
			.filter(s => !ignoredList.includes(s));
	if (iframe.attr('allowfullscreen') === '' && !allowedPermissions.includes('fullscreen')) {
		allowedPermissions.push('fullscreen');
	}
	if (allowedPermissions.some(allow => !safeList.includes(allow))) {
		// This iframe is probably too powerful to be embedded
		return null;
	}

	return {
		url,
		width,
		height,
		allow: allowedPermissions,
	};
}
|
||||
|
||||
/**
 * Per-request settings accepted by the general scraper and handed to plugins.
 * Every field is optional.
 */
export type GeneralScrapingOptions = {
	/** Language value for the request; `null` or omitted means none. */
	lang?: string | null;

	/** User-Agent for the request. */
	userAgent?: string;

	/** Response timeout. */
	responseTimeout?: number;

	/** Operation timeout. */
	operationTimeout?: number;

	/** Maximum content length. */
	contentLengthLimit?: number;

	/** Whether the content length is required. */
	contentLengthRequired?: boolean;
};
|
||||
|
||||
/**
 * Summarizes a page from its OpenGraph, Twitter Card and standard HTML metadata.
 *
 * According to the docs, the `name` attribute of a meta tag is used for
 * Twitter Cards, but for compatibility this also looks at the `property`
 * attribute. The `property` attribute is what OpenGraph uses.
 * See https://developer.twitter.com/en/docs/twitter-for-websites/cards/overview/summary
 * and https://ogp.me/
 *
 * @param _url - Page to summarize, as a `URL` or an absolute URL string.
 * @param opts - Request settings forwarded to the scraping helper.
 * @returns The summary, or `null` when no title value is available.
 * @throws If `_url` is a string that is not a valid absolute URL, or if the
 * scraping helper rejects.
 */
export default async (_url: URL | string, opts?: GeneralScrapingOptions): Promise<Summary | null> => {
	// Discard a language value that is not a plain comma-separated list of tags
	let lang = opts?.lang;
	if (lang && !lang.match(/^[\w-]+(\s*,\s*[\w-]+)*$/)) lang = null;

	const url = typeof _url === 'string' ? new URL(_url) : _url;

	const res = await scpaping(url.href, {
		lang: lang || undefined,
		userAgent: opts?.userAgent,
		responseTimeout: opts?.responseTimeout,
		operationTimeout: opts?.operationTimeout,
		contentLengthLimit: opts?.contentLengthLimit,
		contentLengthRequired: opts?.contentLengthRequired,
	});
	const $ = res.$;

	// Resolves a reference found in the page against the page URL. The value is
	// page-controlled, so a malformed one yields null instead of throwing and
	// aborting the whole summary.
	const resolve = (ref: string): URL | null => {
		try {
			return new URL(ref, url.href);
		} catch {
			return null;
		}
	};

	const twitterCard =
		$('meta[name="twitter:card"]').attr('content') ||
		$('meta[property="twitter:card"]').attr('content');

	let title: string | null | undefined =
		$('meta[property="og:title"]').attr('content') ||
		$('meta[name="twitter:title"]').attr('content') ||
		$('meta[property="twitter:title"]').attr('content') ||
		$('title').text();

	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	if (title === undefined || title === null) {
		return null;
	}

	title = clip(decodeHtml(title), 100);

	const imageRef =
		$('meta[property="og:image"]').attr('content') ||
		$('meta[name="twitter:image"]').attr('content') ||
		$('meta[property="twitter:image"]').attr('content') ||
		$('link[rel="image_src"]').attr('href') ||
		$('link[rel="apple-touch-icon"]').attr('href') ||
		$('link[rel="apple-touch-icon image_src"]').attr('href');

	const image = imageRef ? (resolve(imageRef)?.href ?? null) : null;

	// twitter:player is ignored for summary_large_image cards
	const playerUrl =
		(twitterCard !== 'summary_large_image' && $('meta[name="twitter:player"]').attr('content')) ||
		(twitterCard !== 'summary_large_image' && $('meta[property="twitter:player"]').attr('content')) ||
		$('meta[property="og:video"]').attr('content') ||
		$('meta[property="og:video:secure_url"]').attr('content') ||
		$('meta[property="og:video:url"]').attr('content');

	// Explicit radix so that a value such as "0x10" is never read as hexadecimal
	const playerWidth = parseInt(
		$('meta[name="twitter:player:width"]').attr('content') ||
		$('meta[property="twitter:player:width"]').attr('content') ||
		$('meta[property="og:video:width"]').attr('content') ||
		'', 10);

	const playerHeight = parseInt(
		$('meta[name="twitter:player:height"]').attr('content') ||
		$('meta[property="twitter:player:height"]').attr('content') ||
		$('meta[property="og:video:height"]').attr('content') ||
		'', 10);

	let description: string | null | undefined =
		$('meta[property="og:description"]').attr('content') ||
		$('meta[name="twitter:description"]').attr('content') ||
		$('meta[property="twitter:description"]').attr('content') ||
		$('meta[name="description"]').attr('content');

	description = description
		? clip(decodeHtml(description), 300)
		: null;

	// A description that merely repeats the title carries no information
	if (title === description) {
		description = null;
	}

	const siteName = decodeHtml(
		$('meta[property="og:site_name"]').attr('content') ||
		$('meta[name="application-name"]').attr('content') ||
		url.host,
	);

	const favicon =
		$('link[rel="shortcut icon"]').attr('href') ||
		$('link[rel="icon"]').attr('href') ||
		'/favicon.ico';

	const activityPub =
		$('link[rel="alternate"][type="application/activity+json"]').attr('href') || null;

	// https://developer.mixi.co.jp/connect/mixi_plugin/mixi_check/spec_mixi_check/#toc-18-
	const sensitive =
		$('meta[property=\'mixi:content-rating\']').attr('content') === '1';

	// Returns the resolved URL when the `head` helper succeeds for it, otherwise null
	const find = async (path: string): Promise<URL | null> => {
		const target = resolve(path);
		if (!target) {
			return null;
		}
		try {
			await head(target.href);
			return target;
		} catch {
			return null;
		}
	};

	// The icon probe and the oEmbed lookup are independent, so run them concurrently
	const [icon, oEmbed] = await Promise.all([
		find(favicon),
		getOEmbedPlayer($, url.href),
	]);

	// Clean up the title
	title = cleanupTitle(title, siteName);

	if (title === '') {
		title = siteName;
	}

	return {
		title: title || null,
		icon: icon?.href || null,
		description: description || null,
		thumbnail: image || null,
		// Prefer the oEmbed player; otherwise fall back to the meta tag values
		player: oEmbed ?? {
			url: playerUrl || null,
			width: Number.isNaN(playerWidth) ? null : playerWidth,
			height: Number.isNaN(playerHeight) ? null : playerHeight,
			allow: ['autoplay', 'encrypted-media', 'fullscreen'],
		},
		sitename: siteName || null,
		sensitive,
		activityPub,
	};
};
|
152
src/index.ts
Normal file
152
src/index.ts
Normal file
|
@ -0,0 +1,152 @@
|
|||
/**
|
||||
* summaly
|
||||
* https://github.com/misskey-dev/summaly
|
||||
*/
|
||||
|
||||
import { URL } from 'node:url';
|
||||
import tracer from 'trace-redirect';
|
||||
import * as Got from 'got';
|
||||
import { SummalyResult } from './summary.js';
|
||||
import { SummalyPlugin } from './iplugin.js';
|
||||
export * from './iplugin.js';
|
||||
import general, { GeneralScrapingOptions } from './general.js';
|
||||
import { setAgent } from './utils/got.js';
|
||||
import { plugins as builtinPlugins } from './plugins/index.js';
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
|
||||
/** Options accepted by `summaly` and by the Fastify plugin. */
export type SummalyOptions = {
	/** Accept-Language for the request. */
	lang?: string | null;

	/** Whether to follow redirects. */
	followRedirects?: boolean;

	/** Custom plugins. */
	plugins?: SummalyPlugin[];

	/** Custom HTTP agent. */
	agent?: Got.Agents;

	/** User-Agent for the request. */
	userAgent?: string;

	/**
	 * Response timeout.
	 * Applies to each phase, such as host name resolution and socket communication.
	 */
	responseTimeout?: number;

	/**
	 * Operation timeout.
	 * Applies from the start to the end of the request.
	 */
	operationTimeout?: number;

	/**
	 * Maximum content length.
	 * An error will occur if the content-length value returned from the other
	 * server is larger than this parameter (or if the received body size
	 * exceeds this parameter).
	 */
	contentLengthLimit?: number;

	/**
	 * Content length required.
	 * If set to true, it will be an error if the other server does not return
	 * content-length.
	 */
	contentLengthRequired?: boolean;
};
|
||||
|
||||
/**
 * Default values applied by `summaly` for options the caller does not supply.
 *
 * Declared with a type annotation rather than an `as` assertion so that the
 * literal is actually checked against `SummalyOptions`.
 */
export const summalyDefaultOptions: SummalyOptions = {
	lang: null,
	followRedirects: true,
	plugins: [],
};
|
||||
|
||||
/**
 * Summarize a web page.
 *
 * @param url - Absolute URL of the page.
 * @param options - Overrides for `summalyDefaultOptions`. When `agent` is
 * given it is installed through `setAgent` before anything else happens.
 * @returns The summary of the page, with `url` set to the URL that was
 * actually summarized (the redirect target when redirects are followed).
 * @throws If the URL cannot be parsed, if the selected summarizer rejects, or
 * with `failed summarize` when the summarizer yields no result.
 */
export const summaly = async (url: string, options?: SummalyOptions): Promise<SummalyResult> => {
	if (options?.agent) setAgent(options.agent);

	// Merge into a fresh object: assigning onto `summalyDefaultOptions` itself
	// would make one call's options leak into every later call.
	const opts: SummalyOptions = { ...summalyDefaultOptions, ...options };

	// Built-in plugins come first, so they take precedence over custom ones
	const plugins = builtinPlugins.concat(opts.plugins ?? []);

	let actualUrl = url;
	if (opts.followRedirects) {
		// I could use .catch(() => url), but it's a pain to feed trace-redirect to jest, so I'll use try-catch.
		try {
			actualUrl = await tracer(url);
		} catch {
			// Fall back to the URL as given
			actualUrl = url;
		}
	}

	const _url = new URL(actualUrl);

	// Find matching plugin
	const match = plugins.find(plugin => plugin.test(_url));

	// Get summary
	const scrapingOptions: GeneralScrapingOptions = {
		lang: opts.lang,
		userAgent: opts.userAgent,
		responseTimeout: opts.responseTimeout,
		operationTimeout: opts.operationTimeout,
		contentLengthLimit: opts.contentLengthLimit,
		contentLengthRequired: opts.contentLengthRequired,
	};

	// Use the general scraper when no plugin claims the URL
	const summary = await (match ? match.summarize : general)(_url, scrapingOptions);

	if (summary == null) {
		throw new Error('failed summarize');
	}

	return Object.assign(summary, {
		url: actualUrl,
	});
};
|
||||
|
||||
/**
 * Fastify plugin that exposes `summaly` as `GET /`.
 *
 * Query parameters: `url` (required) and `lang` (optional).
 * Responds 400 when `url` is missing and 500 when summarizing throws.
 */
export default function (fastify: FastifyInstance, options: SummalyOptions, done: (err?: Error) => void) {
	type SummalyQuery = {
		url?: string;
		lang?: string;
	};

	fastify.get<{ Querystring: SummalyQuery }>('/', async (req, reply) => {
		const { url, lang } = req.query;
		if (url == null) {
			return reply.status(400).send({
				error: 'url is required',
			});
		}

		try {
			// Plugin-level options are spread last, so they take precedence over
			// the per-request `lang` and the `followRedirects: false` default.
			return await summaly(url, {
				lang: lang as string,
				followRedirects: false,
				...options,
			});
		} catch (e) {
			return reply.status(500).send({
				error: e,
			});
		}
	});

	done();
}
|
8
src/iplugin.ts
Normal file
8
src/iplugin.ts
Normal file
|
@ -0,0 +1,8 @@
|
|||
import Summary from './summary.js';
|
||||
import type { URL } from 'node:url';
|
||||
import { GeneralScrapingOptions } from '@/general';
|
||||
|
||||
/**
 * Shape of a site-specific summarizer module.
 */
export interface SummalyPlugin {
	/** Decides whether this plugin applies to the given URL. */
	test: (url: URL) => boolean;

	/** Builds the summary for the given URL; may resolve to `null`. */
	summarize: (url: URL, opts?: GeneralScrapingOptions) => Promise<Summary | null>;
}
|
60
src/plugins/amazon.ts
Normal file
60
src/plugins/amazon.ts
Normal file
|
@ -0,0 +1,60 @@
|
|||
import { URL } from 'node:url';
|
||||
import { scpaping } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
|
||||
/**
 * Amazon storefront hostnames handled by this plugin.
 * `www.amazon.com.au` is the Australian storefront; `www.amazon.au` is kept
 * so URLs that matched before still match.
 */
const AMAZON_HOSTNAMES: ReadonlySet<string> = new Set([
	'www.amazon.com',
	'www.amazon.co.jp',
	'www.amazon.ca',
	'www.amazon.com.br',
	'www.amazon.com.mx',
	'www.amazon.co.uk',
	'www.amazon.de',
	'www.amazon.fr',
	'www.amazon.it',
	'www.amazon.es',
	'www.amazon.nl',
	'www.amazon.cn',
	'www.amazon.in',
	'www.amazon.au',
	'www.amazon.com.au',
]);

/**
 * Returns true when the URL's hostname is one of the known Amazon storefronts.
 */
export function test(url: URL): boolean {
	return AMAZON_HOSTNAMES.has(url.hostname);
}
|
||||
|
||||
/**
 * Scrapes an Amazon product page and assembles its summary from the page's
 * title, description, landing image and Twitter player meta tags.
 */
export async function summarize(url: URL): Promise<summary> {
	const { $ } = await scpaping(url.href);

	// Twitter card values are looked up by `property` first, then by `name`.
	const twitterMeta = (key: string): string | undefined =>
		$(`meta[property="${key}"]`).attr('content') ||
		$(`meta[name="${key}"]`).attr('content');

	const title = $('#title').text();
	const description =
		$('#productDescription').text() ||
		$('meta[name="description"]').attr('content');
	const thumbnail: string | undefined = $('#landingImage').attr('src');

	const playerUrl = twitterMeta('twitter:player');
	const playerWidth = twitterMeta('twitter:player:width');
	const playerHeight = twitterMeta('twitter:player:height');

	const player = {
		url: playerUrl || null,
		width: playerWidth ? parseInt(playerWidth) : null,
		height: playerHeight ? parseInt(playerHeight) : null,
		allow: playerUrl ? ['fullscreen', 'encrypted-media'] : [],
	};

	return {
		title: title ? title.trim() : null,
		icon: 'https://www.amazon.com/favicon.ico',
		description: description ? description.trim() : null,
		thumbnail: thumbnail ? thumbnail.trim() : null,
		player,
		sitename: 'Amazon',
		activityPub: null,
	};
}
|
17
src/plugins/branchio-deeplinks.ts
Normal file
17
src/plugins/branchio-deeplinks.ts
Normal file
|
@ -0,0 +1,17 @@
|
|||
import { URL } from 'node:url';
|
||||
import general, { GeneralScrapingOptions } from '../general.js';
|
||||
import Summary from '../summary.js';
|
||||
|
||||
/**
 * Returns true for Branch.io deep-link hosts (`<subdomain>.app.link`) and `spotify.link`.
 *
 * Hyphens are accepted in the subdomain so that hosts such as
 * `example-alternate.app.link` are recognised as well.
 */
export function test(url: URL): boolean {
	// Matching deep links with Branch.io
	return /^[a-zA-Z0-9-]+\.app\.link$/.test(url.hostname) ||
		url.hostname === 'spotify.link';
}
|
||||
|
||||
/**
 * Summarizes a Branch.io deep link by forcing the web destination and
 * delegating to the general scraper.
 */
export async function summarize(url: URL, opts?: GeneralScrapingOptions): Promise<Summary | null> {
	// https://help.branch.io/using-branch/docs/creating-a-deep-link#redirections
	// Prevent branch.io's own page from opening by forcibly redirecting to the web version.
	// Work on a copy so the caller's URL object is left untouched, and use `set`
	// so an existing `$web_only` parameter is overridden rather than duplicated.
	const webOnlyUrl = new URL(url.href);
	webOnlyUrl.searchParams.set('$web_only', 'true');

	return await general(webOnlyUrl, opts);
}
|
10
src/plugins/index.ts
Normal file
10
src/plugins/index.ts
Normal file
|
@ -0,0 +1,10 @@
|
|||
import * as amazon from './amazon.js';
|
||||
import * as wikipedia from './wikipedia.js';
|
||||
import * as branchioDeeplinks from './branchio-deeplinks.js';
|
||||
import { SummalyPlugin } from '@/iplugin.js';
|
||||
|
||||
/**
 * Built-in plugins, in the order they are listed.
 */
export const plugins: SummalyPlugin[] = [
	amazon,
	wikipedia,
	branchioDeeplinks,
];
|
48
src/plugins/wikipedia.ts
Normal file
48
src/plugins/wikipedia.ts
Normal file
|
@ -0,0 +1,48 @@
|
|||
import { URL } from 'node:url';
|
||||
import debug from 'debug';
|
||||
import { get } from '../utils/got.js';
|
||||
import summary from '../summary.js';
|
||||
import clip from './../utils/clip.js';
|
||||
|
||||
const log = debug('summaly:plugins:wikipedia');
|
||||
|
||||
/**
 * Returns true when the URL's hostname ends with `.wikipedia.org`.
 */
export function test(url: URL): boolean {
	const { hostname } = url;
	return hostname ? /\.wikipedia\.org$/.test(hostname) : false;
}
|
||||
|
||||
/**
 * Extracts the article title from a `/wiki/<title>` style path.
 * Everything after the second slash is kept, so titles containing `/` survive.
 * The result is percent-decoded so it can be re-encoded once when building the query.
 */
function extractTitle(pathname: string): string {
	const raw = pathname.split('/').slice(2).join('/');
	try {
		return decodeURIComponent(raw);
	} catch {
		// Malformed percent sequence: use the path text as-is.
		return raw;
	}
}

/**
 * Summarizes a Wikipedia article using the intro extract returned by the
 * wiki's `api.php` query endpoint.
 *
 * @throws Error('fetch failed') when the API response carries no page data.
 */
export async function summarize(url: URL): Promise<summary> {
	const lang = url.host ? url.host.split('.')[0] : null;
	const title = extractTitle(url.pathname);

	// URLSearchParams encodes the title, so characters such as `&` or `+`
	// can no longer break or alter the query string.
	const query = new URLSearchParams({
		format: 'json',
		action: 'query',
		prop: 'extracts',
		exintro: '',
		explaintext: '',
		titles: title,
	});
	const endpoint = `https://${lang}.wikipedia.org/w/api.php?${query.toString()}`;

	log(`lang is ${lang}`);
	log(`title is ${title}`);
	log(`endpoint is ${endpoint}`);

	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	let body = await get(endpoint) as any;
	body = JSON.parse(body);
	log(body);

	if (!('query' in body) || !('pages' in body.query)) {
		throw new Error('fetch failed');
	}

	// Only the first page entry of the response is used.
	const info = body.query.pages[Object.keys(body.query.pages)[0]];
	if (info == null) {
		throw new Error('fetch failed');
	}

	return {
		title: info.title,
		icon: 'https://wikipedia.org/static/favicon/wikipedia.ico',
		// A page entry may come without an extract; report that as null.
		description: typeof info.extract === 'string' ? clip(info.extract, 300) : null,
		thumbnail: `https://wikipedia.org/static/images/project-logos/${lang}wiki.png`,
		player: {
			url: null,
			width: null,
			height: null,
			allow: [],
		},
		sitename: 'Wikipedia',
		activityPub: null,
	};
}
|
72
src/summary.ts
Normal file
72
src/summary.ts
Normal file
|
@ -0,0 +1,72 @@
|
|||
/**
 * Summary of a web page.
 */
type Summary = {
	/** Title of the web page. */
	title: string | null;

	/** URL of the web page's icon. */
	icon: string | null;

	/** Description of the web page. */
	description: string | null;

	/** URL of the web page's thumbnail. */
	thumbnail: string | null;

	/** Name of the site the web page belongs to. */
	sitename: string | null;

	/** Embeddable player of the web page. */
	player: Player;

	/** Whether the page is possibly sensitive. */
	sensitive?: boolean;

	/** URL of the ActivityPub representation of the web page. */
	activityPub: string | null;
};
|
||||
|
||||
/**
 * A summary together with the URL it was produced for.
 */
export type SummalyResult = Summary & {
	/** The actual URL of the web page. */
	url: string;
};
|
||||
|
||||
export default Summary;
|
||||
|
||||
/**
 * Embeddable player information for a web page.
 */
export type Player = {
	/** URL of the player. */
	url: string | null;

	/** Width of the player. */
	width: number | null;

	/** Height of the player. */
	height: number | null;

	/** Permissions allowed for the player's iframe. */
	allow: string[];
};
|
26
src/utils/cleanup-title.ts
Normal file
26
src/utils/cleanup-title.ts
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* eslint-disable no-param-reassign */
|
||||
import escapeRegExp from 'escape-regexp';
|
||||
|
||||
/**
 * Trims a page title and, when it ends with "<separator> <site name>",
 * returns only the part in front of the separator.
 *
 * @param title    Raw page title.
 * @param siteName Site name to strip from the end of the title, if any.
 * @returns The cleaned title, or the trimmed title when nothing was stripped.
 */
export default function(title: string, siteName?: string | null): string {
	const trimmedTitle = title.trim();
	if (!siteName) {
		return trimmedTitle;
	}

	const escapedSiteName = escapeRegExp(siteName.trim());

	// "<title><optional space><- | : ・><optional space><site name>"
	const suffixPattern = new RegExp(`^(.+?)\\s?[\\-\\|:・]\\s?${escapedSiteName}$`);
	const found = suffixPattern.exec(trimmedTitle);
	const stripped = found ? found[1] : null;

	return stripped ? stripped : trimmedTitle;
}
|
16
src/utils/clip.ts
Normal file
16
src/utils/clip.ts
Normal file
|
@ -0,0 +1,16 @@
|
|||
import nullOrEmpty from "./null-or-empty";
|
||||
|
||||
/**
 * Trims `s` and shortens it to at most `max` characters, appending '...'
 * when anything was cut. Null-or-empty input is returned unchanged.
 */
export default function (s: string, max: number): string {
	if (nullOrEmpty(s)) {
		return s;
	}

	const trimmed = s.trim();
	return trimmed.length > max
		? trimmed.substring(0, max) + '...'
		: trimmed;
}
|
43
src/utils/encoding.ts
Normal file
43
src/utils/encoding.ts
Normal file
|
@ -0,0 +1,43 @@
|
|||
import iconv from 'iconv-lite';
|
||||
import jschardet from 'jschardet';
|
||||
|
||||
// Captures the value of a `charset=` declaration, case-insensitively.
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;

/**
 * Detect HTML encoding
 *
 * Byte-level detection is tried first; failing that, a `charset=` declaration
 * found in the body is used; otherwise 'utf-8' is assumed.
 * @param body Body in Buffer
 * @returns encoding
 */
export function detectEncoding(body: Buffer): string {
	// By detection
	const detected = jschardet.detect(body, { minimumThreshold: 0.99 });
	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	const sniffed = detected ? toEncoding(detected.encoding) : null;
	if (sniffed != null) return sniffed;

	// From meta
	const declared = regCharset.exec(body.toString('ascii'));
	const fromMeta = declared ? toEncoding(declared[1]) : null;
	if (fromMeta != null) return fromMeta;

	return 'utf-8';
}
|
||||
|
||||
/**
 * Decodes `body` into a string using the given encoding.
 */
export function toUtf8(body: Buffer, encoding: string): string {
	const decoded = iconv.decode(body, encoding);
	return decoded;
}
|
||||
|
||||
/**
 * Maps a candidate encoding name to one iconv can decode.
 * Shift_JIS aliases are mapped to 'cp932'; names iconv does not know yield null.
 */
function toEncoding(candidate: string): string | null {
	if (!iconv.encodingExists(candidate)) {
		return null;
	}

	const shiftJisAliases = ['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'];
	return shiftJisAliases.includes(candidate.toLowerCase()) ? 'cp932' : candidate;
}
|
186
src/utils/got.ts
Normal file
186
src/utils/got.ts
Normal file
|
@ -0,0 +1,186 @@
|
|||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import got, * as Got from 'got';
|
||||
import * as cheerio from 'cheerio';
|
||||
import PrivateIp from 'private-ip';
|
||||
import { StatusError } from './status-error.js';
|
||||
import { detectEncoding, toUtf8 } from './encoding.js';
|
||||
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
|
||||
/** Agents handed to every got request; empty until `setAgent` is called. */
export let agent: Got.Agents = {};

/**
 * Replaces the agents used for subsequent requests.
 * A falsy argument resets them to an empty object.
 */
export function setAgent(_agent: Got.Agents) {
	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	agent = _agent ? _agent : {};
}
|
||||
|
||||
/**
 * Arguments for a single HTTP request issued by this module.
 */
export type GotOptions = {
	/** Address to request. */
	url: string;
	/** HTTP method. */
	method: 'GET' | 'POST' | 'HEAD';
	/** Request body, if any. */
	body?: string;
	/** Request headers. */
	headers: Record<string, string | undefined>;
	/** When set, the response content-type must match this pattern. */
	typeFilter?: RegExp;
	/** Timeout applied to the individual request phases. */
	responseTimeout?: number;
	/** Timeout for the request as a whole. */
	operationTimeout?: number;
	/** Largest accepted response size. */
	contentLengthLimit?: number;
	/** When true, a response without content-length is rejected. */
	contentLengthRequired?: boolean;
}
|
||||
|
||||
// package.json is read once at load time; its version goes into the default user agent.
const repo = JSON.parse(readFileSync(`${_dirname}/../../package.json`, 'utf8'));

/** Fallback when no `responseTimeout` is given. */
const DEFAULT_RESPONSE_TIMEOUT = 20 * 1000;

/** Fallback when no `operationTimeout` is given. */
const DEFAULT_OPERATION_TIMEOUT = 60 * 1000;

/** Fallback when no `contentLengthLimit` is given. */
const DEFAULT_MAX_RESPONSE_SIZE = 10 * 1024 * 1024;

/** User agent sent when the caller does not supply one. */
const DEFAULT_BOT_UA = `SummalyBot/${repo.version}`;
|
||||
|
||||
/**
 * Throws when `ip` is a private address and private addresses are not allowed.
 * Private addresses are allowed when SUMMALY_ALLOW_PRIVATE_IP is 'true'
 * or when a custom agent has been configured.
 */
function rejectPrivateIp(ip: string | undefined): void {
	// SUMMALY_ALLOW_PRIVATE_IP is for testing purposes
	const allowPrivateIp = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true' || Object.keys(agent).length > 0;
	if (!allowPrivateIp && ip && PrivateIp(ip)) {
		throw new StatusError(`Private IP rejected ${ip}`, 400, 'Private IP Rejected');
	}
}

/**
 * Fetches an HTML page and parses it with cheerio.
 *
 * A HEAD request is sent first, then the page is fetched with GET. Both
 * responses are checked against the private-IP guard and the type/size
 * limits applied by `getResponse`.
 *
 * @param url  Address of the page.
 * @param opts Optional language, user agent, timeouts and size limits.
 * @returns The decoded body, the cheerio handle `$`, and the GET response.
 * @throws StatusError (400) when a response came from a private IP address.
 */
export async function scpaping(
	url: string,
	opts?: {
		lang?: string;
		userAgent?: string;
		responseTimeout?: number;
		operationTimeout?: number;
		contentLengthLimit?: number;
		contentLengthRequired?: boolean;
	},
) {
	const args: Omit<GotOptions, 'method'> = {
		url,
		headers: {
			'accept': 'text/html,application/xhtml+xml',
			'user-agent': opts?.userAgent ?? DEFAULT_BOT_UA,
			'accept-language': opts?.lang,
		},
		typeFilter: /^(text\/html|application\/xhtml\+xml)/,
		responseTimeout: opts?.responseTimeout,
		operationTimeout: opts?.operationTimeout,
		contentLengthLimit: opts?.contentLengthLimit,
		contentLengthRequired: opts?.contentLengthRequired,
	};

	const headResponse = await getResponse({
		...args,
		method: 'HEAD',
	});
	rejectPrivateIp(headResponse.ip);

	const response = await getResponse({
		...args,
		method: 'GET',
	});
	// The GET is a separate connection and may reach a different address than
	// the HEAD did, so the guard is applied again before the body is used.
	rejectPrivateIp(response.ip);

	const encoding = detectEncoding(response.rawBody);
	const body = toUtf8(response.rawBody, encoding);
	const $ = cheerio.load(body);

	return {
		body,
		$,
		response,
	};
}
|
||||
|
||||
/**
 * Issues a GET request accepting any content type and resolves to the response body.
 */
export async function get(url: string) {
	const { body } = await getResponse({
		url,
		method: 'GET',
		headers: { 'accept': '*/*' },
	});

	return body;
}
|
||||
|
||||
/**
 * Issues a HEAD request accepting any content type and resolves to the response.
 */
export async function head(url: string) {
	const response = await getResponse({
		url,
		method: 'HEAD',
		headers: { 'accept': '*/*' },
	});

	return response;
}
|
||||
|
||||
/**
 * Sends one request through got (no retries, HTTP/2 disabled) and validates
 * the response against the optional content-type filter and size limits.
 *
 * @throws Error when the content-type is filtered out, the declared
 *         content-length exceeds the limit, or a required content-length is missing.
 */
async function getResponse(args: GotOptions) {
	const phaseTimeout = args.responseTimeout ?? DEFAULT_RESPONSE_TIMEOUT;
	const wholeTimeout = args.operationTimeout ?? DEFAULT_OPERATION_TIMEOUT;

	const req = got<string>(args.url, {
		method: args.method,
		headers: args.headers,
		body: args.body,
		timeout: {
			lookup: phaseTimeout,
			connect: phaseTimeout,
			secureConnect: phaseTimeout,
			socket: phaseTimeout, // read timeout
			response: phaseTimeout,
			send: phaseTimeout,
			request: wholeTimeout, // whole operation timeout
		},
		agent,
		http2: false,
		retry: {
			limit: 0,
		},
	});

	const res = await receiveResponse({ req, opts: args });

	// Check html
	const contentType = res.headers['content-type'];
	if (args.typeFilter && !contentType?.match(args.typeFilter)) {
		throw new Error(`Rejected by type filter ${contentType}`);
	}

	// Check size in response header
	const contentLength = res.headers['content-length'];
	if (!contentLength) {
		if (args.contentLengthRequired) {
			throw new Error('content-length required');
		}
		return res;
	}

	const maxSize = args.contentLengthLimit ?? DEFAULT_MAX_RESPONSE_SIZE;
	const size = Number(contentLength);
	if (size > maxSize) {
		throw new Error(`maxSize exceeded (${size} > ${maxSize}) on response`);
	}

	return res;
}
|
||||
|
||||
/**
 * Awaits a got request, cancelling it once more than the allowed number of
 * bytes has been transferred, and converts got HTTP errors into `StatusError`.
 */
async function receiveResponse<T>(args: {
	req: Got.CancelableRequest<Got.Response<T>>,
	opts: GotOptions,
}) {
	const { req, opts } = args;
	const maxSize = opts.contentLengthLimit ?? DEFAULT_MAX_RESPONSE_SIZE;

	// Check size of data being received
	req.on('downloadProgress', (progress: Got.Progress) => {
		const overLimit = progress.transferred > maxSize;
		if (overLimit && progress.percent !== 1) {
			req.cancel(`maxSize exceeded (${progress.transferred} > ${maxSize}) on response`);
		}
	});

	// Getting the response with status code error formatting
	try {
		return await req;
	} catch (e) {
		if (e instanceof Got.HTTPError) {
			const { statusCode, statusMessage } = e.response;
			throw new StatusError(`${statusCode} ${statusMessage}`, statusCode, statusMessage);
		}
		throw e;
	}
}
|
12
src/utils/null-or-empty.ts
Normal file
12
src/utils/null-or-empty.ts
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* eslint-disable @typescript-eslint/no-unnecessary-condition */
|
||||
/**
 * Returns true when `val` is null, undefined, or contains only whitespace.
 *
 * The parameter type admits null and undefined, since handling those values
 * is the purpose of this check.
 */
export default function nullOrEmpty(val: string | null | undefined): boolean {
	return val == null || val.trim() === '';
}
|
14
src/utils/status-error.ts
Normal file
14
src/utils/status-error.ts
Normal file
|
@ -0,0 +1,14 @@
|
|||
/**
 * Error carrying an HTTP status code and message.
 * `isPermanentError` is true for status codes in the 4xx range.
 */
export class StatusError extends Error {
	public name: string;
	public statusCode: number;
	public statusMessage?: string;
	public isPermanentError: boolean;

	constructor(message: string, statusCode: number, statusMessage?: string) {
		super(message);

		const isClientError = typeof statusCode === 'number' && statusCode >= 400 && statusCode < 500;

		this.name = 'StatusError';
		this.statusCode = statusCode;
		this.statusMessage = statusMessage;
		this.isPermanentError = isClientError;
	}
}
|
3
test/htmls/activitypub.html
Normal file
3
test/htmls/activitypub.html
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!DOCTYPE html>
|
||||
<meta charset="utf-8">
|
||||
<link rel="alternate" type="application/activity+json" href="https://valkyriecoms.test/notes/abcdefg">
|
15
test/htmls/basic.html
Normal file
15
test/htmls/basic.html
Normal file
|
@ -0,0 +1,15 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>KISS principle</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>KISS principle</h1>
|
||||
<p>KISS is an acronym for ”Keep it simple, stupid” as a design principle noted by the U.S. Navy in 1960.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/dirty-title.html
Normal file
16
test/htmls/dirty-title.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="og:site_name" content="Alice's Site">
|
||||
<title>Strawberry Pasta | Alice's Site</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Strawberry Pasta</h1>
|
||||
<p>Strawberry pasta is a kind of pasta with strawberry sauce.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/mixi-sensitive.html
Normal file
16
test/htmls/mixi-sensitive.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="mixi:content-rating" content="1">
|
||||
<title>SENSITIVE CONTENT!!</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
15
test/htmls/no-favicon.html
Normal file
15
test/htmls/no-favicon.html
Normal file
|
@ -0,0 +1,15 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Strawberry Pasta</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Strawberry Pasta</h1>
|
||||
<p>Alice's Strawberry Pasta</p>
|
||||
</body>
|
||||
|
||||
</html>
|
15
test/htmls/no-metas.html
Normal file
15
test/htmls/no-metas.html
Normal file
|
@ -0,0 +1,15 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>KISS principle</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>KISS principle</h1>
|
||||
<p>KISS is an acronym for ”Keep it simple, stupid” as a design principle noted by the U.S. Navy in 1960.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
3
test/htmls/oembed-and-og-video.html
Normal file
3
test/htmls/oembed-and-og-video.html
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!DOCTYPE html>
|
||||
<meta property="og:video:url" content="https://example.com/embedurl" />
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
3
test/htmls/oembed-and-og.html
Normal file
3
test/htmls/oembed-and-og.html
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!DOCTYPE html>
|
||||
<meta property="og:description" content="blobcats rule the world">
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
3
test/htmls/oembed-nonexistent.html
Normal file
3
test/htmls/oembed-nonexistent.html
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembe.json" />
|
||||
<meta property="og:description" content="nonexistent">
|
2
test/htmls/oembed-relative.html
Normal file
2
test/htmls/oembed-relative.html
Normal file
|
@ -0,0 +1,2 @@
|
|||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="oembed.json" />
|
3
test/htmls/oembed-wrong-path.html
Normal file
3
test/htmls/oembed-wrong-path.html
Normal file
|
@ -0,0 +1,3 @@
|
|||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost:+3060/oembed.json" />
|
||||
<meta property="og:description" content="wrong url">
|
2
test/htmls/oembed.html
Normal file
2
test/htmls/oembed.html
Normal file
|
@ -0,0 +1,2 @@
|
|||
<!DOCTYPE html>
|
||||
<link type="application/json+oembed" href="http://localhost:3060/oembed.json" />
|
16
test/htmls/og-description.html
Normal file
16
test/htmls/og-description.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="og:description" content="Strawberry Pasta">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/og-image.html
Normal file
16
test/htmls/og-image.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="og:image" content="https://himasaku.net/himasaku.png">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/og-site_name.html
Normal file
16
test/htmls/og-site_name.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="og:site_name" content="Strawberry Pasta">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/og-title.html
Normal file
16
test/htmls/og-title.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="og:title" content="Strawberry Pasta">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
40
test/htmls/player-perrtube-video.html
Normal file
40
test/htmls/player-perrtube-video.html
Normal file
|
@ -0,0 +1,40 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PeerTube:video</title>
|
||||
<!--
|
||||
twitter:card = summary_large_image
|
||||
twitter:player = <undefined>
|
||||
og:video:url = Points embed URL
|
||||
-->
|
||||
|
||||
<meta property="og:platform" content="PeerTube">
|
||||
<meta property="og:type" content="video" />
|
||||
<meta property="og:site_name" content="Site" />
|
||||
<meta property="og:title" content="Title" />
|
||||
<meta property="og:image" content="https://example.com/imageurl" />
|
||||
<meta property="og:url" content="https://example.com/pageurl" />
|
||||
<meta property="og:description" content="Desc" />
|
||||
<meta property="og:video:url" content="https://example.com/embedurl" />
|
||||
<meta property="og:video:secure_url" content="https://example.com/embedurl" />
|
||||
<meta property="og:video:type" content="text/html" />
|
||||
<meta property="og:video:width" content="640" />
|
||||
<meta property="og:video:height" content="480" />
|
||||
<meta property="name" content="Desc" />
|
||||
<meta property="twitter:card" content="summary_large_image" />
|
||||
<meta property="twitter:site" content="@userid" />
|
||||
<meta property="twitter:title" content="Title" />
|
||||
<meta property="twitter:description" content="Desc" />
|
||||
<meta property="twitter:image" content="https://example.com/imageurl" />
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
33
test/htmls/player-pleroma-image.html
Normal file
33
test/htmls/player-pleroma-image.html
Normal file
|
@ -0,0 +1,33 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Pleroma:image</title>
|
||||
<!--
|
||||
twitter:card = summary_large_image
|
||||
twitter:player = <defined>, and URL points thumbnail image.
|
||||
og:video:* = <undefined>
|
||||
-->
|
||||
|
||||
<meta content="Title" property="og:title">
|
||||
<meta content="https://example.com/pageurl" property="og:url">
|
||||
<meta content="Desc" property="og:description">
|
||||
<meta content="article" property="og:type">
|
||||
<meta content="https://example.com/imageurl" property="og:image">
|
||||
<meta content="150" property="og:image:width">
|
||||
<meta content="150" property="og:image:height">
|
||||
<meta content="Title" property="twitter:title">
|
||||
<meta content="Desc" property="twitter:description">
|
||||
<meta content="summary_large_image" property="twitter:card">
|
||||
<meta content="https://example.com/imageurl" property="twitter:player"><!-- This URL is an image. -->
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
38
test/htmls/player-pleroma-video.html
Normal file
38
test/htmls/player-pleroma-video.html
Normal file
|
@ -0,0 +1,38 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Pleroma:video</title>
|
||||
<!--
|
||||
twitter:card = player
|
||||
twitter:player = Points embed URL
|
||||
og:video:url = <undefined>
|
||||
-->
|
||||
|
||||
<meta content="Title" property="og:title">
|
||||
<meta content="https://example.com/pageurl" property="og:url">
|
||||
<meta content="Desc" property="og:description">
|
||||
<meta content="article" property="og:type">
|
||||
<meta content="https://example.com/videourl" property="og:video">
|
||||
<meta content="https://example.com/imageurl" property="og:image">
|
||||
<meta content="Title" property="twitter:title">
|
||||
<meta content="Desc" property="twitter:description">
|
||||
<meta content="player" property="twitter:card">
|
||||
<meta content="https://example.com/embedurl" property="twitter:player">
|
||||
<meta content="480" property="twitter:player:width">
|
||||
<meta content="480" property="twitter:player:height">
|
||||
<meta content="https://example.com/videourl" property="twitter:player:stream">
|
||||
<meta content="video/mp4" property="twitter:player:stream:content_type">
|
||||
<meta content="summary_large_image" property="twitter:card">
|
||||
<meta content="https://example.com/imageurl" property="twitter:player">
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
16
test/htmls/twitter-description.html
Normal file
16
test/htmls/twitter-description.html
Normal file
|
@ -0,0 +1,16 @@
|
|||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="twitter:description" content="Strawberry Pasta">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
|
||||
</html>
|
14
test/htmls/twitter-image.html
Normal file
14
test/htmls/twitter-image.html
Normal file
|
@ -0,0 +1,14 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="twitter:image" content="https://himasaku.net/himasaku.png">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
</html>
|
13
test/htmls/twitter-title.html
Normal file
13
test/htmls/twitter-title.html
Normal file
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta property="twitter:title" content="YEE HAW">
|
||||
<title>YEE HAW</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Yo</h1>
|
||||
<p>Hey hey hey toastie.</p>
|
||||
</body>
|
||||
</html>
|
637
test/index.ts
Normal file
637
test/index.ts
Normal file
|
@ -0,0 +1,637 @@
|
|||
/**
|
||||
* Tests!
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
/* dependencies below */
|
||||
|
||||
import fs, { readdirSync } from 'node:fs';
|
||||
import process from 'node:process';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { Agent as httpAgent } from 'node:http';
|
||||
import { Agent as httpsAgent } from 'node:https';
|
||||
import { expect, test, describe, beforeEach, afterEach } from '@jest/globals';
|
||||
import fastify from 'fastify';
|
||||
import { summaly } from '../src/index.js';
|
||||
import { StatusError } from '../src/utils/status-error.js';
|
||||
|
||||
const _filename = fileURLToPath(import.meta.url);
|
||||
const _dirname = dirname(_filename);
|
||||
|
||||
/* settings below */
|
||||
|
||||
// Keep full stack traces in test output.
Error.stackTraceLimit = Infinity;

// During the test the env variable is set to test
process.env.NODE_ENV = 'test';
// The fixtures are served from localhost, so private addresses must be allowed.
process.env.SUMMALY_ALLOW_PRIVATE_IP = 'true';

const port = 3060;
const host = `http://localhost:${port}`;

// Display detail of unhandled promise rejection
process.on('unhandledRejection', console.dir);

// Fixture server of the running test, if any.
let app: ReturnType<typeof fastify> | null = null;
|
||||
|
||||
// Shut down the fixture server started by a test, if there is one.
afterEach(async () => {
	if (!app) return;

	await app.close();
	app = null;
});
|
||||
|
||||
/* tests below */
|
||||
|
||||
// Serves htmls/basic.html from a local server and checks the full summary.
test('basic', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		const content = fs.readFileSync(_dirname + '/htmls/basic.html');
		return reply
			.header('content-length', content.length)
			.header('content-type', 'text/html')
			.send(content);
	});
	await app.listen({ port });

	const summary = await summaly(host);

	expect(summary).toEqual({
		title: 'KISS principle',
		icon: null,
		description: null,
		thumbnail: null,
		player: {
			url: null,
			width: null,
			height: null,
			'allow': [
				'autoplay',
				'encrypted-media',
				'fullscreen',
			],
		},
		sitename: 'localhost:3060',
		sensitive: false,
		url: host,
		activityPub: null,
	});
});
|
||||
|
||||
// Summarizes a live YouTube page, so this test needs network access.
test('Stage Bye Stage', async () => {
	// If this test fails, you must rewrite the result data and the example in README.md.
	const expected = {
		title: '【アイドルマスター】「Stage Bye Stage」(歌:島村卯月、渋谷凛、本田未央)',
		icon: 'https://www.youtube.com/s/desktop/4feff1e2/img/favicon.ico',
		description: 'Website▶https://columbia.jp/idolmaster/Playlist▶https://www.youtube.com/playlist?list=PL83A2998CF3BBC86D2018年7月18日発売予定THE IDOLM@STER CINDERELLA GIRLS CG STAR...',
		thumbnail: 'https://i.ytimg.com/vi/NMIEAhH_fTU/maxresdefault.jpg',
		player: {
			url: 'https://www.youtube.com/embed/NMIEAhH_fTU?feature=oembed',
			width: 200,
			height: 113,
			allow: [
				'autoplay',
				'clipboard-write',
				'encrypted-media',
				'picture-in-picture',
				'web-share',
				'fullscreen',
			],
		},
		sitename: 'YouTube',
		sensitive: false,
		activityPub: null,
		url: 'https://www.youtube.com/watch?v=NMIEAhH_fTU',
	};

	const summary = await summaly('https://www.youtube.com/watch?v=NMIEAhH_fTU');

	expect(summary).toEqual(expected);
});
|
||||
|
||||
test('If a favicon is not specified in the HTML but is present in the root, it will be set correctly.', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		const html = fs.readFileSync(`${_dirname}/htmls/no-favicon.html`);
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.send(html);
	});
	// The root favicon exists: answer with an empty 200.
	app.get('/favicon.ico', (_, reply) => reply.status(200).send());
	await app.listen({ port });

	const { icon } = await summaly(host);

	expect(icon).toBe(`${host}/favicon.ico`);
});
|
||||
|
||||
test('If the favicon is not specified in the HTML and does not exist in the root, it will be null.', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		const html = fs.readFileSync(`${_dirname}/htmls/no-favicon.html`);
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.send(html);
	});
	// Every other path, including /favicon.ico, answers 404.
	app.get('*', (_, reply) => reply.status(404).send());
	await app.listen({ port });

	const { icon } = await summaly(host);

	expect(icon).toBeNull();
});
|
||||
|
||||
test('The title is cleaned up.', async () => {
	app = fastify();
	app.get('/', (request, reply) => {
		const html = fs.readFileSync(`${_dirname}/htmls/og-title.html`);
		return reply
			.header('content-length', html.length)
			.header('content-type', 'text/html')
			.send(html);
	});
	await app.listen({ port });

	const { title } = await summaly(host);

	expect(title).toBe('Strawberry Pasta');
});
|
||||
|
||||
// Checks how summaly treats the localhost fixture server once
// SUMMALY_ALLOW_PRIVATE_IP is switched to 'false'.
describe('Private IP blocking', () => {
	beforeEach(() => {
		// The rest of the file runs with this set to 'true'; the afterEach below restores it.
		process.env.SUMMALY_ALLOW_PRIVATE_IP = 'false';
		app = fastify();
		app.get('*', (request, reply) => {
			const content = fs.readFileSync(_dirname + '/htmls/og-title.html');
			reply.header('content-length', content.length);
			reply.header('content-type', 'text/html');
			return reply.send(content);
		});
		return app.listen({ port });
	});

	test('I can not get private IP server information', async () => {
		// Capture the outcome whether the call resolves or rejects, then assert
		// unconditionally: a resolved summary fails the instanceof check.
		const outcome = await summaly(host).catch((e: unknown) => e);
		expect(outcome).toBeInstanceOf(StatusError);
		expect(outcome).toHaveProperty('name', 'StatusError');
	});

	test('Allow private IP if agent is specified', async () => {
		const summary = await summaly(host, {
			agent: {
				http: new httpAgent({ keepAlive: true }),
				https: new httpsAgent({ keepAlive: true }),
			},
		});
		expect(summary.title).toBe('Strawberry Pasta');
	});

	test('Do not allow private ip if agent is an empty object', async () => {
		// Same pattern as the first test: no assertion sits behind a branch.
		const outcome = await summaly(host, { agent: {} }).catch((e: unknown) => e);
		expect(outcome).toBeInstanceOf(StatusError);
		expect(outcome).toHaveProperty('name', 'StatusError');
	});

	afterEach(() => {
		process.env.SUMMALY_ALLOW_PRIVATE_IP = 'true';
	});
});
|
||||
|
||||
// Each test serves one fixture page and checks a single summary field.
describe('OGP', () => {
	// Starts the fixture server, answering `route` with the given file from htmls/.
	const serveFixture = async (route: string, fixture: string) => {
		app = fastify();
		app.get(route, (request, reply) => {
			const html = fs.readFileSync(`${_dirname}/htmls/${fixture}`);
			return reply
				.header('content-length', html.length)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });
	};

	test('title', async () => {
		await serveFixture('*', 'og-title.html');

		const { title } = await summaly(host);
		expect(title).toBe('Strawberry Pasta');
	});

	test('description', async () => {
		await serveFixture('/', 'og-description.html');

		const { description } = await summaly(host);
		expect(description).toBe('Strawberry Pasta');
	});

	test('site_name', async () => {
		await serveFixture('/', 'og-site_name.html');

		const { sitename } = await summaly(host);
		expect(sitename).toBe('Strawberry Pasta');
	});

	test('thumbnail', async () => {
		await serveFixture('/', 'og-image.html');

		const { thumbnail } = await summaly(host);
		expect(thumbnail).toBe('https://himasaku.net/himasaku.png');
	});
});
|
||||
|
||||
// Each test serves one fixture page at `/` and checks the fields derived from it.
describe('TwitterCard', () => {
	// Starts the fixture server, answering `/` with the given file from htmls/.
	const serveFixture = async (fixture: string) => {
		app = fastify();
		app.get('/', (request, reply) => {
			const html = fs.readFileSync(`${_dirname}/htmls/${fixture}`);
			return reply
				.header('content-length', html.length)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });
	};

	test('title', async () => {
		await serveFixture('twitter-title.html');

		const { title } = await summaly(host);
		expect(title).toBe('Strawberry Pasta');
	});

	test('description', async () => {
		await serveFixture('twitter-description.html');

		const { description } = await summaly(host);
		expect(description).toBe('Strawberry Pasta');
	});

	test('thumbnail', async () => {
		await serveFixture('twitter-image.html');

		const { thumbnail } = await summaly(host);
		expect(thumbnail).toBe('https://himasaku.net/himasaku.png');
	});

	test('Player detection - PeerTube:video => video', async () => {
		await serveFixture('player-peertube-video.html');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/embedurl');
		expect(player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
	});

	test('Player detection - Pleroma:video => video', async () => {
		await serveFixture('player-pleroma-video.html');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/embedurl');
		expect(player.allow).toStrictEqual(['autoplay', 'encrypted-media', 'fullscreen']);
	});

	test('Player detection - Pleroma:image => image', async () => {
		await serveFixture('player-pleroma-image.html');

		const { thumbnail } = await summaly(host);
		expect(thumbnail).toBe('https://example.com/imageurl');
	});
});
|
||||
|
||||
// Checks the player information summaly derives from an oEmbed document.
describe('oEmbed', () => {
	/**
	 * Starts the fixture server with two routes: `/` serves an HTML page and
	 * `/oembed.json` serves an oEmbed document.
	 *
	 * @param oEmbedPath JSON file to serve, resolved against the `oembed/` directory next to this file.
	 * @param htmlPath HTML file to serve, resolved against this file's location.
	 */
	const setUpFastify = async (oEmbedPath: string, htmlPath = 'htmls/oembed.html') => {
		const htmlUrl = new URL(htmlPath, import.meta.url);
		const oEmbedUrl = new URL(oEmbedPath, new URL('oembed/', import.meta.url));

		app = fastify();
		app.get('/', (request, reply) => {
			const html = fs.readFileSync(htmlUrl);
			return reply
				.header('content-length', html.length)
				.header('content-type', 'text/html')
				.send(html);
		});
		app.get('/oembed.json', (request, reply) => {
			const json = fs.readFileSync(oEmbedUrl);
			return reply
				.header('content-length', json.length)
				.header('content-type', 'application/json')
				.send(json);
		});
		await app.listen({ port });
	};

	// One test per file in oembed/invalid: each must yield no player URL.
	const invalidFixtures = readdirSync(new URL('oembed/invalid', import.meta.url));
	for (const invalidFixture of invalidFixtures) {
		test(`Invalidity test: ${invalidFixture}`, async () => {
			await setUpFastify(`invalid/${invalidFixture}`);

			const { player } = await summaly(host);
			expect(player.url).toBeNull();
		});
	}

	test('basic properties', async () => {
		await setUpFastify('oembed.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.width).toBe(500);
		expect(player.height).toBe(300);
	});

	test('type: video', async () => {
		await setUpFastify('oembed-video.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.width).toBe(500);
		expect(player.height).toBe(300);
	});

	test('max height', async () => {
		await setUpFastify('oembed-too-tall.json');

		const { player } = await summaly(host);
		expect(player.height).toBe(1024);
	});

	test('children are ignored', async () => {
		await setUpFastify('oembed-iframe-child.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
	});

	test('allows fullscreen', async () => {
		await setUpFastify('oembed-allow-fullscreen.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.allow).toStrictEqual(['fullscreen']);
	});

	test('allows legacy allowfullscreen', async () => {
		await setUpFastify('oembed-allow-fullscreen-legacy.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.allow).toStrictEqual(['fullscreen']);
	});

	test('allows safelisted permissions', async () => {
		await setUpFastify('oembed-allow-safelisted-permissions.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.allow).toStrictEqual([
			'autoplay',
			'clipboard-write',
			'fullscreen',
			'encrypted-media',
			'picture-in-picture',
			'web-share',
		]);
	});

	test('ignores rare permissions', async () => {
		await setUpFastify('oembed-ignore-rare-permissions.json');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.allow).toStrictEqual(['autoplay']);
	});

	test('oEmbed with relative path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-relative.html');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
	});

	test('oEmbed with nonexistent path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-nonexistent-path.html');

		const { player, description } = await summaly(host);
		expect(player.url).toBeNull();
		expect(description).toBe('nonexistent');
	});

	test('oEmbed with wrong path', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-wrong-path.html');

		const { player, description } = await summaly(host);
		expect(player.url).toBeNull();
		expect(description).toBe('wrong url');
	});

	test('oEmbed with OpenGraph', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-and-og.html');

		const { player, description } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(description).toBe('blobcats rule the world');
	});

	test('Invalid oEmbed with valid OpenGraph', async () => {
		await setUpFastify('invalid/oembed-insecure.json', 'htmls/oembed-and-og.html');

		const { player, description } = await summaly(host);
		expect(player.url).toBeNull();
		expect(description).toBe('blobcats rule the world');
	});

	test('oEmbed with og:video', async () => {
		await setUpFastify('oembed.json', 'htmls/oembed-and-og-video.html');

		const { player } = await summaly(host);
		expect(player.url).toBe('https://example.com/');
		expect(player.allow).toStrictEqual([]);
	});

	test('width: 100%', async () => {
		await setUpFastify('oembed-percentage-width.json');

		const { player } = await summaly(host);
		expect(player.width).toBeNull();
		expect(player.height).toBe(300);
	});
});
|
||||
|
||||
// Checks the `activityPub` field of the summary.
describe('ActivityPub', () => {
	// Starts the fixture server, answering every path with the given file from htmls/.
	const serveEverywhere = async (fixture: string) => {
		app = fastify();
		app.get('*', (request, reply) => {
			const html = fs.readFileSync(`${_dirname}/htmls/${fixture}`);
			return reply
				.header('content-length', html.length)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });
	};

	test('Basic', async () => {
		await serveEverywhere('activitypub.html');

		const { activityPub } = await summaly(host);
		expect(activityPub).toBe('https://misskey.test/notes/abcdefg');
	});

	test('Null', async () => {
		await serveEverywhere('basic.html');

		const { activityPub } = await summaly(host);
		expect(activityPub).toBeNull();
	});
});
|
||||
|
||||
// Checks the `sensitive` flag of the summary.
describe('sensitive', () => {
	// Starts the fixture server, answering `/` with the given file from htmls/.
	const serveFixture = async (fixture: string) => {
		app = fastify();
		app.get('/', (request, reply) => {
			const html = fs.readFileSync(`${_dirname}/htmls/${fixture}`);
			return reply
				.header('content-length', html.length)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });
	};

	test('default', async () => {
		await serveFixture('basic.html');

		const { sensitive } = await summaly(host);
		expect(sensitive).toBe(false);
	});

	test('mixi:content-rating 1', async () => {
		await serveFixture('mixi-sensitive.html');

		const { sensitive } = await summaly(host);
		expect(sensitive).toBe(true);
	});
});
|
||||
|
||||
describe('UserAgent', () => {
	test('The UA settings are reflected.', async () => {
		const html = fs.readFileSync(`${_dirname}/htmls/basic.html`);
		// User-Agent header of the request the server received, if any.
		let receivedUa: string | undefined;

		app = fastify();
		app.get('/', (request, reply) => {
			receivedUa = request.headers['user-agent'];
			return reply
				.header('content-length', html.byteLength)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });

		await summaly(host, { userAgent: 'test-ua' });

		expect(receivedUa).toBe('test-ua');
	});
});
|
||||
|
||||
// Exercises the `contentLengthLimit` option against a page of known size.
describe('content-length limit', () => {
	// Reads the basic fixture, serves it at `/` with an explicit content-length, and returns its bytes.
	const serveBasicPage = async () => {
		const html = fs.readFileSync(`${_dirname}/htmls/basic.html`);

		app = fastify();
		app.get('/', (request, reply) => {
			return reply
				.header('content-length', html.byteLength)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });

		return html;
	};

	test('No errors will occur if the content is within the content-length limit.', async () => {
		const html = await serveBasicPage();

		// A limit equal to the body size is still accepted.
		const summary = await summaly(host, { contentLengthLimit: html.byteLength });
		expect(summary).toBeDefined();
	});

	test('If the content-length limit is exceeded, an error will occur.', async () => {
		const html = await serveBasicPage();

		// One byte under the body size must be rejected.
		const pending = summaly(host, { contentLengthLimit: html.byteLength - 1 });
		await expect(pending).rejects.toThrow();
	});
});
|
||||
|
||||
// Exercises the `contentLengthRequired` option with and without a content-length header.
describe('content-length required', () => {
	// Serves the basic fixture from a buffer with an explicit content-length; returns the bytes.
	const serveWithLength = async () => {
		const html = fs.readFileSync(`${_dirname}/htmls/basic.html`);

		app = fastify();
		app.get('/', (request, reply) => {
			return reply
				.header('content-length', html.byteLength)
				.header('content-type', 'text/html')
				.send(html);
		});
		await app.listen({ port });

		return html;
	};

	// Serves the basic fixture as a stream without setting content-length.
	const serveWithoutLength = async () => {
		app = fastify();
		app.get('/', (request, reply) => {
			// If you don't pass it as stream, the content-length will be set automatically.
			const body = fs.createReadStream(`${_dirname}/htmls/basic.html`);
			return reply.header('content-type', 'text/html').send(body);
		});
		await app.listen({ port });
	};

	test('[When option is enabled] No error occurs if content-length is returned', async () => {
		const html = await serveWithLength();

		const summary = await summaly(host, { contentLengthRequired: true, contentLengthLimit: html.byteLength });
		expect(summary).toBeDefined();
	});

	test('[When option is enabled] An error occurs if content-length is not returned.', async () => {
		await serveWithoutLength();

		const pending = summaly(host, { contentLengthRequired: true });
		await expect(pending).rejects.toThrow();
	});

	test('[When option is disabled] No error occurs if content-length is returned', async () => {
		const html = await serveWithLength();

		const summary = await summaly(host, { contentLengthRequired: false, contentLengthLimit: html.byteLength });
		expect(summary).toBeDefined();
	});

	test('[When option is disabled] No error occurs even if content-length is not returned', async () => {
		await serveWithoutLength();

		const summary = await summaly(host, { contentLengthRequired: false });
		expect(summary).toBeDefined();
	});
});
|
7
test/oembed/invalid/oembed-child-iframe.json
Normal file
7
test/oembed/invalid/oembed-child-iframe.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<div><iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-double-iframes.json
Normal file
7
test/oembed/invalid/oembed-double-iframes.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe><iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-future.json
Normal file
7
test/oembed/invalid/oembed-future.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "11.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-insecure.json
Normal file
7
test/oembed/invalid/oembed-insecure.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='http://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-invalid-height.json
Normal file
7
test/oembed/invalid/oembed-invalid-height.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": "blobcat"
|
||||
}
|
6
test/oembed/invalid/oembed-no-height.json
Normal file
6
test/oembed/invalid/oembed-no-height.json
Normal file
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500
|
||||
}
|
6
test/oembed/invalid/oembed-no-version.json
Normal file
6
test/oembed/invalid/oembed-no-version.json
Normal file
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-old.json
Normal file
7
test/oembed/invalid/oembed-old.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "0.1",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-photo.json
Normal file
7
test/oembed/invalid/oembed-photo.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "photo",
|
||||
"url": "https://example.com/example.avif",
|
||||
"width": 300,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-too-powerful.json
Normal file
7
test/oembed/invalid/oembed-too-powerful.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='camera'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/invalid/oembed-too-powerful2.json
Normal file
7
test/oembed/invalid/oembed-too-powerful2.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='fullscreen;camera'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-allow-fullscreen-legacy.json
Normal file
7
test/oembed/oembed-allow-fullscreen-legacy.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allowfullscreen></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-allow-fullscreen.json
Normal file
7
test/oembed/oembed-allow-fullscreen.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='fullscreen'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-allow-safelisted-permissions.json
Normal file
7
test/oembed/oembed-allow-safelisted-permissions.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='autoplay;clipboard-write;fullscreen;encrypted-media;picture-in-picture;web-share'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-iframe-child.json
Normal file
7
test/oembed/oembed-iframe-child.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'><script>alert('Hahaha I take this world')</script></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-ignore-rare-permissions.json
Normal file
7
test/oembed/oembed-ignore-rare-permissions.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/' allow='autoplay;gyroscope;accelerometer'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed-percentage-width.json
Normal file
7
test/oembed/oembed-percentage-width.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": "100%",
|
||||
"height": 300
|
||||
}
|
6
test/oembed/oembed-too-tall.json
Normal file
6
test/oembed/oembed-too-tall.json
Normal file
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"height": 3000
|
||||
}
|
7
test/oembed/oembed-video.json
Normal file
7
test/oembed/oembed-video.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "video",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
7
test/oembed/oembed.json
Normal file
7
test/oembed/oembed.json
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"version": "1.0",
|
||||
"type": "rich",
|
||||
"html": "<iframe src='https://example.com/'></iframe>",
|
||||
"width": 500,
|
||||
"height": 300
|
||||
}
|
6
test/tsconfig.json
Normal file
6
test/tsconfig.json
Normal file
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"extends": "../tsconfig.json",
|
||||
"include": [
|
||||
"./**/*.ts",
|
||||
],
|
||||
}
|
49
tsconfig.json
Normal file
49
tsconfig.json
Normal file
|
@ -0,0 +1,49 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"allowJs": true,
|
||||
"noEmitOnError": false,
|
||||
"noImplicitAny": true,
|
||||
"noImplicitReturns": true,
|
||||
"noUnusedParameters": false,
|
||||
"noUnusedLocals": false,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"declaration": true,
|
||||
"sourceMap": false,
|
||||
"target": "es2021",
|
||||
"module": "esnext",
|
||||
"moduleResolution": "node",
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"removeComments": false,
|
||||
"noLib": false,
|
||||
"strict": true,
|
||||
"strictNullChecks": true,
|
||||
"strictPropertyInitialization": false,
|
||||
"skipLibCheck": true,
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"rootDir": "./src",
|
||||
"baseUrl": "./",
|
||||
"paths": {
|
||||
"@/*": [
|
||||
"./src/*"
|
||||
]
|
||||
},
|
||||
"outDir": "./built",
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"typeRoots": [
|
||||
"./node_modules/@types",
|
||||
"./src/@types"
|
||||
],
|
||||
"lib": [
|
||||
"esnext"
|
||||
]
|
||||
},
|
||||
"compileOnSave": false,
|
||||
"include": [
|
||||
"./src/**/*.ts"
|
||||
],
|
||||
}
|
Loading…
Reference in a new issue