Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/Helper/Filter/JSON/FixInstructionsFilter.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ public function apply(array &$json): bool {
continue;
}

if ($this->jsonService->isSchemaObject($value, 'HowToTip', false)) {
$instructions[$key] = [$this->extractHowToStep($value)];
continue;
}

if ($this->jsonService->isSchemaObject($value, 'HowToSection', false)) {
$newInstructions = $this->flattenHowToSection($value);
$instructions[$key] = $newInstructions;
Expand Down
37 changes: 35 additions & 2 deletions lib/Helper/HTMLParser/HttpJsonLdParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public function __construct(IL10N $l10n, JsonService $jsonService) {
public function parse(\DOMDocument $document, ?string $url): array {
$xpath = new \DOMXPath($document);

$json_ld_elements = $xpath->query("//*[@type='application/ld+json']");
$json_ld_elements = $xpath->query("//*[@type='application/ld+json'] | //script");

foreach ($json_ld_elements as $json_ld_element) {
if (!$json_ld_element || !$json_ld_element->nodeValue) {
Expand Down Expand Up @@ -58,7 +58,12 @@ private function parseJsonLdElement(\DOMNode $node): array {
$json = json_decode($string, true);

if ($json === null) {
throw new HtmlParsingException($this->l->t('JSON cannot be decoded.'));
$extractedJson = $this->extractNextJsJson($string);
if ($extractedJson === null) {
throw new HtmlParsingException($this->l->t('JSON cannot be decoded.'));
}

$json = json_decode($extractedJson, true);
}

if ($json === false || $json === true || !is_array($json)) {
Expand All @@ -85,6 +90,34 @@ private function parseJsonLdElement(\DOMNode $node): array {
throw new HtmlParsingException($this->l->t('No recipe was found.'));
}

/**
 * Try to extract escaped JSON from a Next.js flight payload.
 *
 * Example:
 * self.__next_f.push([1,"{\"@context\":\"https://schema.org\",...}"])
 *
 * The second element of the pushed array is a quoted string literal. Its
 * content is unescaped by decoding it as a JSON string, so that escapes such
 * as \uXXXX (e.g. \u0026 for "&") are turned into the characters they stand
 * for. stripcslashes() does not know \uXXXX and would only drop the backslash,
 * so it is kept purely as a fallback for literals that are not valid JSON
 * strings (e.g. ones using \xNN or octal escapes).
 *
 * Only the first push(...) call found in the content is considered. The
 * returned string is not validated as JSON; the caller is expected to do so.
 *
 * @param string $rawContent Raw text content of a script element
 * @return string|null The unescaped payload, or null if no non-empty payload was found
 */
private function extractNextJsJson(string $rawContent): ?string {
	// Cheap pre-check to avoid running the regular expression on unrelated scripts.
	if (strpos($rawContent, 'self.__next_f.push(') === false) {
		return null;
	}

	$matches = [];
	// Capture the body of the string literal: any escaped character or any
	// character that is neither a quote nor a backslash.
	$matched = preg_match('/self\.__next_f\.push\(\[\s*\d+\s*,\s*"((?:\\\\.|[^"\\\\])*)"/s', $rawContent, $matches);
	if ($matched !== 1 || !isset($matches[1])) {
		return null;
	}

	// Re-wrap the captured body in quotes and let the JSON decoder unescape it.
	$decoded = json_decode('"' . $matches[1] . '"');
	if (!is_string($decoded)) {
		// Not a valid JSON string literal: fall back to C-style unescaping.
		$decoded = stripcslashes($matches[1]);
	}

	return $decoded === '' ? null : $decoded;
}

/**
* Fix any JSON issues before trying to decode it
*
Expand Down
7 changes: 7 additions & 0 deletions tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public static function dataProvider(): array {
'caseJ' => ['caseJ.html', true, 'caseJ.json'],
//'caseK' => ['caseK.html', true, 'caseK.json'],
'caseL' => ['caseL.html', true, 'caseL.json'],
'caseM' => ['caseM.html', true, 'caseM.json'],
];
}

Expand Down Expand Up @@ -70,6 +71,7 @@ public function testHTMLFile($file, $valid, $jsonFile): void {
$parser = new HttpJsonLdParser($l, $jsonService);

$content = file_get_contents(__DIR__ . "/res_JsonLd/$file");
$content = $this->normalizeLineEndings($content);

$document = new \DOMDocument();
$document->loadHTML($content);
Expand All @@ -78,6 +80,7 @@ public function testHTMLFile($file, $valid, $jsonFile): void {
$res = $parser->parse($document, 'http://example.com');

$jsonDest = file_get_contents(__DIR__ . "/res_JsonLd/$jsonFile");
$jsonDest = $this->normalizeLineEndings($jsonDest);
$expected = json_decode($jsonDest, true);

$this->assertEquals($expected, $res);
Expand All @@ -86,4 +89,8 @@ public function testHTMLFile($file, $valid, $jsonFile): void {
$this->assertFalse($valid);
}
}

/**
 * Convert Windows (CRLF) and bare CR line endings to LF.
 *
 * @param string $text Text with arbitrary line endings
 * @return string The same text using "\n" as the only line terminator
 */
function normalizeLineEndings(string $text): string {
	// strtr() tries the longest key first, so "\r\n" collapses to a single "\n"
	// before a lone "\r" is considered.
	$lineEndingMap = [
		"\r\n" => "\n",
		"\r" => "\n",
	];

	return strtr($text, $lineEndingMap);
}
}
30 changes: 30 additions & 0 deletions tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,41 @@ public function testHTMLFile($filename, $valid, $jsonFile, $skipped = false): vo
$parser = new HttpMicrodataParser($l);

$content = file_get_contents(__DIR__ . "/res_Microdata/$filename");
$content = $this->normalizeLineEndings($content);

$document = new \DOMDocument();
$document->loadHTML($content);

try {
$res = $parser->parse($document, 'http://example.com');
if (isset($res['recipeIngredient'])) {
$res['recipeIngredient'] = array_map(
fn($line) => str_replace(["\r\n", "\r"], "\n", $line),
$res['recipeIngredient']
);
}
if (isset($res['recipeInstructions'])) {
$res['recipeInstructions'] = array_map(
fn($line) => str_replace(["\r\n", "\r"], "\n", $line),
$res['recipeInstructions']
);
}

$jsonDest = file_get_contents(__DIR__ . "/res_Microdata/$jsonFile");
$jsonDest = $this->normalizeLineEndings($jsonDest);
$expected = json_decode($jsonDest, true);
if (isset($expected['recipeIngredient'])) {
$expected['recipeIngredient'] = array_map(
fn($line) => str_replace(["\r\n", "\r"], "\n", $line),
$expected['recipeIngredient']
);
}
if (isset($expected['recipeInstructions'])) {
$expected['recipeInstructions'] = array_map(
fn($line) => str_replace(["\r\n", "\r"], "\n", $line),
$expected['recipeInstructions']
);
}

// $this->markTestSkipped();

Expand Down Expand Up @@ -168,4 +194,8 @@ private function finishTest($parser, $content, $jsonFile): void {
$this->assertFalse(true);
}
}

/**
 * Normalize all line endings of a text to LF.
 *
 * @param string $text Text that may contain CRLF or CR line endings
 * @return string The text with every CRLF and every remaining CR replaced by "\n"
 */
function normalizeLineEndings(string $text): string {
	// First pass: collapse CRLF pairs so they do not end up as two line feeds.
	$withoutCrLf = str_replace("\r\n", "\n", $text);

	// Second pass: convert any carriage return that is still left.
	return str_replace("\r", "\n", $withoutCrLf);
}
}
Loading