How to exceed the 50 importxml limit in Google Sheets
I created a custom import function that overcomes all limits of IMPORTXML I have a sheet using this in about 800 cells and it works great.
It makes use of Google Sheet’s custom scripts (Tools > Script editor…) and searches through content using regex instead of xpath.
- function importRegex(url, regexInput) {
- var output = '';
- var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
- if (fetchedUrl) {
- var html = fetchedUrl.getContentText();
- if (html.length && regexInput.length) {
- output = html.match(new RegExp(regexInput, 'i'))[1];
- }
- }
- // Grace period to avoid call limit
- Utilities.sleep(1000);
- return unescapeHTML(output);
- }
You can then use this function like any function.
=importRegex("https://example.com", "
(.*)<\/title>")Of course, you can also reference cells.
=importRegex(A2, "
(.*)<\/title>")If you don’t want to see HTML entities in the output, you can use this function.
- var htmlEntities = {
- nbsp: ' ',
- cent: '¢',
- pound: '£',
- yen: '¥',
- euro: '€',
- copy: '©',
- reg: '®',
- lt: '<',
- gt: '>',
- mdash: '–',
- ndash: '-',
- quot: '"',
- amp: '&',
- apos: '\''
- };
- function unescapeHTML(str) {
- return str.replace(/\&([^;]+);/g, function (entity, entityCode) {
- var match;
- if (entityCode in htmlEntities) {
- return htmlEntities[entityCode];
- } else if (match = entityCode.match(/^#x([\da-fA-F]+)$/)) {
- return String.fromCharCode(parseInt(match[1], 16));
- } else if (match = entityCode.match(/^#(\d+)$/)) {
- return String.fromCharCode(~~match[1]);
- } else {
- return entity;
- }
- });
- };
All together…
- function importRegex(url, regexInput) {
- var output = '';
- var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
- if (fetchedUrl) {
- var html = fetchedUrl.getContentText();
- if (html.length && regexInput.length) {
- output = html.match(new RegExp(regexInput, 'i'))[1];
- }
- }
- // Grace period to avoid call limit
- Utilities.sleep(1000);
- return unescapeHTML(output);
- }
- var htmlEntities = {
- nbsp: ' ',
- cent: '¢',
- pound: '£',
- yen: '¥',
- euro: '€',
- copy: '©',
- reg: '®',
- lt: '<',
- gt: '>',
- mdash: '–',
- ndash: '-',
- quot: '"',
- amp: '&',
- apos: '\''
- };
- function unescapeHTML(str) {
- return str.replace(/\&([^;]+);/g, function (entity, entityCode) {
- var match;
- if (entityCode in htmlEntities) {
- return htmlEntities[entityCode];
- } else if (match = entityCode.match(/^#x([\da-fA-F]+)$/)) {
- return String.fromCharCode(parseInt(match[1], 16));
- } else if (match = entityCode.match(/^#(\d+)$/)) {
- return String.fromCharCode(~~match[1]);
- } else {
- return entity;
- }
- });
- };