I have a function running in a Google Sheets script that pulls the HTML of several subreddits and returns the entries to the spreadsheet. It works for me some/most of the time, but other times I get the error "could not parse text. (line 13)" on the line `var doc = Xml.parse(page, true);`.
Any idea why this is happening, or is it a bug in Google Apps Script? Here's the code that works... sometimes.
/**
 * Scrapes the listing page of several subreddits and collects one row per
 * post.
 *
 * For every subreddit URL the page HTML is fetched, parsed leniently, and the
 * <body> is re-parsed with XmlService so that elements can be looked up by
 * CSS class name.
 *
 * NOTE(review): `getelementsbyclassname` is a helper defined outside this
 * snippet; its name is kept exactly as written here - confirm its real casing.
 * NOTE(review): the `Xml` service used for the lenient HTML parse is a legacy
 * Apps Script service - confirm it is still available in the target runtime.
 *
 * @return {Array<Array>} Rows of [rank, likes, title, link], sorted by the
 *     likes column in descending numeric order.
 */
function getreddithtml() {
  var entries_array = [];
  var subreddit_array = ['https://www.reddit.com/r/news/','https://www.reddit.com/r/funny/','https://www.reddit.com/r/science/'];

  for (var s = 0; s < subreddit_array.length; s++) {
    var page = UrlFetchApp.fetch(subreddit_array[s]);

    // This is the parse that intermittently fails with "Could not parse text."
    // Pass the response body explicitly rather than the HTTPResponse object.
    var doc = Xml.parse(page.getContentText(), true);
    var bodyHtml = doc.html.body.toXmlString();

    // Re-parse the cleaned-up body so the XmlService element API can be used.
    doc = XmlService.parse(bodyHtml);
    var root = doc.getRootElement();
    var entries = getelementsbyclassname(root, 'thing');

    for (var i = 0; i < entries.length; i++) {
      // The second 'title' match is serialized and stripped of its tags.
      var title = getelementsbyclassname(entries[i], 'title');
      title = XmlService.getRawFormat().format(title[1]).replace(/<[^>]*>/g, "");

      var link = getelementsbyclassname(entries[i], 'comments');
      link = link[0].getAttribute('href').getValue();

      var rank = getelementsbyclassname(entries[i], 'rank');
      rank = rank[0].getValue();

      var likes = getelementsbyclassname(entries[i], 'likes');
      likes = likes[0].getValue();

      entries_array.push([rank, likes, title, link]);
    }
  }

  // Highest "likes" first; the subtraction coerces the text values to numbers.
  return entries_array.sort(function (a, b) {
    return b[1] - a[1];
  });
}
Here is what I found upon playing with IMPORTXML (my usual way of doing this): for a reason I cannot narrow down, it appears to randomly stall out and return null for a few minutes. I'm guessing the issue is not the code, but that the site or Google temporarily blocks or won't return the data.
However, I found a JSON endpoint for the piece you want, and noticed that when the XML went down, the JSON didn't.
You can take this and fix it to push your own array of topics/URLs; I left just one link to show how the URL breaks down and how it should be modified:
The URL is 'https://www.reddit.com/r/news/hot.json?raw_json=1&subredditname=news&sort=top&t=day&feature=link_preview&sr_detail=true&app=mweb-client'.
`news` is mentioned in 2 places; modify your URLs to follow the same method. You can load the URL in a browser to see the fields available in the JSON (JavaScript object) data.
Also, the `hot.json` portion of the URL can be changed depending on whether you want the ranked list (called hot), or new, top, promoted, etc.
Just change that keyword.
The score is the same as upvotes/likes.
/**
 * Fetches a subreddit's "hot" listing from Reddit's JSON endpoint and appends
 * one row per post to the active sheet.
 *
 * Each appended row is [timestamp, rank, score, title, article URL], where
 * rank is the 1-based position of the post in the listing and the timestamp
 * is the same Date for every row of one run. At most 25 rows are written.
 *
 * @param {string} [subject] Subreddit name. Falls back to 'news' when omitted,
 *     empty, or not a string (for example when a trigger passes an event
 *     object as the first argument).
 */
function getsubreddit(subject) {
  var MAX_ROWS = 25;

  var subredditName = (typeof subject === 'string' && subject !== '') ? subject : 'news';
  var encodedName = encodeURIComponent(subredditName);

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getActiveSheet();

  // JSON endpoint for the listing; the subreddit name appears in two places.
  var url = 'https://www.reddit.com/r/' + encodedName + '/hot.json?raw_json=1&subredditname=' + encodedName + '&sort=top&t=day&feature=link_preview&sr_detail=true&app=mweb-client';

  var response = UrlFetchApp.fetch(url);
  var json = response.getContentText();
  var redditData = JSON.parse(json);
  Logger.log(redditData); // log the parsed data so it can be inspected

  var children = redditData.data.children;
  var date = new Date(); // single timestamp shared by all rows of this run

  // Never index past the end of the listing: it may hold fewer than 25 posts.
  var rowCount = Math.min(children.length, MAX_ROWS);
  for (var i = 0; i < rowCount; i++) {
    var post = children[i].data;
    // To store the reddit discussion link instead of the article URL, use
    // 'http://www.reddit.com' + post.permalink.
    sheet.appendRow([date, i + 1, post.score, post.title, post.url]);
  }
}
Comments
Post a Comment