mirror of
https://github.com/samsonjs/5by5Browser.git
synced 2026-03-25 09:05:50 +00:00
935 lines
No EOL
30 KiB
Objective-C
935 lines
No EOL
30 KiB
Objective-C
//
|
|
// MWFeedParser.m
|
|
// MWFeedParser
|
|
//
|
|
// Copyright (c) 2010 Michael Waterfall
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// 1. The above copyright notice and this permission notice shall be included
|
|
// in all copies or substantial portions of the Software.
|
|
//
|
|
// 2. This Software cannot be used to archive or collect data such as (but not
|
|
// limited to) that of events, news, experiences and activities, for the
|
|
// purpose of any concept relating to diary/journal keeping.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
//
|
|
|
|
#import "MWFeedParser.h"
|
|
#import "MWFeedParser_Private.h"
|
|
#import "NSString+HTML.h"
|
|
#import "NSDate+InternetDateTime.h"
|
|
|
|
// NSXMLParser Logging
// Change the condition below to 1 to log every NSXMLParser callback through NSLog.
// The macro intentionally has no trailing semicolon: every call site supplies its
// own, so the expansion is exactly one statement and is safe in un-braced if/else.
#if 0 // Set to 1 to enable XML parsing logs
#define MWXMLLog(x, ...) NSLog(x, ## __VA_ARGS__)
#else
#define MWXMLLog(x, ...)
#endif
|
|
|
|
// Empty XHTML elements ( <!ELEMENT br EMPTY> in http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd )
// Returns YES when `name` is one of the XHTML elements declared EMPTY, NO otherwise
// (including when `name` is nil). Implemented as a function behind the macro so the
// argument is evaluated exactly once and any expression can be passed safely.
static inline BOOL MWFeedParserElementIsEmpty(NSString *name) {
    return ([name isEqualToString:@"br"] || [name isEqualToString:@"img"] || [name isEqualToString:@"input"] ||
            [name isEqualToString:@"hr"] || [name isEqualToString:@"link"] || [name isEqualToString:@"base"] ||
            [name isEqualToString:@"basefont"] || [name isEqualToString:@"frame"] || [name isEqualToString:@"meta"] ||
            [name isEqualToString:@"area"] || [name isEqualToString:@"col"] || [name isEqualToString:@"param"]);
}
#define ELEMENT_IS_EMPTY(e) MWFeedParserElementIsEmpty(e)
|
|
|
|
// Implementation
|
|
@implementation MWFeedParser
|
|
|
|
// Properties
|
|
@synthesize url, delegate;
|
|
@synthesize urlConnection, asyncData, asyncTextEncodingName, connectionType;
|
|
@synthesize feedParseType, feedParser, currentPath, currentText, currentElementAttributes, item, info;
|
|
@synthesize pathOfElementWithXHTMLType;
|
|
@synthesize stopped, failed, parsing;
|
|
|
|
#pragma mark -
|
|
#pragma mark NSObject
|
|
|
|
- (id)init {
    self = [super init];
    if (!self) return nil;

    // Defaults: parse everything, over a synchronous connection
    feedParseType = ParseTypeFull;
    connectionType = ConnectionTypeSynchronously;

    // Date formatters for the two internet date styles
    // Both are pinned to the en_US_POSIX locale and to GMT
    // Good info on internet dates here: http://developer.apple.com/iphone/library/qa/qa2010/qa1480.html
    NSLocale *posixLocale = [[NSLocale alloc] initWithLocaleIdentifier:@"en_US_POSIX"];

    dateFormatterRFC822 = [[NSDateFormatter alloc] init];
    [dateFormatterRFC822 setLocale:posixLocale];
    [dateFormatterRFC822 setTimeZone:[NSTimeZone timeZoneForSecondsFromGMT:0]];

    dateFormatterRFC3339 = [[NSDateFormatter alloc] init];
    [dateFormatterRFC3339 setLocale:posixLocale];
    [dateFormatterRFC3339 setTimeZone:[NSTimeZone timeZoneForSecondsFromGMT:0]];

    return self;
}
|
|
|
|
// Initialise with the URL of the feed to parse
// Kept mainly for historic reasons, from before -parseURL: existed
- (id)initWithFeedURL:(NSURL *)feedURL {
    self = [self init];
    if (self) {

        // The old initialiser asked for an NSString rather than an NSURL, so
        // accept a string here and turn it into a URL
        NSURL *resolvedURL = feedURL;
        if ([resolvedURL isKindOfClass:[NSString class]]) {
            resolvedURL = [NSURL URLWithString:(NSString *)resolvedURL];
        }

        // Remember url (goes through the custom setter)
        self.url = resolvedURL;

    }
    return self;
}
|
|
|
|
|
|
#pragma mark -
|
|
#pragma mark Parsing
|
|
|
|
// Clear the working data used while downloading and parsing
// The parse state flags are deliberately not touched here as they are still
// needed after a parse has ended
- (void)reset {

    // Download state
    self.urlConnection = nil;
    self.asyncData = nil;
    self.asyncTextEncodingName = nil;

    // XML walking state
    feedType = FeedTypeUnknown;
    hasEncounteredItems = NO;
    parseStructureAsContent = NO;
    self.pathOfElementWithXHTMLType = nil;
    self.currentElementAttributes = nil;
    self.currentPath = @"/";
    self.currentText = [NSMutableString string];

    // Objects under construction
    self.info = nil;
    self.item = nil;

}
|
|
|
|
// Parse using URL for backwards compatibility
//
// Downloads the feed at `url` and parses it. With an asynchronous connection the
// method returns as soon as the connection has been created; with a synchronous
// connection the data is downloaded and handed to the XML parser before returning.
//
// Returns NO when the URL or delegate is missing, when a parse is already in
// progress, or when the connection fails; each of those is also reported through
// -parsingFailedWithErrorCode:andDescription:. Returns YES otherwise.
- (BOOL)parse {

    // Reset
    [self reset];

    // A previously finished parse leaves parsingComplete set, and
    // -parsingFailedWithErrorCode:andDescription: ignores errors while it is set.
    // Clear it up front so that a failed precondition is always reported.
    // (While a parse is in progress the flag is already NO, so this is a no-op then.)
    parsingComplete = NO;

    // Perform checks before parsing
    if (!url || !delegate) {
        [self parsingFailedWithErrorCode:MWErrorCodeNotInitiated
                          andDescription:@"Delegate or URL not specified"];
        return NO;
    }
    if (parsing) {
        [self parsingFailedWithErrorCode:MWErrorCodeGeneral
                          andDescription:@"Cannot start parsing as parsing is already in progress"];
        return NO;
    }

    // Reset state for next parse
    parsing = YES;
    aborted = NO;
    stopped = NO;
    failed = NO;
    parsingComplete = NO;

    // Start
    BOOL success = YES;

    // Request (never served from a cache)
    NSMutableURLRequest *request = [[NSMutableURLRequest alloc] initWithURL:url
                                                                cachePolicy:NSURLRequestReloadIgnoringLocalAndRemoteCacheData
                                                            timeoutInterval:60];
    [request setValue:@"MWFeedParser" forHTTPHeaderField:@"User-Agent"];

    // Debug Log
    MWLog(@"MWFeedParser: Connecting & downloading feed data");

    // Connection
    if (connectionType == ConnectionTypeAsynchronously) {

        // Async: data arrives through the NSURLConnection delegate methods
        urlConnection = [[NSURLConnection alloc] initWithRequest:request delegate:self];
        if (urlConnection) {
            asyncData = [[NSMutableData alloc] init]; // Create data
        } else {
            [self parsingFailedWithErrorCode:MWErrorCodeConnectionFailed
                              andDescription:[NSString stringWithFormat:@"Asynchronous connection failed to URL: %@", url]];
            success = NO;
        }

    } else {

        // Sync
        NSURLResponse *response = nil;
        NSError *error = nil;
        NSData *data = [NSURLConnection sendSynchronousRequest:request returningResponse:&response error:&error];

        // Judge success by the returned data, not by the error pointer
        if (data) {
            [self startParsingData:data textEncodingName:[response textEncodingName]]; // Process
        } else {
            [self parsingFailedWithErrorCode:MWErrorCodeConnectionFailed
                              andDescription:[NSString stringWithFormat:@"Synchronous connection failed to URL: %@", url]];
            success = NO;
        }

    }

    // return
    return success;

}
|
|
|
|
// Begin XML parsing
//
// Creates a fresh feed info object, makes sure the XML is UTF-8 (converting the
// bytes and rewriting the encoding in the XML declaration when the response did
// not declare UTF-8) and then runs an NSXMLParser over it with self as delegate.
// Does nothing when `data` is nil or a parser already exists.
//
// data             - the downloaded feed bytes
// textEncodingName - IANA charset name reported by the response, may be nil
- (void)startParsingData:(NSData *)data textEncodingName:(NSString *)textEncodingName {
    if (!data || feedParser) return;

    // Create feed info
    MWFeedInfo *newInfo = [[MWFeedInfo alloc] init];
    self.info = newInfo;

    // Check whether it's UTF-8
    if (![[textEncodingName lowercaseString] isEqualToString:@"utf-8"]) {

        // Not UTF-8 so convert
        MWLog(@"MWFeedParser: XML document was not UTF-8 so we're converting it");
        NSString *string = nil;

        // Attempt to detect encoding from response header
        if (textEncodingName) {
            // Plain __bridge cast: ownership stays with ARC. A __bridge_retained cast
            // here would add a retain that is never balanced by CFRelease, i.e. a leak.
            CFStringEncoding cfEncoding = CFStringConvertIANACharSetNameToEncoding((__bridge CFStringRef)textEncodingName);
            if (cfEncoding != kCFStringEncodingInvalidId) {
                NSStringEncoding nsEncoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding);
                if (nsEncoding != 0) string = [[NSString alloc] initWithData:data encoding:nsEncoding];
            }
        }

        // If that failed then make our own attempts
        if (!string) {
            // http://www.mikeash.com/pyblog/friday-qa-2010-02-19-character-encodings.html
            string = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding];
            if (!string) string = [[NSString alloc] initWithData:data encoding:NSISOLatin1StringEncoding];
            if (!string) string = [[NSString alloc] initWithData:data encoding:NSMacOSRomanStringEncoding];
        }

        // Nil data; it is only set again if the conversion below succeeds
        data = nil;

        // Parse
        if (string) {

            // Set XML encoding to UTF-8
            // xmlDec is a prefix of string, so ranges found in it are valid in string too
            if ([string hasPrefix:@"<?xml"]) {
                NSRange a = [string rangeOfString:@"?>"];
                if (a.location != NSNotFound) {
                    NSString *xmlDec = [string substringToIndex:a.location];
                    if ([xmlDec rangeOfString:@"encoding=\"UTF-8\""
                                      options:NSCaseInsensitiveSearch].location == NSNotFound) {
                        NSRange b = [xmlDec rangeOfString:@"encoding=\""];
                        if (b.location != NSNotFound) {
                            NSUInteger s = b.location + b.length;
                            NSRange c = [xmlDec rangeOfString:@"\"" options:0 range:NSMakeRange(s, [xmlDec length] - s)];
                            if (c.location != NSNotFound) {
                                // Replace the whole encoding="..." attribute
                                NSRange attributeRange = NSMakeRange(b.location, c.location + c.length - b.location);
                                string = [string stringByReplacingCharactersInRange:attributeRange
                                                                         withString:@"encoding=\"UTF-8\""];
                            }
                        }
                    }
                }
            }

            // Convert string to UTF-8 data
            if (string) {
                data = [string dataUsingEncoding:NSUTF8StringEncoding];
            }

        }

    }

    // Create NSXMLParser
    if (data) {
        NSXMLParser *newFeedParser = [[NSXMLParser alloc] initWithData:data];
        self.feedParser = newFeedParser;
        if (feedParser) {

            // Parse!
            feedParser.delegate = self;
            [feedParser setShouldProcessNamespaces:YES];
            [feedParser parse];
            self.feedParser = nil; // Release after parse

        } else {
            [self parsingFailedWithErrorCode:MWErrorCodeFeedParsingError andDescription:@"Feed not a valid XML document"];
        }
    } else {
        [self parsingFailedWithErrorCode:MWErrorCodeFeedParsingError andDescription:@"Error with feed encoding"];
    }

}
|
|
|
|
// Abort parsing early if we're ignoring feed items
// Marks the parse as aborted, aborts the XML parser and then finishes as normal
- (void)abortParsingEarly {

    // Set the flag before aborting the parser
    aborted = YES;
    [feedParser abortParsing];

    // Finished
    [self parsingFinished];

}
|
|
|
|
// Stop parsing
// Cancels any download, aborts the XML parser and finishes the parse.
// Has no effect unless a parse is currently in progress.
- (void)stopParsing {

    // Only if we're parsing
    if (!parsing || parsingComplete) return;

    // Debug Log
    MWLog(@"MWFeedParser: Parsing stopped");

    // Stop
    stopped = YES;

    // Stop downloading and drop whatever was received so far
    [urlConnection cancel];
    self.urlConnection = nil;
    self.asyncData = nil;
    self.asyncTextEncodingName = nil;

    // Abort
    aborted = YES;
    [feedParser abortParsing];

    // Finished
    [self parsingFinished];

}
|
|
|
|
// Finish the parse: update state, tell the delegate and reset working data
// Only acts once per parse; later calls are ignored until the next parse starts
- (void)parsingFinished {

    // Already finished (or failed)
    if (parsingComplete) return;

    // Set state and notify delegate
    parsing = NO;
    parsingComplete = YES;
    if ([delegate respondsToSelector:@selector(feedParserDidFinish:)]) {
        [delegate feedParserDidFinish:self];
    }

    // Reset
    [self reset];

}
|
|
|
|
// If an error occurs, create NSError and inform delegate
// Only acts once per parse; calls made after the parse has completed are ignored.
//
// code        - error code used for the NSError (domain MWErrorDomain)
// description - stored under NSLocalizedDescriptionKey; may be nil, in which case
//               the error is created without user info
- (void)parsingFailedWithErrorCode:(int)code andDescription:(NSString *)description {

    // Finish & create error
    if (parsingComplete) return;

    // State
    failed = YES;
    parsing = NO;
    parsingComplete = YES;

    // Create error
    // +dictionaryWithObject:forKey: raises on a nil object, so only build the
    // user info when there is a description to put in it
    NSDictionary *userInfo = nil;
    if (description) {
        userInfo = [NSDictionary dictionaryWithObject:description
                                               forKey:NSLocalizedDescriptionKey];
    }
    NSError *error = [NSError errorWithDomain:MWErrorDomain
                                         code:code
                                     userInfo:userInfo];
    MWLog(@"%@", error);

    // Abort parsing
    if (feedParser) {
        aborted = YES;
        [feedParser abortParsing];
    }

    // Reset
    [self reset];

    // Inform delegate
    if ([delegate respondsToSelector:@selector(feedParser:didFailWithError:)]) {
        [delegate feedParser:self didFailWithError:error];
    }

}
|
|
|
|
#pragma mark -
|
|
#pragma mark NSURLConnection Delegate (Async)
|
|
|
|
// A response has arrived: remember its text encoding and empty the receive buffer
- (void)connection:(NSURLConnection *)connection didReceiveResponse:(NSURLResponse *)response {
    self.asyncTextEncodingName = response.textEncodingName;
    [self.asyncData setLength:0];
}
|
|
|
|
// Append each received chunk to the receive buffer
- (void)connection:(NSURLConnection *)connection didReceiveData:(NSData *)data {
    [self.asyncData appendData:data];
}
|
|
|
|
// The asynchronous download failed: drop the connection data and report the error
- (void)connection:(NSURLConnection *)connection didFailWithError:(NSError *)error {

    // Failed
    self.asyncTextEncodingName = nil;
    self.asyncData = nil;
    self.urlConnection = nil;

    // Error
    NSString *reason = [error localizedDescription];
    [self parsingFailedWithErrorCode:MWErrorCodeConnectionFailed andDescription:reason];

}
|
|
|
|
// The asynchronous download finished: parse what was received (unless the parse
// was stopped in the meantime) and release the download state
- (void)connectionDidFinishLoading:(NSURLConnection *)connection {

    // Succeed
    // -length is an NSUInteger, so log it as unsigned long rather than %d
    MWLog(@"MWFeedParser: Connection successful... received %lu bytes of data", (unsigned long)[asyncData length]);

    // Parse
    if (!stopped) [self startParsingData:asyncData textEncodingName:self.asyncTextEncodingName];

    // Cleanup
    self.urlConnection = nil;
    self.asyncData = nil;
    self.asyncTextEncodingName = nil;

}
|
|
|
|
// Returning nil here means the response is not cached
- (NSCachedURLResponse *)connection:(NSURLConnection *)connection willCacheResponse:(NSCachedURLResponse *)cachedResponse {
    NSCachedURLResponse *responseToCache = nil; // Don't cache
    return responseToCache;
}
|
|
|
|
#pragma mark -
|
|
#pragma mark XML Parsing
|
|
|
|
// An element has opened
// Extends the current path, then depending on state:
//  - while inside Atom type="xhtml" content, re-serialises the tag into currentText
//  - on the first element, works out the feed type (or fails for unknown roots)
//  - on an item/entry element, sends the feed info (first time only) and starts a new item
//  - on an Atom element with type="xhtml", switches to parsing structure as content
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName namespaceURI:(NSString *)namespaceURI
 qualifiedName:(NSString *)qualifiedName attributes:(NSDictionary *)attributeDict {
    MWXMLLog(@"NSXMLParser: didStartElement: %@", qualifiedName);

    @autoreleasepool {

        // Adjust path
        self.currentPath = [currentPath stringByAppendingPathComponent:qualifiedName];
        self.currentElementAttributes = attributeDict;

        // Parse content as structure (Atom feeds with element type="xhtml")
        // - Use elementName not qualifiedName to ignore XML namespaces for XHTML entities
        if (parseStructureAsContent) {

            // Open XHTML tag
            [currentText appendFormat:@"<%@", elementName];

            // Add attributes
            for (NSString *key in attributeDict) {
                [currentText appendFormat:@" %@=\"%@\"", key,
                 [[attributeDict objectForKey:key] stringByEncodingHTMLEntities]];
            }

            // End tag or close
            // Fixed text, so appendString: is used; there is nothing to format
            if (ELEMENT_IS_EMPTY(elementName)) {
                [currentText appendString:@" />"];
            } else {
                [currentText appendString:@">"];
            }

            return;

        }

        // Reset the text collected for the previous element
        [self.currentText setString:@""];

        // Determine feed type from the root element
        if (feedType == FeedTypeUnknown) {
            if ([qualifiedName isEqualToString:@"rss"]) {
                feedType = FeedTypeRSS;
            } else if ([qualifiedName isEqualToString:@"rdf:RDF"]) {
                feedType = FeedTypeRSS1;
            } else if ([qualifiedName isEqualToString:@"feed"]) {
                feedType = FeedTypeAtom;
            } else {

                // Invalid format so fail
                [self parsingFailedWithErrorCode:MWErrorCodeFeedParsingError
                                  andDescription:@"XML document is not a valid web feed document."];

            }
            return;
        }

        // Entering new feed element: nothing to do
        if (feedParseType != ParseTypeItemsOnly) {
            if ((feedType == FeedTypeRSS  && [currentPath isEqualToString:@"/rss/channel"]) ||
                (feedType == FeedTypeRSS1 && [currentPath isEqualToString:@"/rdf:RDF/channel"]) ||
                (feedType == FeedTypeAtom && [currentPath isEqualToString:@"/feed"])) {
                return;
            }
        }

        // Entering new item element
        if ((feedType == FeedTypeRSS  && [currentPath isEqualToString:@"/rss/channel/item"]) ||
            (feedType == FeedTypeRSS1 && [currentPath isEqualToString:@"/rdf:RDF/item"]) ||
            (feedType == FeedTypeAtom && [currentPath isEqualToString:@"/feed/entry"])) {

            // Send off feed info to delegate the first time an item is seen
            if (!hasEncounteredItems) {
                hasEncounteredItems = YES;
                if (feedParseType != ParseTypeItemsOnly) { // Check whether to ignore feed info

                    // Dispatch feed info to delegate
                    [self dispatchFeedInfoToDelegate];

                    // Stop parsing if only requiring meta data
                    if (feedParseType == ParseTypeInfoOnly) {

                        // Debug log
                        MWLog(@"MWFeedParser: Parse type is ParseTypeInfoOnly so finishing here");

                        // Finish
                        [self abortParsingEarly];

                        return;

                    }

                } else {

                    // Ignoring feed info so debug log
                    MWLog(@"MWFeedParser: Parse type is ParseTypeItemsOnly so ignoring feed info");

                }
            }

            // New item
            MWFeedItem *newItem = [[MWFeedItem alloc] init];
            self.item = newItem;

            // Return
            return;

        }

        // Check if entering into an Atom content tag with type "xhtml"
        // If type is "xhtml" then it can contain child elements and structure needs
        // to be parsed as content
        // See: http://www.atomenabled.org/developers/syndication/atom-format-spec.php#rfc.section.3.1.1
        if (feedType == FeedTypeAtom) {

            // Check type attribute
            NSString *typeAttribute = [attributeDict objectForKey:@"type"];
            if (typeAttribute && [typeAttribute isEqualToString:@"xhtml"]) {

                // Start parsing structure as content
                parseStructureAsContent = YES;

                // Remember path so we can stop parsing structure when element ends
                self.pathOfElementWithXHTMLType = currentPath;

            }

        }

    }

}
|
|
|
|
// An element has closed
// While inside Atom type="xhtml" content this only writes the closing tag. Otherwise
// the text collected for the element is trimmed and stored on the current item or on
// the feed info according to the element's path, the path is shortened, and the
// delegate is told when an item or the whole document has ended.
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName
  namespaceURI:(NSString *)namespaceURI qualifiedName:(NSString *)qName {
    MWXMLLog(@"NSXMLParser: didEndElement: %@", qName);

    @autoreleasepool {

        // Parse content as structure (Atom feeds with element type="xhtml")
        // - Use elementName not qualifiedName to ignore XML namespaces for XHTML entities
        if (parseStructureAsContent) {

            // Still below the element that started the XHTML content?
            if (currentPath.length > pathOfElementWithXHTMLType.length) {

                // Close XHTML tag unless it is an empty element
                if (!ELEMENT_IS_EMPTY(elementName)) [currentText appendFormat:@"</%@>", elementName];

                // Adjust path & don't continue
                self.currentPath = [currentPath stringByDeletingLastPathComponent];

                // Return
                return;

            }

            // The XHTML element itself is closing so finish
            parseStructureAsContent = NO;
            self.pathOfElementWithXHTMLType = nil;

            // Continue...

        }

        // Store data
        BOOL processed = NO;
        if (currentText) {

            // Remove newlines and whitespace from currentText
            NSString *processedText = [currentText stringByRemovingNewLinesAndWhitespace];

            // Empty values never overwrite a property, but the element still counts as processed
            BOOL hasText = (processedText.length > 0);

            // Process
            switch (feedType) {
                case FeedTypeRSS: {

                    // Item
                    if (!processed) {
                        if ([currentPath isEqualToString:@"/rss/channel/item/title"]) { if (hasText) item.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/link"]) { if (hasText) item.link = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/guid"]) { if (hasText) item.identifier = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/description"]) { if (hasText) item.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/content:encoded"]) { if (hasText) item.content = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/pubDate"]) { if (hasText) item.date = [NSDate dateFromInternetDateTimeString:processedText formatHint:DateFormatHintRFC822]; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/enclosure"]) { [self createEnclosureFromAttributes:currentElementAttributes andAddToItem:item]; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/item/dc:date"]) { if (hasText) item.date = [NSDate dateFromInternetDateTimeString:processedText formatHint:DateFormatHintRFC3339]; processed = YES; }
                    }

                    // Info
                    if (!processed && feedParseType != ParseTypeItemsOnly) {
                        if ([currentPath isEqualToString:@"/rss/channel/title"]) { if (hasText) info.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/description"]) { if (hasText) info.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rss/channel/link"]) { if (hasText) info.link = processedText; processed = YES; }
                    }

                    break;
                }
                case FeedTypeRSS1: {

                    // Item
                    if (!processed) {
                        if ([currentPath isEqualToString:@"/rdf:RDF/item/title"]) { if (hasText) item.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/link"]) { if (hasText) item.link = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/dc:identifier"]) { if (hasText) item.identifier = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/description"]) { if (hasText) item.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/content:encoded"]) { if (hasText) item.content = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/dc:date"]) { if (hasText) item.date = [NSDate dateFromInternetDateTimeString:processedText formatHint:DateFormatHintRFC3339]; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/item/enc:enclosure"]) { [self createEnclosureFromAttributes:currentElementAttributes andAddToItem:item]; processed = YES; }
                    }

                    // Info
                    if (!processed && feedParseType != ParseTypeItemsOnly) {
                        if ([currentPath isEqualToString:@"/rdf:RDF/channel/title"]) { if (hasText) info.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/channel/description"]) { if (hasText) info.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/rdf:RDF/channel/link"]) { if (hasText) info.link = processedText; processed = YES; }
                    }

                    break;
                }
                case FeedTypeAtom: {

                    // Item
                    if (!processed) {
                        if ([currentPath isEqualToString:@"/feed/entry/title"]) { if (hasText) item.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/link"]) { [self processAtomLink:currentElementAttributes andAddToMWObject:item]; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/id"]) { if (hasText) item.identifier = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/summary"]) { if (hasText) item.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/content"]) { if (hasText) item.content = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/published"]) { if (hasText) item.date = [NSDate dateFromInternetDateTimeString:processedText formatHint:DateFormatHintRFC3339]; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/entry/updated"]) { if (hasText) item.updated = [NSDate dateFromInternetDateTimeString:processedText formatHint:DateFormatHintRFC3339]; processed = YES; }
                    }

                    // Info
                    // Atom 1.0 (RFC 4287) names the feed's description element "subtitle";
                    // "/feed/description" is still accepted as before
                    if (!processed && feedParseType != ParseTypeItemsOnly) {
                        if ([currentPath isEqualToString:@"/feed/title"]) { if (hasText) info.title = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/description"]) { if (hasText) info.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/subtitle"]) { if (hasText) info.summary = processedText; processed = YES; }
                        else if ([currentPath isEqualToString:@"/feed/link"]) { [self processAtomLink:currentElementAttributes andAddToMWObject:info]; processed = YES; }
                    }

                    break;
                }
                default: break;
            }
        }

        // Adjust path
        self.currentPath = [currentPath stringByDeletingLastPathComponent];

        // If end of an item then tell delegate
        if (!processed) {
            if (((feedType == FeedTypeRSS || feedType == FeedTypeRSS1) && [qName isEqualToString:@"item"]) ||
                (feedType == FeedTypeAtom && [qName isEqualToString:@"entry"])) {

                // Dispatch item to delegate
                [self dispatchFeedItemToDelegate];

            }
        }

        // Check if the document has finished parsing and send off info if needed (i.e. there were no items)
        if (!processed) {
            if ((feedType == FeedTypeRSS  && [qName isEqualToString:@"rss"]) ||
                (feedType == FeedTypeRSS1 && [qName isEqualToString:@"rdf:RDF"]) ||
                (feedType == FeedTypeAtom && [qName isEqualToString:@"feed"])) {

                // Document ending so if we haven't sent off feed info yet, do so
                if (info && feedParseType != ParseTypeItemsOnly) [self dispatchFeedInfoToDelegate];

            }
        }

    }

}
|
|
|
|
//- (void)parser:(NSXMLParser *)parser foundAttributeDeclarationWithName:(NSString *)attributeName
|
|
// forElement:(NSString *)elementName type:(NSString *)type defaultValue:(NSString *)defaultValue {
|
|
// MWXMLLog(@"NSXMLParser: foundAttributeDeclarationWithName: %@", attributeName);
|
|
//}
|
|
|
|
// A CDATA section was found: decode it and add it to the current element's text
- (void)parser:(NSXMLParser *)parser foundCDATA:(NSData *)CDATABlock {
    // -length is an NSUInteger, so log it as unsigned long rather than %d
    MWXMLLog(@"NSXMLParser: foundCDATA (%lu bytes)", (unsigned long)CDATABlock.length);

    // Remember characters
    NSString *string = nil;
    @try {

        // Try decoding with NSUTF8StringEncoding & NSISOLatin1StringEncoding
        string = [[NSString alloc] initWithData:CDATABlock encoding:NSUTF8StringEncoding];
        if (!string) string = [[NSString alloc] initWithData:CDATABlock encoding:NSISOLatin1StringEncoding];

        // Add - No need to encode as CDATA should not be encoded as it's ignored by the parser
        if (string) [currentText appendString:string];

    } @catch (NSException *e) {
        // Best effort: a CDATA block that cannot be decoded is skipped
    }

}
|
|
|
|
// Character data was found: add it to the current element's text
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string {
    MWXMLLog(@"NSXMLParser: foundCharacters: %@", string);

    // Remember characters
    // Normally they are added as they are; while parsing structure as content
    // they are HTML-entity encoded first
    NSString *textToAppend = parseStructureAsContent ? [string stringByEncodingHTMLEntities] : string;
    [currentText appendString:textToAppend];

}
|
|
|
|
// The XML parser has started on the document: let the delegate know
- (void)parserDidStartDocument:(NSXMLParser *)parser {
    MWXMLLog(@"NSXMLParser: parserDidStartDocument");

    // Debug Log
    MWLog(@"MWFeedParser: Parsing started");

    // Inform delegate (optional method)
    if (![delegate respondsToSelector:@selector(feedParserDidStart:)]) return;
    [delegate feedParserDidStart:self];

}
|
|
|
|
// The XML parser reached the end of the document: finish the parse
- (void)parserDidEndDocument:(NSXMLParser *)parser {
    MWXMLLog(@"NSXMLParser: parserDidEndDocument");

    // Debug Log
    MWLog(@"MWFeedParser: Parsing finished");

    // Update state and inform delegate
    [self parsingFinished];

}
|
|
|
|
// Called if a parsing error occurred or the parse was aborted
- (void)parser:(NSXMLParser *)parser parseErrorOccurred:(NSError *)parseError {
    MWXMLLog(@"NSXMLParser: parseErrorOccurred: %@", parseError);

    // This method is also called when legitimately aborting the parser,
    // so ignore the error if that is the case
    if (aborted) return;

    // Fail with error
    [self parsingFailedWithErrorCode:MWErrorCodeFeedParsingError andDescription:[parseError localizedDescription]];

}
|
|
|
|
// Called if a validation error occurred: fail the parse with a validation error code
- (void)parser:(NSXMLParser *)parser validationErrorOccurred:(NSError *)validError {
    MWXMLLog(@"NSXMLParser: validationErrorOccurred: %@", validError);

    // Fail with error
    NSString *reason = [validError localizedDescription];
    [self parsingFailedWithErrorCode:MWErrorCodeFeedValidationError andDescription:reason];

}
|
|
|
|
#pragma mark -
|
|
#pragma mark Send Items to Delegate
|
|
|
|
// Hand the collected feed info to the delegate, then discard it so it is sent only once
- (void)dispatchFeedInfoToDelegate {

    // Nothing collected (or already sent)
    if (!info) return;

    // Inform delegate
    if ([delegate respondsToSelector:@selector(feedParser:didParseFeedInfo:)]) {
        [delegate feedParser:self didParseFeedInfo:info];
    }

    // Debug log
    MWLog(@"MWFeedParser: Feed info for \"%@\" successfully parsed", info.title);

    // Finish
    self.info = nil;

}
|
|
|
|
// Tidy up the current item, hand it to the delegate, then discard it
- (void)dispatchFeedItemToDelegate {

    // No item being built
    if (!item) return;

    // Process before hand
    // With no summary, the content is moved into the summary
    if (!item.summary) {
        item.summary = item.content;
        item.content = nil;
    }
    // With no date, fall back to the updated date
    if (!item.date && item.updated) {
        item.date = item.updated;
    }

    // Debug log
    MWLog(@"MWFeedParser: Feed item \"%@\" successfully parsed", item.title);

    // Inform delegate
    if ([delegate respondsToSelector:@selector(feedParser:didParseFeedItem:)]) {
        [delegate feedParser:self didParseFeedItem:item];
    }

    // Finish
    self.item = nil;

}
|
|
|
|
#pragma mark -
|
|
#pragma mark Helpers & Properties
|
|
|
|
// Set the URL to parse, stripping the feed: URI scheme if it is used
// http://en.wikipedia.org/wiki/Feed:_URI_scheme
- (void)setUrl:(NSURL *)value {

    // The old init asked for an NSString rather than an NSURL, so accept a string too
    NSURL *source = value;
    if ([source isKindOfClass:[NSString class]]) {
        source = [NSURL URLWithString:(NSString *)source];
    }

    // Work out the URL to store
    NSURL *newURL = nil;
    if (source) {
        if ([source.scheme isEqualToString:@"feed"]) {

            // Remove feed URL scheme
            // "feed://host/..." becomes "http://host/...", while "feed:<url>" becomes "<url>"
            NSString *specifier = source.resourceSpecifier;
            NSString *schemePrefix = [specifier hasPrefix:@"//"] ? @"http:" : @"";
            newURL = [NSURL URLWithString:[NSString stringWithFormat:@"%@%@", schemePrefix, source.resourceSpecifier]];

        } else {

            // Any other scheme: keep a copy as it is
            newURL = [source copy];

        }
    }

    // Set new url
    url = newURL;

}
|
|
|
|
#pragma mark -
|
|
#pragma mark Misc
|
|
|
|
// Create an enclosure NSDictionary from enclosure (or link) attributes
// The dictionary holds "url", "type" (when present) and "length", and is appended to
// the item's enclosures array. Which attributes are read depends on the feed type.
// Returns YES if an enclosure was created, NO if no enclosure URL was found.
- (BOOL)createEnclosureFromAttributes:(NSDictionary *)attributes andAddToItem:(MWFeedItem *)currentItem {

    // Attribute names holding the URL, type and length for this feed type
    NSString *urlKey = nil, *typeKey = nil, *lengthKey = nil;
    switch (feedType) {
        case FeedTypeRSS: // http://cyber.law.harvard.edu/rss/rss.html#ltenclosuregtSubelementOfLtitemgt
            // <enclosure>
            urlKey = @"url";
            typeKey = @"type";
            lengthKey = @"length";
            break;
        case FeedTypeRSS1: // http://www.xs4all.nl/~foz/mod_enclosure.html
            // <enc:enclosure>
            urlKey = @"rdf:resource";
            typeKey = @"enc:type";
            lengthKey = @"enc:length";
            break;
        case FeedTypeAtom: // http://www.atomenabled.org/developers/syndication/atom-format-spec.php#rel_attribute
            // <link rel="enclosure" href=...
            if ([[attributes objectForKey:@"rel"] isEqualToString:@"enclosure"]) {
                urlKey = @"href";
                typeKey = @"type";
                lengthKey = @"length";
            }
            break;
        default:
            break;
    }

    // No attributes, unsupported feed type or not an enclosure link
    if (!attributes || !urlKey) return NO;

    // Without a URL there is no enclosure
    NSString *encURL = [attributes objectForKey:urlKey];
    if (!encURL) return NO;

    // Build the enclosure; a missing length attribute is stored as 0
    NSString *encType = [attributes objectForKey:typeKey];
    NSString *lengthText = [attributes objectForKey:lengthKey];
    NSNumber *encLength = [NSNumber numberWithLongLong:[lengthText longLongValue]];

    NSMutableDictionary *fields = [[NSMutableDictionary alloc] initWithCapacity:3];
    [fields setObject:encURL forKey:@"url"];
    if (encType) [fields setObject:encType forKey:@"type"];
    [fields setObject:encLength forKey:@"length"];
    NSDictionary *enclosure = [NSDictionary dictionaryWithDictionary:fields];

    // Add to item
    NSArray *existing = currentItem.enclosures;
    currentItem.enclosures = existing ? [existing arrayByAddingObject:enclosure]
                                      : [NSArray arrayWithObject:enclosure];
    return YES;

}
|
|
|
|
// Process ATOM link and determine whether to ignore it, add it as the link element or add as enclosure
// Links can be added to MWObject (info or item)
//
// attributes - attributes of the <link> element
// MWObject   - the MWFeedItem or MWFeedInfo receiving the link
//
// Returns YES if the link was used (as the object's link or as an enclosure), NO if it was ignored.
- (BOOL)processAtomLink:(NSDictionary *)attributes andAddToMWObject:(id)MWObject {
    if (!attributes) return NO;

    NSString *rel = [attributes objectForKey:@"rel"];
    NSString *href = [attributes objectForKey:@"href"];

    // RFC 4287 section 4.2.7.2: a link without a rel attribute must be interpreted
    // as rel="alternate". Only do so when there is an href to use.
    if (!rel && href) rel = @"alternate";

    // Use as link if rel == alternate
    if ([rel isEqualToString:@"alternate"]) {
        [MWObject setLink:href]; // Can be added to MWFeedItem or MWFeedInfo
        return YES;
    }

    // Use as enclosure if rel == enclosure
    if ([rel isEqualToString:@"enclosure"]) {
        if ([MWObject isMemberOfClass:[MWFeedItem class]]) { // Enclosures can only be added to MWFeedItem
            [self createEnclosureFromAttributes:attributes andAddToItem:(MWFeedItem *)MWObject];
            return YES;
        }
    }

    return NO;
}
|
|
|
|
@end |